mirror of
https://github.com/f4exb/sdrangel.git
synced 2024-11-25 17:28:50 -05:00
Use more precise SIMD flags and detect actual x86_64 SIMD features
This commit is contained in:
parent
dbbbfa12ee
commit
63d6eea066
@ -61,10 +61,6 @@ if (NOT BUILD_DEBIAN)
|
||||
find_package(SerialDV)
|
||||
endif()
|
||||
|
||||
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|x86")
|
||||
SET(USE_SSE "SSE4_1" CACHE STRING "Use SSE 4.1 SIMD instructions")
|
||||
ENDIF()
|
||||
|
||||
# MacOS Compatibility
|
||||
if(APPLE)
|
||||
find_package(ICONV)
|
||||
@ -411,17 +407,87 @@ include_directories(
|
||||
${OPENGL_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
if(USE_SSE MATCHES SSE4_1)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse4.1" )
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse4.1" )
|
||||
add_definitions(-DUSE_SSE)
|
||||
elseif(MSVC)
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE4_1" )
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE4_1" )
|
||||
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
||||
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
||||
add_definitions(-DUSE_SSE)
|
||||
##############################################################################
|
||||
|
||||
# Configure-time SIMD detection.
#
# Queries the machine running CMake (uname -m, then /proc/cpuinfo) and, for every
# SIMD feature it advertises:
#   - records the result in a HAS_<FEATURE> cache entry (ON/OFF),
#   - appends the matching instruction-set flag to the Release and Debug C++ flags,
#   - adds a USE_<FEATURE> preprocessor definition for the sources.
#
# All variable expansions used in if() conditions are quoted: when uname, grep or
# /proc/cpuinfo is not available the variables are empty, and an unquoted empty
# expansion would turn the condition into "if( MATCHES ...)", which is a hard
# configure error. With quoting, such hosts simply get no SIMD options.
#
# NOTE(review): detection reflects the build host, not the target; confirm this is
# acceptable for cross-compiled or redistributed binaries.

# OUTPUT_STRIP_TRAILING_WHITESPACE removes the newline printed by uname, so no
# external "tr" is needed.
execute_process(
    COMMAND uname -m
    OUTPUT_VARIABLE ARCHITECTURE
    OUTPUT_STRIP_TRAILING_WHITESPACE
)
message( STATUS "Architecture: ${ARCHITECTURE}" )

if ("${ARCHITECTURE}" MATCHES "x86_64|AMD64|x86")
    # "flags" lines of /proc/cpuinfo list the CPU feature names.
    # ERROR_QUIET: stay silent when /proc/cpuinfo does not exist.
    execute_process( COMMAND grep flags /proc/cpuinfo OUTPUT_VARIABLE CPU_FLAGS ERROR_QUIET )

    # --- AVX2 (no MSVC branch was provided for this feature) ---
    if ("${CPU_FLAGS}" MATCHES "avx2")
        set(HAS_AVX2 ON CACHE BOOL "Architecture has AVX2 SIMD enabled")
        if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mavx2" )
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mavx2" )
            message(STATUS "Use AVX2 SIMD instructions")
            add_definitions(-DUSE_AVX2)
        endif()
    else()
        # The OFF entry belongs to the CPU-flag test (as for every other feature
        # below), not to the compiler test.
        set(HAS_AVX2 OFF CACHE BOOL "Architecture does not have AVX2 SIMD enabled")
    endif()

    # NOTE(review): the MSVC branches below pass /arch:SSE4_1 and /arch:SSSE3;
    # Microsoft's documentation for /arch appears to list only IA32, SSE, SSE2,
    # AVX, AVX2 and AVX512 -- confirm whether these values are accepted or ignored.

    # --- SSE 4.1 ---
    if ("${CPU_FLAGS}" MATCHES "sse4_1")
        set(HAS_SSE4_1 ON CACHE BOOL "Architecture has SSE 4.1 SIMD enabled")
        if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse4.1" )
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse4.1" )
            message(STATUS "Use SSE 4.1 SIMD instructions")
            add_definitions(-DUSE_SSE4_1)
        elseif(MSVC)
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE4_1" )
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE4_1" )
            set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
            add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
            add_definitions(-DUSE_SSE4_1)
        endif()
    else()
        set(HAS_SSE4_1 OFF CACHE BOOL "Architecture does not have SSE 4.1 SIMD enabled")
    endif()

    # --- SSSE3 ---
    if ("${CPU_FLAGS}" MATCHES "ssse3")
        set(HAS_SSSE3 ON CACHE BOOL "Architecture has SSSE3 SIMD enabled")
        if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
            message(STATUS "Use SSSE3 SIMD instructions")
            add_definitions(-DUSE_SSSE3)
        elseif(MSVC)
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
            set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
            add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
            add_definitions(-DUSE_SSSE3)
        endif()
    else()
        set(HAS_SSSE3 OFF CACHE BOOL "Architecture does not have SSSE3 SIMD enabled")
    endif()

    # --- SSE2 ---
    if ("${CPU_FLAGS}" MATCHES "sse2")
        set(HAS_SSE2 ON CACHE BOOL "Architecture has SSE2 SIMD enabled")
        if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse2" )
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse2" )
            message(STATUS "Use SSE2 SIMD instructions")
            add_definitions(-DUSE_SSE2)
        elseif(MSVC)
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE2" )
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE2" )
            set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
            add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
            add_definitions(-DUSE_SSE2)
        endif()
    else()
        set(HAS_SSE2 OFF CACHE BOOL "Architecture does not have SSE2 SIMD enabled")
    endif()
elseif ("${ARCHITECTURE}" MATCHES "armv7l")
    # On ARM the feature names are on the "Features" lines of /proc/cpuinfo.
    execute_process( COMMAND grep Features /proc/cpuinfo OUTPUT_VARIABLE CPU_FLAGS ERROR_QUIET )

    # --- NEON ---
    if ("${CPU_FLAGS}" MATCHES "neon")
        set(HAS_NEON ON CACHE BOOL "Architecture has NEON SIMD enabled")
        if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
            set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
            set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
            message(STATUS "Use NEON SIMD instructions")
            add_definitions(-DUSE_NEON)
        endif()
    else()
        set(HAS_NEON OFF CACHE BOOL "Architecture does not have NEON SIMD enabled")
    endif()
endif()
|
||||
|
||||
|
@ -1,39 +1,11 @@
|
||||
project(cm256cc)
|
||||
|
||||
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
|
||||
message( STATUS "CM256cc: Architecture: ${ARCHITECTURE}" )
|
||||
|
||||
if(${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
|
||||
SET(USE_SIMD "SSSE3")
|
||||
elseif(${ARCHITECTURE} MATCHES "armv7l")
|
||||
SET(USE_SIMD "NEON")
|
||||
endif()
|
||||
|
||||
message( STATUS "CM256cc: use SIMD: ${USE_SIMD}" )
|
||||
|
||||
if(USE_SIMD MATCHES SSSE3)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
|
||||
message(STATUS "g++ SSSE3")
|
||||
add_definitions(-DUSE_SIMD)
|
||||
elseif(MSVC)
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
|
||||
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
||||
message(STATUS "MSVC SSSE3")
|
||||
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
||||
add_definitions(-DUSE_SIMD)
|
||||
endif()
|
||||
elseif(USE_SIMD MATCHES NEON)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
|
||||
message(STATUS "g++ NEON")
|
||||
add_definitions(-DUSE_NEON)
|
||||
endif()
|
||||
if (HAS_SSSE3)
|
||||
message(STATUS "SDRdaemonFEC: use SSSE3 SIMD" )
|
||||
elseif (HAS_NEON)
|
||||
message(STATUS "SDRdaemonFEC: use Neon SIMD" )
|
||||
else()
|
||||
message(STATUS "CM256cc: Unsupported architecture")
|
||||
message(STATUS "SDRdaemonFEC: Unsupported architecture")
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
@ -15,7 +15,7 @@ CONFIG(MINGW64):LIBCM256CCSRC = "D:\softs\cm256cc"
|
||||
INCLUDEPATH += $$LIBCM256CCSRC
|
||||
|
||||
DEFINES += __WINDOWS__=1
|
||||
DEFINES += USE_SIMD=1
|
||||
DEFINES += USE_SSSE3=1
|
||||
QMAKE_CXXFLAGS += -msse4.1
|
||||
|
||||
CONFIG(Release):build_subdir = release
|
||||
|
@ -13,7 +13,7 @@ TARGET = modam
|
||||
INCLUDEPATH += $$PWD
|
||||
INCLUDEPATH += ../../../sdrbase
|
||||
|
||||
DEFINES += USE_SSE=1
|
||||
DEFINES += USE_SSE4_1=1
|
||||
QMAKE_CXXFLAGS += -msse4.1
|
||||
|
||||
CONFIG(Release):build_subdir = release
|
||||
|
@ -2,40 +2,10 @@ project(sdrdaemonfec)
|
||||
|
||||
find_package(LibNANOMSG)
|
||||
|
||||
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
|
||||
message( STATUS "SDRdaemonFEC: Architecture: ${ARCHITECTURE}" )
|
||||
|
||||
if(${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
|
||||
SET(USE_SIMD "SSSE3")
|
||||
elseif(${ARCHITECTURE} MATCHES "armv7l")
|
||||
SET(USE_SIMD "NEON")
|
||||
endif()
|
||||
|
||||
message( STATUS "SDRdaemonFEC: use SIMD: ${USE_SIMD}" )
|
||||
|
||||
if(USE_SIMD MATCHES SSSE3)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
|
||||
message(STATUS "SDRdaemonFEC: g++ SSSE3")
|
||||
add_definitions(-DUSE_SIMD)
|
||||
add_definitions(-DUSE_SSE)
|
||||
elseif(MSVC)
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
|
||||
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
||||
message(STATUS "SDRdaemonFEC: MSVC SSSE3")
|
||||
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
||||
add_definitions(-DUSE_SIMD)
|
||||
add_definitions(-DUSE_SSE)
|
||||
endif()
|
||||
elseif(USE_SIMD MATCHES NEON)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
|
||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
|
||||
message(STATUS "SDRdaemonFEC: g++ NEON")
|
||||
add_definitions(-DUSE_NEON)
|
||||
endif()
|
||||
if (HAS_SSSE3)
|
||||
message(STATUS "SDRdaemonFEC: use SSSE3 SIMD" )
|
||||
elseif (HAS_NEON)
|
||||
message(STATUS "SDRdaemonFEC: use Neon SIMD" )
|
||||
else()
|
||||
message(STATUS "SDRdaemonFEC: Unsupported architecture")
|
||||
return()
|
||||
|
@ -23,9 +23,8 @@ INCLUDEPATH += ../../../lz4
|
||||
INCLUDEPATH += $$LIBNANOMSGSRC/src
|
||||
INCLUDEPATH += $$LIBCM256CCSRC
|
||||
|
||||
DEFINES += USE_SIMD=1
|
||||
DEFINES += USE_SSE=1
|
||||
QMAKE_CXXFLAGS += -msse4.1
|
||||
DEFINES += USE_SSSE3=1
|
||||
QMAKE_CXXFLAGS += -mssse3
|
||||
|
||||
CONFIG(Release):build_subdir = release
|
||||
CONFIG(Debug):build_subdir = debug
|
||||
|
@ -18,7 +18,7 @@
|
||||
#define INCLUDE_GPL_DSP_DECIMATORS_H_
|
||||
|
||||
#include "dsp/dsptypes.h"
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
#include "dsp/inthalfbandfiltereo1.h"
|
||||
#else
|
||||
#include "dsp/inthalfbandfilterdb.h"
|
||||
@ -124,7 +124,7 @@ public:
|
||||
void decimate64_cen(SampleVector::iterator* it, const T* buf, qint32 len);
|
||||
|
||||
private:
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator2; // 1st stages
|
||||
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator4; // 2nd stages
|
||||
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator8; // 3rd stages
|
||||
|
@ -1,8 +1,8 @@
|
||||
#ifndef INCLUDE_INTERPOLATOR_H
|
||||
#define INCLUDE_INTERPOLATOR_H
|
||||
|
||||
#ifdef USE_SSE
|
||||
#include <immintrin.h>
|
||||
#ifdef USE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include "dsp/dsptypes.h"
|
||||
#include "util/export.h"
|
||||
@ -125,7 +125,7 @@ private:
|
||||
{
|
||||
if (phase < 0)
|
||||
phase = 0;
|
||||
#if USE_SSE
|
||||
#if USE_SSE2
|
||||
// beware of the ringbuffer
|
||||
if(m_ptr == 0) {
|
||||
// only one straight block
|
||||
|
@ -22,7 +22,7 @@
|
||||
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO_H_
|
||||
#define SDRBASE_DSP_INTHALFBANDFILTEREO_H_
|
||||
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -464,7 +464,7 @@ protected:
|
||||
qint32 iAcc = 0;
|
||||
qint32 qAcc = 0;
|
||||
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
//#warning "IntHalfbandFiler SIMD"
|
||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||
__m128i sumI = _mm_setzero_si128();
|
||||
@ -551,7 +551,7 @@ protected:
|
||||
qint32 iAcc = 0;
|
||||
qint32 qAcc = 0;
|
||||
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||
__m128i sumI = _mm_setzero_si128();
|
||||
__m128i sumQ = _mm_setzero_si128();
|
||||
|
@ -22,10 +22,14 @@
|
||||
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
|
||||
#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
|
||||
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef USE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dsp/dsptypes.h"
|
||||
#include "dsp/hbfiltertraits.h"
|
||||
@ -484,8 +488,7 @@ protected:
|
||||
qint32 iAcc = 0;
|
||||
qint32 qAcc = 0;
|
||||
|
||||
#ifdef USE_SSE
|
||||
//#warning "IntHalfbandFiler SIMD"
|
||||
#if defined(USE_SSE4_1)
|
||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||
__m128i sumI = _mm_setzero_si128();
|
||||
__m128i sumQ = _mm_setzero_si128();
|
||||
@ -528,6 +531,47 @@ protected:
|
||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
|
||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
|
||||
qAcc = _mm_cvtsi128_si32(sumQ);
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sumI = vdupq_n_s32(0);
|
||||
int32x4_t sumQ = vdupq_n_s32(0);
|
||||
int32x4_t sa, sb, sh;
|
||||
|
||||
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
|
||||
{
|
||||
sh = vld1_s32(&h[4*i]);
|
||||
|
||||
if ((m_ptrB % 2) == 0)
|
||||
{
|
||||
sa = vld1q_s32(&(m_evenA[0][a]));
|
||||
sb = vld1q_s32(&(m_evenB[0][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
|
||||
sa = vld1q_s32(&(m_evenA[1][a]));
|
||||
sb = vld1q_s32(&(m_evenB[1][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
}
|
||||
else
|
||||
{
|
||||
sa = vld1q_s32(&(m_oddA[0][a]));
|
||||
sb = vld1q_s32(&(m_oddB[0][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
|
||||
sa = vld1q_s32(&(m_oddA[1][a]));
|
||||
sb = vld1q_s32(&(m_oddB[1][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
}
|
||||
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
|
||||
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
|
||||
iAcc = vget_lane_s32(sumI2, 0);
|
||||
|
||||
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
|
||||
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
|
||||
qAcc = vget_lane_s32(sumQ2, 0);
|
||||
#else
|
||||
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
|
||||
{
|
||||
@ -570,7 +614,7 @@ protected:
|
||||
qint32 iAcc = 0;
|
||||
qint32 qAcc = 0;
|
||||
|
||||
#ifdef USE_SSE
|
||||
#if defined(USE_SSE4_1)
|
||||
//#warning "IntHalfbandFiler SIMD"
|
||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||
__m128i sumI = _mm_setzero_si128();
|
||||
@ -614,6 +658,47 @@ protected:
|
||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
|
||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
|
||||
qAcc = _mm_cvtsi128_si32(sumQ);
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sumI = vdupq_n_s32(0);
|
||||
int32x4_t sumQ = vdupq_n_s32(0);
|
||||
int32x4_t sa, sb, sh;
|
||||
|
||||
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
|
||||
{
|
||||
sh = vld1_s32(&h[4*i]);
|
||||
|
||||
if ((m_ptrB % 2) == 0)
|
||||
{
|
||||
sa = vld1q_s32(&(m_evenA[0][a]));
|
||||
sb = vld1q_s32(&(m_evenB[0][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
|
||||
sa = vld1q_s32(&(m_evenA[1][a]));
|
||||
sb = vld1q_s32(&(m_evenB[1][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
}
|
||||
else
|
||||
{
|
||||
sa = vld1q_s32(&(m_oddA[0][a]));
|
||||
sb = vld1q_s32(&(m_oddB[0][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
|
||||
sa = vld1q_s32(&(m_oddA[1][a]));
|
||||
sb = vld1q_s32(&(m_oddB[1][b]));
|
||||
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||
}
|
||||
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
|
||||
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
|
||||
iAcc = vget_lane_s32(sumI2, 0);
|
||||
|
||||
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
|
||||
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
|
||||
qAcc = vget_lane_s32(sumQ2, 0);
|
||||
#else
|
||||
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
|
||||
{
|
||||
|
@ -201,7 +201,7 @@ void UpChannelizer::applyConfiguration()
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
UpChannelizer::FilterStage::FilterStage(Mode mode) :
|
||||
m_filter(new IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>),
|
||||
m_workFunction(0)
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include <QMutex>
|
||||
#include "util/export.h"
|
||||
#include "util/message.h"
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
#include "dsp/inthalfbandfiltereo2.h"
|
||||
#else
|
||||
#include "dsp/inthalfbandfilterdb.h"
|
||||
@ -73,7 +73,7 @@ protected:
|
||||
ModeUpperHalf
|
||||
};
|
||||
|
||||
#ifdef USE_SSE
|
||||
#ifdef USE_SSE4_1
|
||||
typedef bool (IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>::*WorkFunction)(Sample* sIn, Sample *sOut);
|
||||
IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>* m_filter;
|
||||
#else
|
||||
|
@ -15,8 +15,8 @@
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef USE_SSE
|
||||
#include <immintrin.h>
|
||||
#ifdef USE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <QMouseEvent>
|
||||
@ -381,83 +381,83 @@ void GLSpectrum::updateHistogram(const std::vector<Real>& spectrum)
|
||||
|
||||
m_currentSpectrum = &spectrum; // Store spectrum for current spectrum line display
|
||||
|
||||
#ifndef USE_SSE
|
||||
for(int i = 0; i < m_fftSize; i++) {
|
||||
int v = (int)((spectrum[i] - m_referenceLevel) * 100.0 / m_powerRange + 100.0);
|
||||
#ifdef USE_SSE2
|
||||
if(m_decay >= 0) { // normal
|
||||
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
|
||||
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
|
||||
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
|
||||
|
||||
if ((v >= 0) && (v <= 99)) {
|
||||
b = m_histogram + i * 100 + v;
|
||||
if(*b < 220)
|
||||
*b += m_histogramStroke; // was 4
|
||||
else if(*b < 239)
|
||||
*b += 1;
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < m_fftSize; i += 4) {
|
||||
__m128 abc = _mm_loadu_ps (&spectrum[i]);
|
||||
abc = _mm_sub_ps(abc, refl);
|
||||
abc = _mm_mul_ps(abc, mul);
|
||||
abc = _mm_div_ps(abc, power);
|
||||
abc = _mm_add_ps(abc, mul);
|
||||
__m128i result = _mm_cvtps_epi32(abc);
|
||||
|
||||
for(int j = 0; j < 4; j++) {
|
||||
int v = ((int*)&result)[j];
|
||||
if((v >= 0) && (v <= 99)) {
|
||||
b = m_histogram + (i + j) * 100 + v;
|
||||
if(*b < 220)
|
||||
*b += m_histogramStroke; // was 4
|
||||
else if(*b < 239)
|
||||
*b += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // draw double pixels
|
||||
int add = -m_decay * 4;
|
||||
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
|
||||
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
|
||||
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
|
||||
|
||||
for(int i = 0; i < m_fftSize; i += 4) {
|
||||
__m128 abc = _mm_loadu_ps (&spectrum[i]);
|
||||
abc = _mm_sub_ps(abc, refl);
|
||||
abc = _mm_mul_ps(abc, mul);
|
||||
abc = _mm_div_ps(abc, power);
|
||||
abc = _mm_add_ps(abc, mul);
|
||||
__m128i result = _mm_cvtps_epi32(abc);
|
||||
|
||||
for(int j = 0; j < 4; j++) {
|
||||
int v = ((int*)&result)[j];
|
||||
if((v >= 1) && (v <= 98)) {
|
||||
b = m_histogram + (i + j) * 100 + v;
|
||||
if(b[-1] < 220)
|
||||
b[-1] += add;
|
||||
else if(b[-1] < 239)
|
||||
b[-1] += 1;
|
||||
if(b[0] < 220)
|
||||
b[0] += add;
|
||||
else if(b[0] < 239)
|
||||
b[0] += 1;
|
||||
if(b[1] < 220)
|
||||
b[1] += add;
|
||||
else if(b[1] < 239)
|
||||
b[1] += 1;
|
||||
} else if((v >= 0) && (v <= 99)) {
|
||||
b = m_histogram + (i + j) * 100 + v;
|
||||
if(*b < 220)
|
||||
*b += add;
|
||||
else if(*b < 239)
|
||||
*b += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if(m_decay >= 0) { // normal
|
||||
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
|
||||
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
|
||||
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
|
||||
for(int i = 0; i < m_fftSize; i++) {
|
||||
int v = (int)((spectrum[i] - m_referenceLevel) * 100.0 / m_powerRange + 100.0);
|
||||
|
||||
for(int i = 0; i < m_fftSize; i += 4) {
|
||||
__m128 abc = _mm_loadu_ps (&spectrum[i]);
|
||||
abc = _mm_sub_ps(abc, refl);
|
||||
abc = _mm_mul_ps(abc, mul);
|
||||
abc = _mm_div_ps(abc, power);
|
||||
abc = _mm_add_ps(abc, mul);
|
||||
__m128i result = _mm_cvtps_epi32(abc);
|
||||
|
||||
for(int j = 0; j < 4; j++) {
|
||||
int v = ((int*)&result)[j];
|
||||
if((v >= 0) && (v <= 99)) {
|
||||
b = m_histogram + (i + j) * 100 + v;
|
||||
if(*b < 220)
|
||||
*b += m_histogramStroke; // was 4
|
||||
else if(*b < 239)
|
||||
*b += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // draw double pixels
|
||||
int add = -m_decay * 4;
|
||||
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
|
||||
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
|
||||
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
|
||||
|
||||
for(int i = 0; i < m_fftSize; i += 4) {
|
||||
__m128 abc = _mm_loadu_ps (&spectrum[i]);
|
||||
abc = _mm_sub_ps(abc, refl);
|
||||
abc = _mm_mul_ps(abc, mul);
|
||||
abc = _mm_div_ps(abc, power);
|
||||
abc = _mm_add_ps(abc, mul);
|
||||
__m128i result = _mm_cvtps_epi32(abc);
|
||||
|
||||
for(int j = 0; j < 4; j++) {
|
||||
int v = ((int*)&result)[j];
|
||||
if((v >= 1) && (v <= 98)) {
|
||||
b = m_histogram + (i + j) * 100 + v;
|
||||
if(b[-1] < 220)
|
||||
b[-1] += add;
|
||||
else if(b[-1] < 239)
|
||||
b[-1] += 1;
|
||||
if(b[0] < 220)
|
||||
b[0] += add;
|
||||
else if(b[0] < 239)
|
||||
b[0] += 1;
|
||||
if(b[1] < 220)
|
||||
b[1] += add;
|
||||
else if(b[1] < 239)
|
||||
b[1] += 1;
|
||||
} else if((v >= 0) && (v <= 99)) {
|
||||
b = m_histogram + (i + j) * 100 + v;
|
||||
if(*b < 220)
|
||||
*b += add;
|
||||
else if(*b < 239)
|
||||
*b += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((v >= 0) && (v <= 99)) {
|
||||
b = m_histogram + i * 100 + v;
|
||||
if(*b < 220)
|
||||
*b += m_histogramStroke; // was 4
|
||||
else if(*b < 239)
|
||||
*b += 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -14,8 +14,8 @@ INCLUDEPATH += $$PWD
|
||||
DEFINES += USE_KISSFFT=1
|
||||
DEFINES += __WINDOWS__=1
|
||||
DEFINES += DSD_USE_SERIALDV=1
|
||||
DEFINES += USE_SSE=1
|
||||
QMAKE_CXXFLAGS += -msse4.1
|
||||
DEFINES += USE_SSE2=1
|
||||
QMAKE_CXXFLAGS += -msse2
|
||||
|
||||
CONFIG(Release):build_subdir = release
|
||||
CONFIG(Debug):build_subdir = debug
|
||||
|
Loading…
Reference in New Issue
Block a user