Use more precise SIMD flags and detect actual x86_64 SIMD features

This commit is contained in:
f4exb 2016-11-07 00:42:57 +01:00
parent dbbbfa12ee
commit 63d6eea066
14 changed files with 272 additions and 180 deletions

View File

@ -61,10 +61,6 @@ if (NOT BUILD_DEBIAN)
find_package(SerialDV)
endif()
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|x86")
SET(USE_SSE "SSE4_1" CACHE STRING "Use SSE 4.1 SIMD instructions")
ENDIF()
# MacOS Compatibility
if(APPLE)
find_package(ICONV)
@ -411,17 +407,87 @@ include_directories(
${OPENGL_INCLUDE_DIR}
)
if(USE_SSE MATCHES SSE4_1)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse4.1" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse4.1" )
add_definitions(-DUSE_SSE)
elseif(MSVC)
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE4_1" )
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE4_1" )
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
add_definitions(-DUSE_SSE)
##############################################################################
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
message( STATUS "Architecture: ${ARCHITECTURE}" )
if (${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
EXECUTE_PROCESS( COMMAND grep flags /proc/cpuinfo OUTPUT_VARIABLE CPU_FLAGS )
if (${CPU_FLAGS} MATCHES "avx2")
set(HAS_AVX2 ON CACHE BOOL "Architecture has AVX2 SIMD enabled")
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mavx2" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mavx2" )
message(STATUS "Use AVX2 SIMD instructions")
add_definitions(-DUSE_AVX2)
else()
set(HAS_AVX2 OFF CACHE BOOL "Architecture does not have AVX2 SIMD enabled")
endif()
endif()
if (${CPU_FLAGS} MATCHES "sse4_1")
set(HAS_SSE4_1 ON CACHE BOOL "Architecture has SSE 4.1 SIMD enabled")
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse4.1" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse4.1" )
message(STATUS "Use SSE 4.1 SIMD instructions")
add_definitions(-DUSE_SSE4_1)
elseif(MSVC)
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE4_1" )
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE4_1" )
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
add_definitions(-DUSE_SSE4_1)
endif()
else()
set(HAS_SSE4_1 OFF CACHE BOOL "Architecture does not have SSE 4.1 SIMD enabled")
endif()
if (${CPU_FLAGS} MATCHES "ssse3")
set(HAS_SSSE3 ON CACHE BOOL "Architecture has SSSE3 SIMD enabled")
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
message(STATUS "Use SSSE3 SIMD instructions")
add_definitions(-DUSE_SSSE3)
elseif(MSVC)
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
add_definitions(-DUSE_SSSE3)
endif()
else()
set(HAS_SSSE3 OFF CACHE BOOL "Architecture does not have SSSE3 SIMD enabled")
endif()
if (${CPU_FLAGS} MATCHES "sse2")
set(HAS_SSE2 ON CACHE BOOL "Architecture has SSE2 SIMD enabled")
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse2" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse2" )
message(STATUS "Use SSE2 SIMD instructions")
add_definitions(-DUSE_SSE2)
elseif(MSVC)
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE2" )
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE2" )
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
add_definitions(-DUSE_SSE2)
endif()
else()
set(HAS_SSE2 OFF CACHE BOOL "Architecture does not have SSE2 SIMD enabled")
endif()
elseif (${ARCHITECTURE} MATCHES "armv7l")
EXECUTE_PROCESS( COMMAND grep Features /proc/cpuinfo OUTPUT_VARIABLE CPU_FLAGS )
if (${CPU_FLAGS} MATCHES "neon")
set(HAS_NEON ON CACHE BOOL "Architecture has NEON SIMD enabled")
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
message(STATUS "Use NEON SIMD instructions")
add_definitions(-DUSE_NEON)
endif()
else()
set(HAS_NEON OFF CACHE BOOL "Architecture does not have NEON SIMD enabled")
endif()
endif()

View File

@ -1,39 +1,11 @@
project(cm256cc)
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
message( STATUS "CM256cc: Architecture: ${ARCHITECTURE}" )
if(${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
SET(USE_SIMD "SSSE3")
elseif(${ARCHITECTURE} MATCHES "armv7l")
SET(USE_SIMD "NEON")
endif()
message( STATUS "CM256cc: use SIMD: ${USE_SIMD}" )
if(USE_SIMD MATCHES SSSE3)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
message(STATUS "g++ SSSE3")
add_definitions(-DUSE_SIMD)
elseif(MSVC)
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
message(STATUS "MSVC SSSE3")
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
add_definitions(-DUSE_SIMD)
endif()
elseif(USE_SIMD MATCHES NEON)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
message(STATUS "g++ NEON")
add_definitions(-DUSE_NEON)
endif()
if (HAS_SSSE3)
message(STATUS "SDRdaemonFEC: use SSSE3 SIMD" )
elseif (HAS_NEON)
message(STATUS "SDRdaemonFEC: use Neon SIMD" )
else()
message(STATUS "CM256cc: Unsupported architecture")
message(STATUS "SDRdaemonFEC: Unsupported architecture")
return()
endif()

View File

@ -15,7 +15,7 @@ CONFIG(MINGW64):LIBCM256CCSRC = "D:\softs\cm256cc"
INCLUDEPATH += $$LIBCM256CCSRC
DEFINES += __WINDOWS__=1
DEFINES += USE_SIMD=1
DEFINES += USE_SSSE3=1
QMAKE_CXXFLAGS += -msse4.1
CONFIG(Release):build_subdir = release

View File

@ -13,7 +13,7 @@ TARGET = modam
INCLUDEPATH += $$PWD
INCLUDEPATH += ../../../sdrbase
DEFINES += USE_SSE=1
DEFINES += USE_SSE4_1=1
QMAKE_CXXFLAGS += -msse4.1
CONFIG(Release):build_subdir = release

View File

@ -2,40 +2,10 @@ project(sdrdaemonfec)
find_package(LibNANOMSG)
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
message( STATUS "SDRdaemonFEC: Architecture: ${ARCHITECTURE}" )
if(${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
SET(USE_SIMD "SSSE3")
elseif(${ARCHITECTURE} MATCHES "armv7l")
SET(USE_SIMD "NEON")
endif()
message( STATUS "SDRdaemonFEC: use SIMD: ${USE_SIMD}" )
if(USE_SIMD MATCHES SSSE3)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
message(STATUS "SDRdaemonFEC: g++ SSSE3")
add_definitions(-DUSE_SIMD)
add_definitions(-DUSE_SSE)
elseif(MSVC)
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
message(STATUS "SDRdaemonFEC: MSVC SSSE3")
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
add_definitions(-DUSE_SIMD)
add_definitions(-DUSE_SSE)
endif()
elseif(USE_SIMD MATCHES NEON)
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
message(STATUS "SDRdaemonFEC: g++ NEON")
add_definitions(-DUSE_NEON)
endif()
if (HAS_SSSE3)
message(STATUS "SDRdaemonFEC: use SSSE3 SIMD" )
elseif (HAS_NEON)
message(STATUS "SDRdaemonFEC: use Neon SIMD" )
else()
message(STATUS "SDRdaemonFEC: Unsupported architecture")
return()

View File

@ -23,9 +23,8 @@ INCLUDEPATH += ../../../lz4
INCLUDEPATH += $$LIBNANOMSGSRC/src
INCLUDEPATH += $$LIBCM256CCSRC
DEFINES += USE_SIMD=1
DEFINES += USE_SSE=1
QMAKE_CXXFLAGS += -msse4.1
DEFINES += USE_SSSE3=1
QMAKE_CXXFLAGS += -mssse3
CONFIG(Release):build_subdir = release
CONFIG(Debug):build_subdir = debug

View File

@ -18,7 +18,7 @@
#define INCLUDE_GPL_DSP_DECIMATORS_H_
#include "dsp/dsptypes.h"
#ifdef USE_SSE
#ifdef USE_SSE4_1
#include "dsp/inthalfbandfiltereo1.h"
#else
#include "dsp/inthalfbandfilterdb.h"
@ -124,7 +124,7 @@ public:
void decimate64_cen(SampleVector::iterator* it, const T* buf, qint32 len);
private:
#ifdef USE_SSE
#ifdef USE_SSE4_1
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator2; // 1st stages
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator4; // 2nd stages
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator8; // 3rd stages

View File

@ -1,8 +1,8 @@
#ifndef INCLUDE_INTERPOLATOR_H
#define INCLUDE_INTERPOLATOR_H
#ifdef USE_SSE
#include <immintrin.h>
#ifdef USE_SSE2
#include <emmintrin.h>
#endif
#include "dsp/dsptypes.h"
#include "util/export.h"
@ -125,7 +125,7 @@ private:
{
if (phase < 0)
phase = 0;
#if USE_SSE
#if USE_SSE2
// beware of the ringbuffer
if(m_ptr == 0) {
// only one straight block

View File

@ -22,7 +22,7 @@
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO_H_
#define SDRBASE_DSP_INTHALFBANDFILTEREO_H_
#ifdef USE_SSE
#ifdef USE_SSE4_1
#include <smmintrin.h>
#endif
@ -464,7 +464,7 @@ protected:
qint32 iAcc = 0;
qint32 qAcc = 0;
#ifdef USE_SSE
#ifdef USE_SSE4_1
//#warning "IntHalfbandFiler SIMD"
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
@ -551,7 +551,7 @@ protected:
qint32 iAcc = 0;
qint32 qAcc = 0;
#ifdef USE_SSE
#ifdef USE_SSE4_1
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
__m128i sumQ = _mm_setzero_si128();

View File

@ -22,10 +22,14 @@
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
#ifdef USE_SSE
#ifdef USE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef USE_NEON
#include <arm_neon.h>
#endif
#include <stdint.h>
#include "dsp/dsptypes.h"
#include "dsp/hbfiltertraits.h"
@ -484,8 +488,7 @@ protected:
qint32 iAcc = 0;
qint32 qAcc = 0;
#ifdef USE_SSE
//#warning "IntHalfbandFiler SIMD"
#if defined(USE_SSE4_1)
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
__m128i sumQ = _mm_setzero_si128();
@ -528,6 +531,47 @@ protected:
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
qAcc = _mm_cvtsi128_si32(sumQ);
#elif defined(USE_NEON)
int32x4_t sumI = vdupq_n_s32(0);
int32x4_t sumQ = vdupq_n_s32(0);
int32x4_t sa, sb, sh;
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
{
sh = vld1_s32(&h[4*i]);
if ((m_ptrB % 2) == 0)
{
sa = vld1q_s32(&(m_evenA[0][a]));
sb = vld1q_s32(&(m_evenB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
sa = vld1q_s32(&(m_evenA[1][a]));
sb = vld1q_s32(&(m_evenB[1][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
}
else
{
sa = vld1q_s32(&(m_oddA[0][a]));
sb = vld1q_s32(&(m_oddB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
sa = vld1q_s32(&(m_oddA[1][a]));
sb = vld1q_s32(&(m_oddB[1][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
}
a += 4;
b += 4;
}
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
iAcc = vget_lane_s32(sumI2, 0);
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
qAcc = vget_lane_s32(sumQ2, 0);
#else
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
{
@ -570,7 +614,7 @@ protected:
qint32 iAcc = 0;
qint32 qAcc = 0;
#ifdef USE_SSE
#if defined(USE_SSE4_1)
//#warning "IntHalfbandFiler SIMD"
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
@ -614,6 +658,47 @@ protected:
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
qAcc = _mm_cvtsi128_si32(sumQ);
#elif defined(USE_NEON)
int32x4_t sumI = vdupq_n_s32(0);
int32x4_t sumQ = vdupq_n_s32(0);
int32x4_t sa, sb, sh;
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
{
sh = vld1_s32(&h[4*i]);
if ((m_ptrB % 2) == 0)
{
sa = vld1q_s32(&(m_evenA[0][a]));
sb = vld1q_s32(&(m_evenB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
sa = vld1q_s32(&(m_evenA[1][a]));
sb = vld1q_s32(&(m_evenB[1][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
}
else
{
sa = vld1q_s32(&(m_oddA[0][a]));
sb = vld1q_s32(&(m_oddB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
sa = vld1q_s32(&(m_oddA[1][a]));
sb = vld1q_s32(&(m_oddB[1][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
}
a += 4;
b += 4;
}
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
iAcc = vget_lane_s32(sumI2, 0);
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
qAcc = vget_lane_s32(sumQ2, 0);
#else
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
{

View File

@ -201,7 +201,7 @@ void UpChannelizer::applyConfiguration()
}
}
#ifdef USE_SSE
#ifdef USE_SSE4_1
UpChannelizer::FilterStage::FilterStage(Mode mode) :
m_filter(new IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>),
m_workFunction(0)

View File

@ -23,7 +23,7 @@
#include <QMutex>
#include "util/export.h"
#include "util/message.h"
#ifdef USE_SSE
#ifdef USE_SSE4_1
#include "dsp/inthalfbandfiltereo2.h"
#else
#include "dsp/inthalfbandfilterdb.h"
@ -73,7 +73,7 @@ protected:
ModeUpperHalf
};
#ifdef USE_SSE
#ifdef USE_SSE4_1
typedef bool (IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>::*WorkFunction)(Sample* sIn, Sample *sOut);
IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>* m_filter;
#else

View File

@ -15,8 +15,8 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
///////////////////////////////////////////////////////////////////////////////////
#ifdef USE_SSE
#include <immintrin.h>
#ifdef USE_SSE2
#include <emmintrin.h>
#endif
#include <QMouseEvent>
@ -381,83 +381,83 @@ void GLSpectrum::updateHistogram(const std::vector<Real>& spectrum)
m_currentSpectrum = &spectrum; // Store spectrum for current spectrum line display
#ifndef USE_SSE
for(int i = 0; i < m_fftSize; i++) {
int v = (int)((spectrum[i] - m_referenceLevel) * 100.0 / m_powerRange + 100.0);
#ifdef USE_SSE2
if(m_decay >= 0) { // normal
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
if ((v >= 0) && (v <= 99)) {
b = m_histogram + i * 100 + v;
if(*b < 220)
*b += m_histogramStroke; // was 4
else if(*b < 239)
*b += 1;
}
}
for(int i = 0; i < m_fftSize; i += 4) {
__m128 abc = _mm_loadu_ps (&spectrum[i]);
abc = _mm_sub_ps(abc, refl);
abc = _mm_mul_ps(abc, mul);
abc = _mm_div_ps(abc, power);
abc = _mm_add_ps(abc, mul);
__m128i result = _mm_cvtps_epi32(abc);
for(int j = 0; j < 4; j++) {
int v = ((int*)&result)[j];
if((v >= 0) && (v <= 99)) {
b = m_histogram + (i + j) * 100 + v;
if(*b < 220)
*b += m_histogramStroke; // was 4
else if(*b < 239)
*b += 1;
}
}
}
} else { // draw double pixels
int add = -m_decay * 4;
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
for(int i = 0; i < m_fftSize; i += 4) {
__m128 abc = _mm_loadu_ps (&spectrum[i]);
abc = _mm_sub_ps(abc, refl);
abc = _mm_mul_ps(abc, mul);
abc = _mm_div_ps(abc, power);
abc = _mm_add_ps(abc, mul);
__m128i result = _mm_cvtps_epi32(abc);
for(int j = 0; j < 4; j++) {
int v = ((int*)&result)[j];
if((v >= 1) && (v <= 98)) {
b = m_histogram + (i + j) * 100 + v;
if(b[-1] < 220)
b[-1] += add;
else if(b[-1] < 239)
b[-1] += 1;
if(b[0] < 220)
b[0] += add;
else if(b[0] < 239)
b[0] += 1;
if(b[1] < 220)
b[1] += add;
else if(b[1] < 239)
b[1] += 1;
} else if((v >= 0) && (v <= 99)) {
b = m_histogram + (i + j) * 100 + v;
if(*b < 220)
*b += add;
else if(*b < 239)
*b += 1;
}
}
}
}
#else
if(m_decay >= 0) { // normal
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
for(int i = 0; i < m_fftSize; i++) {
int v = (int)((spectrum[i] - m_referenceLevel) * 100.0 / m_powerRange + 100.0);
for(int i = 0; i < m_fftSize; i += 4) {
__m128 abc = _mm_loadu_ps (&spectrum[i]);
abc = _mm_sub_ps(abc, refl);
abc = _mm_mul_ps(abc, mul);
abc = _mm_div_ps(abc, power);
abc = _mm_add_ps(abc, mul);
__m128i result = _mm_cvtps_epi32(abc);
for(int j = 0; j < 4; j++) {
int v = ((int*)&result)[j];
if((v >= 0) && (v <= 99)) {
b = m_histogram + (i + j) * 100 + v;
if(*b < 220)
*b += m_histogramStroke; // was 4
else if(*b < 239)
*b += 1;
}
}
}
} else { // draw double pixels
int add = -m_decay * 4;
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
const __m128 mul = {100.0f, 100.0f, 100.0f, 100.0f};
for(int i = 0; i < m_fftSize; i += 4) {
__m128 abc = _mm_loadu_ps (&spectrum[i]);
abc = _mm_sub_ps(abc, refl);
abc = _mm_mul_ps(abc, mul);
abc = _mm_div_ps(abc, power);
abc = _mm_add_ps(abc, mul);
__m128i result = _mm_cvtps_epi32(abc);
for(int j = 0; j < 4; j++) {
int v = ((int*)&result)[j];
if((v >= 1) && (v <= 98)) {
b = m_histogram + (i + j) * 100 + v;
if(b[-1] < 220)
b[-1] += add;
else if(b[-1] < 239)
b[-1] += 1;
if(b[0] < 220)
b[0] += add;
else if(b[0] < 239)
b[0] += 1;
if(b[1] < 220)
b[1] += add;
else if(b[1] < 239)
b[1] += 1;
} else if((v >= 0) && (v <= 99)) {
b = m_histogram + (i + j) * 100 + v;
if(*b < 220)
*b += add;
else if(*b < 239)
*b += 1;
}
}
}
}
if ((v >= 0) && (v <= 99)) {
b = m_histogram + i * 100 + v;
if(*b < 220)
*b += m_histogramStroke; // was 4
else if(*b < 239)
*b += 1;
}
}
#endif
}

View File

@ -14,8 +14,8 @@ INCLUDEPATH += $$PWD
DEFINES += USE_KISSFFT=1
DEFINES += __WINDOWS__=1
DEFINES += DSD_USE_SERIALDV=1
DEFINES += USE_SSE=1
QMAKE_CXXFLAGS += -msse4.1
DEFINES += USE_SSE2=1
QMAKE_CXXFLAGS += -msse2
CONFIG(Release):build_subdir = release
CONFIG(Debug):build_subdir = debug