From fa5030eac41b4084ae800cd301adeeb5df224b66 Mon Sep 17 00:00:00 2001 From: f4exb Date: Mon, 7 Nov 2016 18:23:59 +0100 Subject: [PATCH] IntHalfbandFilterEO1: simplification for SSE 4.1 only and fix --- sdrbase/dsp/inthalfbandfiltereo1.h | 50 ++++------------------------- sdrbase/dsp/inthalfbandfiltereo1i.h | 8 +---- 2 files changed, 8 insertions(+), 50 deletions(-) diff --git a/sdrbase/dsp/inthalfbandfiltereo1.h b/sdrbase/dsp/inthalfbandfiltereo1.h index dbd116bf7..f73c41558 100644 --- a/sdrbase/dsp/inthalfbandfiltereo1.h +++ b/sdrbase/dsp/inthalfbandfiltereo1.h @@ -516,49 +516,13 @@ protected: qint32 qAcc = 0; #ifdef USE_SSE4_1 - const __m128i* h = (const __m128i*) HBFIRFilterTraits::hbCoeffs; - __m128i sumI = _mm_setzero_si128(); - __m128i sumQ = _mm_setzero_si128(); - __m128i sa, sb; - a -= 3; - - for (int i = 0; i < HBFIRFilterTraits::hbOrder / 16; i++) - { - if ((m_ptr % 2) == 0) - { - sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(m_even[0][a])), _MM_SHUFFLE(0,1,2,3)); - sb = _mm_loadu_si128((__m128i*) &(m_even[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - - sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(m_even[1][a])), _MM_SHUFFLE(0,1,2,3)); - sb = _mm_loadu_si128((__m128i*) &(m_even[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - } - else - { - sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(m_odd[0][a])), _MM_SHUFFLE(0,1,2,3)); - sb = _mm_loadu_si128((__m128i*) &(m_odd[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - - sa = _mm_shuffle_epi32(_mm_loadu_si128((__m128i*) &(m_odd[1][a])), _MM_SHUFFLE(0,1,2,3)); - sb = _mm_loadu_si128((__m128i*) &(m_odd[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - } - - a -= 4; - b += 4; - ++h; - } - - // horizontal add of four 32 bit partial sums - - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8)); - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4)); - iAcc = _mm_cvtsi128_si32(sumI); - - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8)); - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4)); - qAcc = _mm_cvtsi128_si32(sumQ); + IntHalfbandFilterEO1Intrisics::work( + m_ptr, + m_even, + m_odd, + iAcc, + qAcc + ); #else for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) { diff --git a/sdrbase/dsp/inthalfbandfiltereo1i.h b/sdrbase/dsp/inthalfbandfiltereo1i.h index c03ec032c..8a937c11a 100644 --- a/sdrbase/dsp/inthalfbandfiltereo1i.h +++ b/sdrbase/dsp/inthalfbandfiltereo1i.h @@ -24,12 +24,8 @@ #include -#if defined(USE_AVX2) -#include -#elif defined(USE_SSE4_1) +#if defined(USE_SSE4_1) #include -#elif defined(USE_NEON) -#include #endif #include "hbfiltertraits.h" @@ -94,6 +90,4 @@ public: } }; - - #endif /* SDRBASE_DSP_INTHALFBANDFILTEREO1I_H_ */