diff --git a/CMakeLists.txt b/CMakeLists.txt index ce9db967c..b96bea72a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,6 +214,7 @@ set(sdrbase_HEADERS sdrbase/dsp/inthalfbandfilter.h sdrbase/dsp/inthalfbandfilterdb.h sdrbase/dsp/inthalfbandfiltereo1.h + sdrbase/dsp/inthalfbandfiltereo2.h sdrbase/dsp/kissfft.h sdrbase/dsp/kissengine.h sdrbase/dsp/lowpass.h diff --git a/sdrbase/dsp/inthalfbandfiltereo2.h b/sdrbase/dsp/inthalfbandfiltereo2.h new file mode 100644 index 000000000..8a01b51c4 --- /dev/null +++ b/sdrbase/dsp/inthalfbandfiltereo2.h @@ -0,0 +1,668 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016 F4EXB // +// written by Edouard Griffiths // +// // +// Integer half-band FIR based interpolator and decimator // +// This is the even/odd double buffer variant. Really useful only when SIMD is // +// used // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ +#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ + +#ifdef USE_SIMD +#include +#endif + +#include +#include "dsp/dsptypes.h" +#include "dsp/hbfiltertraits.h" +#include "util/export.h" + +template +class SDRANGEL_API IntHalfbandFilterEO2 { +public: + IntHalfbandFilterEO2(); + + // downsample by 2, return center part of original spectrum + bool workDecimateCenter(Sample* sample) + { + // insert sample into ring-buffer + storeSample((FixReal) sample->real(), (FixReal) sample->imag()); + + switch(m_state) + { + case 0: + // advance write-pointer + advancePointer(); + // next state + m_state = 1; + // tell caller we don't have a new sample + return false; + + default: + // save result + doFIR(sample); + // advance write-pointer + advancePointer(); + // next state + m_state = 0; + + // tell caller we have a new sample + return true; + } + } + + // upsample by 2, return center part of original spectrum - double buffer variant + bool workInterpolateCenter(Sample* sampleIn, Sample *SampleOut) + { + switch(m_state) + { + case 0: + // insert sample into ring-buffer + storeSample(0, 0); + // save result + doFIR(SampleOut); + // advance write-pointer + advancePointer(); + // next state + m_state = 1; + // tell caller we didn't consume the sample + return false; + + default: + // insert sample into ring-buffer + storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); + // save result + doFIR(SampleOut); + // advance write-pointer + advancePointer(); + // next state + m_state = 0; + // tell caller we consumed the sample + return true; + } + } + + bool workDecimateCenter(qint32 *x, qint32 *y) + { + // insert sample into ring-buffer + storeSample(*x, *y); + + switch(m_state) + { + case 0: + // advance write-pointer + advancePointer(); + // next state + m_state = 1; + // tell caller we don't have a new sample + return false; + + default: + // save result + doFIR(x, y); + // advance write-pointer + advancePointer(); + // next state + m_state = 0; + // tell caller we have a new sample + return true; + } + } + + // downsample by 2, return lower half of original spectrum + bool workDecimateLowerHalf(Sample* sample) + { + switch(m_state) + { + case 0: + // insert sample into ring-buffer + storeSample((FixReal) -sample->imag(), (FixReal) sample->real()); + // advance write-pointer + advancePointer(); + // next state + m_state = 1; + // tell caller we don't have a new sample + return false; + + case 1: + // insert sample into ring-buffer + storeSample((FixReal) -sample->real(), (FixReal) -sample->imag()); + // save result + doFIR(sample); + // advance write-pointer + advancePointer(); + // next state + m_state = 2; + // tell caller we have a new sample + return true; + + case 2: + // insert sample into ring-buffer + storeSample((FixReal) sample->imag(), (FixReal) -sample->real()); + // advance write-pointer + advancePointer(); + // next state + m_state = 3; + // tell caller we don't have a new sample + return false; + + default: + // insert sample into ring-buffer + storeSample((FixReal) sample->real(), (FixReal) sample->imag()); + // save result + doFIR(sample); + // advance write-pointer + advancePointer(); + // next state + m_state = 0; + // tell caller we have a new sample + return true; + } + } + + // upsample by 2, from lower half of original spectrum - double buffer variant + bool workInterpolateLowerHalf(Sample* sampleIn, Sample *sampleOut) + { + Sample s; + + switch(m_state) + { + case 0: + // insert sample into ring-buffer + storeSample(0, 0); + + // save result + doFIR(&s); + sampleOut->setReal(s.imag()); + sampleOut->setImag(-s.real()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 1; + + // tell caller we didn't consume the sample + return false; + + case 1: + // insert sample into ring-buffer + storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); + + // save result + doFIR(&s); + sampleOut->setReal(-s.real()); + sampleOut->setImag(-s.imag()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 2; + + // tell caller we consumed the sample + return true; + + case 2: + // insert sample into ring-buffer + storeSample(0, 0); + + // save result + doFIR(&s); + sampleOut->setReal(-s.imag()); + sampleOut->setImag(s.real()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 3; + + // tell caller we didn't consume the sample + return false; + + default: + // insert sample into ring-buffer + storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); + + // save result + doFIR(&s); + sampleOut->setReal(s.real()); + sampleOut->setImag(s.imag()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 0; + + // tell caller we consumed the sample + return true; + } + } + + // downsample by 2, return upper half of original spectrum + bool workDecimateUpperHalf(Sample* sample) + { + switch(m_state) + { + case 0: + // insert sample into ring-buffer + storeSample((FixReal) sample->imag(), (FixReal) -sample->real()); + // advance write-pointer + advancePointer(); + // next state + m_state = 1; + // tell caller we don't have a new sample + return false; + + case 1: + // insert sample into ring-buffer + storeSample((FixReal) -sample->real(), (FixReal) -sample->imag()); + // save result + doFIR(sample); + // advance write-pointer + advancePointer(); + // next state + m_state = 2; + // tell caller we have a new sample + return true; + + case 2: + // insert sample into ring-buffer + storeSample((FixReal) -sample->imag(), (FixReal) sample->real()); + // advance write-pointer + advancePointer(); + // next state + m_state = 3; + // tell caller we don't have a new sample + return false; + + default: + // insert sample into ring-buffer + storeSample((FixReal) sample->real(), (FixReal) sample->imag()); + // save result + doFIR(sample); + // advance write-pointer + advancePointer(); + // next state + m_state = 0; + // tell caller we have a new sample + return true; + } + } + + // upsample by 2, move original spectrum to upper half - double buffer variant + bool workInterpolateUpperHalf(Sample* sampleIn, Sample *sampleOut) + { + Sample s; + + switch(m_state) + { + case 0: + // insert sample into ring-buffer + storeSample(0, 0); + + // save result + doFIR(&s); + sampleOut->setReal(-s.imag()); + sampleOut->setImag(s.real()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 1; + + // tell caller we didn't consume the sample + return false; + + case 1: + // insert sample into ring-buffer + storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); + + // save result + doFIR(&s); + sampleOut->setReal(-s.real()); + sampleOut->setImag(-s.imag()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 2; + + // tell caller we consumed the sample + return true; + + case 2: + // insert sample into ring-buffer + storeSample(0, 0); + + // save result + doFIR(&s); + sampleOut->setReal(s.imag()); + sampleOut->setImag(-s.real()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 3; + + // tell caller we didn't consume the sample + return false; + + default: + // insert sample into ring-buffer + storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); + + // save result + doFIR(&s); + sampleOut->setReal(s.real()); + sampleOut->setImag(s.imag()); + + // advance write-pointer + advancePointer(); + + // next state + m_state = 0; + + // tell caller we consumed the sample + return true; + } + } + + void myDecimate(const Sample* sample1, Sample* sample2) + { + storeSample((FixReal) sample1->real(), (FixReal) sample1->imag()); + advancePointer(); + + storeSample((FixReal) sample2->real(), (FixReal) sample2->imag()); + doFIR(sample2); + advancePointer(); + } + + void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2) + { + storeSample(x1, y1); + advancePointer(); + + storeSample(*x2, *y2); + doFIR(x2, y2); + advancePointer(); + } + +protected: + qint32 m_evenB[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + qint32 m_oddB[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + qint32 m_evenA[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + qint32 m_oddA[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + + int m_ptrA; + int m_ptrB; + int m_size; + int m_state; + + void storeSample(const FixReal& sampleI, const FixReal& sampleQ) + { + if ((m_ptrB % 2) == 0) + { + m_evenB[0][m_ptrB/2] = sampleI; + m_evenB[1][m_ptrB/2] = sampleQ; + m_evenB[0][m_ptrB/2 + m_size] = sampleI; + m_evenB[1][m_ptrB/2 + m_size] = sampleQ; + m_evenA[0][m_ptrA/2] = sampleI; + m_evenA[1][m_ptrA/2] = sampleQ; + m_evenA[0][m_ptrA/2 + m_size] = sampleI; + m_evenA[1][m_ptrA/2 + m_size] = sampleQ; + } + else + { + m_oddB[0][m_ptrB/2] = sampleI; + m_oddB[1][m_ptrB/2] = sampleQ; + m_oddB[0][m_ptrB/2 + m_size] = sampleI; + m_oddB[1][m_ptrB/2 + m_size] = sampleQ; + m_oddA[0][m_ptrA/2] = sampleI; + m_oddA[1][m_ptrA/2] = sampleQ; + m_oddA[0][m_ptrA/2 + m_size] = sampleI; + m_oddA[1][m_ptrA/2 + m_size] = sampleQ; + } + } + + void storeSample(qint32 x, qint32 y) + { + if ((m_ptrB % 2) == 0) + { + m_evenB[0][m_ptrB/2] = x; + m_evenB[1][m_ptrB/2] = y; + m_evenB[0][m_ptrB/2 + m_size] = x; + m_evenB[1][m_ptrB/2 + m_size] = y; + m_evenA[0][m_ptrA/2] = x; + m_evenA[1][m_ptrA/2] = y; + m_evenA[0][m_ptrA/2 + m_size] = x; + m_evenA[1][m_ptrA/2 + m_size] = y; + } + else + { + m_oddB[0][m_ptrB/2] = x; + m_oddB[1][m_ptrB/2] = y; + m_oddB[0][m_ptrB/2 + m_size] = x; + m_oddB[1][m_ptrB/2 + m_size] = y; + m_oddA[0][m_ptrA/2] = x; + m_oddA[1][m_ptrA/2] = y; + m_oddA[0][m_ptrA/2 + m_size] = x; + m_oddA[1][m_ptrA/2 + m_size] = y; + } + } + + void advancePointer() + { + m_ptrA = (m_ptrA - 1 + 2*m_size) % (2*m_size); + m_ptrB = (m_ptrB + 1) % (2*m_size); + } + + void doFIR(Sample* sample) + { + int a = m_ptrA/2; // tip pointer + int b = m_ptrB/2 + 1; // tail pointer + + qint32 iAcc = 0; + qint32 qAcc = 0; + +#ifdef USE_SIMD +//#warning "IntHalfbandFiler SIMD" + const __m128i* h = (const __m128i*) HBFIRFilterTraits::hbCoeffs; + __m128i sumI = _mm_setzero_si128(); + __m128i sumQ = _mm_setzero_si128(); + __m128i sa, sb; + + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 16; i++) + { + if ((m_ptrB % 2) == 0) + { + sa = _mm_loadu_si128((__m128i*) &(m_evenA[0][a])); + sb = _mm_loadu_si128((__m128i*) &(m_evenB[0][b])); + sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + + sa = _mm_loadu_si128((__m128i*) &(m_evenA[1][a])); + sb = _mm_loadu_si128((__m128i*) &(m_evenB[1][b])); + sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + } + else + { + sa = _mm_loadu_si128((__m128i*) &(m_oddA[0][a])); + sb = _mm_loadu_si128((__m128i*) &(m_oddB[0][b])); + sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + + sa = _mm_loadu_si128((__m128i*) &(m_oddA[1][a])); + sb = _mm_loadu_si128((__m128i*) &(m_oddB[1][b])); + sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + } + + a += 4; + b += 4; + ++h; + } + + // horizontal add of four 32 bit partial sums + + sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8)); + sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4)); + iAcc = _mm_cvtsi128_si32(sumI); + + sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8)); + sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4)); + qAcc = _mm_cvtsi128_si32(sumQ); +#else + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) + { + if ((m_ptrB % 2) == 0) + { + iAcc += (m_evenA[0][a] + m_evenB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; + qAcc += (m_evenA[1][a] + m_evenB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; + } + else + { + iAcc += (m_oddA[0][a] + m_oddB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; + qAcc += (m_oddA[1][a] + m_oddB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; + } + + a += 1; + b += 1; + } +#endif + + if ((m_ptrB % 2) == 0) + { + iAcc += ((qint32)m_oddB[0][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((qint32)m_oddB[1][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + } + else + { + iAcc += ((qint32)m_evenB[0][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((qint32)m_evenB[1][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + } + + sample->setReal(iAcc >> HBFIRFilterTraits::hbShift -1); + sample->setImag(qAcc >> HBFIRFilterTraits::hbShift -1); + } + + void doFIR(qint32 *x, qint32 *y) + { + int a = m_ptrA/2; // tip pointer + int b = m_ptrB/2 + 1; // tail pointer + + qint32 iAcc = 0; + qint32 qAcc = 0; + +#ifdef USE_SIMD + const __m128i* h = (const __m128i*) HBFIRFilterTraits::hbCoeffs; + __m128i sumI = _mm_setzero_si128(); + __m128i sumQ = _mm_setzero_si128(); + __m128i sa, sb; + + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 16; i++) + { + if ((m_ptrB % 2) == 0) + { + sa = _mm_loadu_si128((__m128i*) &(m_evenA[0][a])); + sb = _mm_loadu_si128((__m128i*) &(m_evenB[0][b])); + sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + + sa = _mm_loadu_si128((__m128i*) &(m_evenB[1][a])); + sb = _mm_loadu_si128((__m128i*) &(m_evenB[1][b])); + sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + } + else + { + sa = _mm_loadu_si128((__m128i*) &(m_oddA[0][a])); + sb = _mm_loadu_si128((__m128i*) &(m_oddB[0][b])); + sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + + sa = _mm_loadu_si128((__m128i*) &(m_oddB[1][a])); + sb = _mm_loadu_si128((__m128i*) &(m_oddB[1][b])); + sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); + } + + a += 4; + b += 4; + ++h; + } + + // horizontal add of four 32 bit partial sums + + sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8)); + sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4)); + iAcc = _mm_cvtsi128_si32(sumI); + + sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8)); + sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4)); + qAcc = _mm_cvtsi128_si32(sumQ); +#else + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) + { + if ((m_ptrB % 2) == 0) + { + iAcc += (m_evenA[0][a] + m_evenB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; + qAcc += (m_evenA[1][a] + m_evenB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; + } + else + { + iAcc += (m_oddA[0][a] + m_oddB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; + qAcc += (m_oddA[1][a] + m_oddB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; + } + + a += 1; + b += 1; + } +#endif + if ((m_ptrB % 2) == 0) + { + iAcc += ((qint32)m_oddB[0][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((qint32)m_oddB[1][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + } + else + { + iAcc += ((qint32)m_evenB[0][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((qint32)m_evenB[1][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + } + + *x = iAcc >> (HBFIRFilterTraits::hbShift -1); // HB_SHIFT incorrect do not loose the gained bit + *y = qAcc >> (HBFIRFilterTraits::hbShift -1); + } +}; + +template +IntHalfbandFilterEO2::IntHalfbandFilterEO2() +{ + m_size = HBFIRFilterTraits::hbOrder/2; + + for (int i = 0; i < 2*m_size; i++) + { + m_evenB[0][i] = 0; + m_evenB[1][i] = 0; + m_oddB[0][i] = 0; + m_oddB[1][i] = 0; + } + + m_ptrA = 0; + m_ptrB = 0; + m_state = 0; +} + +#endif /* SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ */ diff --git a/sdrbase/dsp/upchannelizer.cpp b/sdrbase/dsp/upchannelizer.cpp index 896af2ec2..580f1d968 100644 --- a/sdrbase/dsp/upchannelizer.cpp +++ b/sdrbase/dsp/upchannelizer.cpp @@ -203,20 +203,20 @@ void UpChannelizer::applyConfiguration() #ifdef USE_SIMD UpChannelizer::FilterStage::FilterStage(Mode mode) : - m_filter(new IntHalfbandFilterEO1), + m_filter(new IntHalfbandFilterEO2), m_workFunction(0) { switch(mode) { case ModeCenter: - m_workFunction = &IntHalfbandFilterEO1::workInterpolateCenter; + m_workFunction = &IntHalfbandFilterEO2::workInterpolateCenter; break; case ModeLowerHalf: - m_workFunction = &IntHalfbandFilterEO1::workInterpolateLowerHalf; + m_workFunction = &IntHalfbandFilterEO2::workInterpolateLowerHalf; break; case ModeUpperHalf: - m_workFunction = &IntHalfbandFilterEO1::workInterpolateUpperHalf; + m_workFunction = &IntHalfbandFilterEO2::workInterpolateUpperHalf; break; } } diff --git a/sdrbase/dsp/upchannelizer.h b/sdrbase/dsp/upchannelizer.h index b6dcb20e7..1e1ef69e2 100644 --- a/sdrbase/dsp/upchannelizer.h +++ b/sdrbase/dsp/upchannelizer.h @@ -24,7 +24,7 @@ #include "util/export.h" #include "util/message.h" #ifdef USE_SIMD -#include "dsp/inthalfbandfiltereo1.h" +#include "dsp/inthalfbandfiltereo2.h" #else #include "dsp/inthalfbandfilterdb.h" #endif @@ -74,8 +74,8 @@ protected: }; #ifdef USE_SIMD - typedef bool (IntHalfbandFilterEO1::*WorkFunction)(Sample* sIn, Sample *sOut); - IntHalfbandFilterEO1* m_filter; + typedef bool (IntHalfbandFilterEO2::*WorkFunction)(Sample* sIn, Sample *sOut); + IntHalfbandFilterEO2* m_filter; #else typedef bool (IntHalfbandFilterDB::*WorkFunction)(Sample* sIn, Sample *sOut); IntHalfbandFilterDB* m_filter;