1
0
mirror of https://github.com/f4exb/sdrangel.git synced 2025-03-08 12:28:49 -05:00

Removed useless flavour of even/odd FIR halfband filter

This commit is contained in:
f4exb 2016-11-11 10:21:54 +01:00
parent 671b380909
commit ae5070ebb3
6 changed files with 44 additions and 1198 deletions

View File

@ -211,8 +211,6 @@ set(sdrbase_HEADERS
sdrbase/dsp/inthalfbandfilterdb.h sdrbase/dsp/inthalfbandfilterdb.h
sdrbase/dsp/inthalfbandfiltereo1.h sdrbase/dsp/inthalfbandfiltereo1.h
sdrbase/dsp/inthalfbandfiltereo1i.h sdrbase/dsp/inthalfbandfiltereo1i.h
sdrbase/dsp/inthalfbandfiltereo2.h
sdrbase/dsp/inthalfbandfiltereo2i.h
sdrbase/dsp/inthalfbandfilterst.h sdrbase/dsp/inthalfbandfilterst.h
sdrbase/dsp/inthalfbandfiltersti.h sdrbase/dsp/inthalfbandfiltersti.h
sdrbase/dsp/kissfft.h sdrbase/dsp/kissfft.h

View File

@ -23,6 +23,7 @@
#define SDRBASE_DSP_INTHALFBANDFILTEREO_H_ #define SDRBASE_DSP_INTHALFBANDFILTEREO_H_
#include <stdint.h> #include <stdint.h>
#include <cstdlib>
#include "dsp/dsptypes.h" #include "dsp/dsptypes.h"
#include "dsp/hbfiltertraits.h" #include "dsp/hbfiltertraits.h"
#include "dsp/inthalfbandfiltereo1i.h" #include "dsp/inthalfbandfiltereo1i.h"
@ -93,7 +94,7 @@ public:
} }
} }
bool workDecimateCenter(qint32 *x, qint32 *y) bool workDecimateCenter(int32_t *x, int32_t *y)
{ {
// insert sample into ring-buffer // insert sample into ring-buffer
storeSample(*x, *y); storeSample(*x, *y);
@ -394,7 +395,7 @@ public:
advancePointer(); advancePointer();
} }
void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2) void myDecimate(int32_t x1, int32_t y1, int32_t *x2, int32_t *y2)
{ {
storeSample(x1, y1); storeSample(x1, y1);
advancePointer(); advancePointer();
@ -405,8 +406,8 @@ public:
} }
protected: protected:
qint32 m_even[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique int32_t m_even[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique
qint32 m_odd[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique int32_t m_odd[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique
int m_ptr; int m_ptr;
int m_size; int m_size;
@ -430,7 +431,7 @@ protected:
} }
} }
void storeSample(qint32 x, qint32 y) void storeSample(int32_t x, int32_t y)
{ {
if ((m_ptr % 2) == 0) if ((m_ptr % 2) == 0)
{ {
@ -453,10 +454,15 @@ protected:
m_ptr = m_ptr + 1 < 2*m_size ? m_ptr + 1: 0; m_ptr = m_ptr + 1 < 2*m_size ? m_ptr + 1: 0;
} }
int32_t rand(int32_t mod)
{
return (RAND_MAX/2 - std::rand()) % mod;
}
void doFIR(Sample* sample) void doFIR(Sample* sample)
{ {
qint32 iAcc = 0; int32_t iAcc = 0;
qint32 qAcc = 0; int32_t qAcc = 0;
#ifdef USE_SSE4_1 #ifdef USE_SSE4_1
IntHalfbandFilterEO1Intrisics<HBFilterOrder>::work( IntHalfbandFilterEO1Intrisics<HBFilterOrder>::work(
@ -490,23 +496,23 @@ protected:
if ((m_ptr % 2) == 0) if ((m_ptr % 2) == 0)
{ {
iAcc += ((qint32)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); iAcc += ((int32_t)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); qAcc += ((int32_t)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
} }
else else
{ {
iAcc += ((qint32)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); iAcc += ((int32_t)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); qAcc += ((int32_t)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
} }
sample->setReal(iAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1); sample->setReal(iAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
sample->setImag(qAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1); sample->setImag(qAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
} }
void doFIR(qint32 *x, qint32 *y) void doFIR(int32_t *x, int32_t *y)
{ {
qint32 iAcc = 0; int32_t iAcc = 0;
qint32 qAcc = 0; int32_t qAcc = 0;
#ifdef USE_SSE4_1 #ifdef USE_SSE4_1
IntHalfbandFilterEO1Intrisics<HBFilterOrder>::work( IntHalfbandFilterEO1Intrisics<HBFilterOrder>::work(
@ -539,13 +545,13 @@ protected:
#endif #endif
if ((m_ptr % 2) == 0) if ((m_ptr % 2) == 0)
{ {
iAcc += ((qint32)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); iAcc += ((int32_t)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); qAcc += ((int32_t)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
} }
else else
{ {
iAcc += ((qint32)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); iAcc += ((int32_t)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); qAcc += ((int32_t)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
} }
*x = iAcc >> (HBFIRFilterTraits<HBFilterOrder>::hbShift -1); // HB_SHIFT incorrect do not loose the gained bit *x = iAcc >> (HBFIRFilterTraits<HBFilterOrder>::hbShift -1); // HB_SHIFT incorrect do not loose the gained bit

View File

@ -1,609 +0,0 @@
///////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016 F4EXB //
// written by Edouard Griffiths //
// //
// Integer half-band FIR based interpolator and decimator //
// This is the even/odd double buffer variant. Really useful only when SIMD is //
// used //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation as version 3 of the License, or //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License V3 for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
///////////////////////////////////////////////////////////////////////////////////
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
#ifdef USE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef USE_NEON
#include <arm_neon.h>
#endif
#include <stdint.h>
#include "dsp/dsptypes.h"
#include "dsp/hbfiltertraits.h"
#include "dsp/inthalfbandfiltereo2i.h"
#include "util/export.h"
template<uint32_t HBFilterOrder>
class SDRANGEL_API IntHalfbandFilterEO2 {
public:
IntHalfbandFilterEO2();
// downsample by 2, return center part of original spectrum
bool workDecimateCenter(Sample* sample)
{
// insert sample into ring-buffer
storeSample((FixReal) sample->real(), (FixReal) sample->imag());
switch(m_state)
{
case 0:
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we don't have a new sample
return false;
default:
// save result
doFIR(sample);
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we have a new sample
return true;
}
}
// upsample by 2, return center part of original spectrum - double buffer variant
bool workInterpolateCenter(Sample* sampleIn, Sample *SampleOut)
{
switch(m_state)
{
case 0:
// insert sample into ring-buffer
storeSample(0, 0);
// save result
doFIR(SampleOut);
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we didn't consume the sample
return false;
default:
// insert sample into ring-buffer
storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag());
// save result
doFIR(SampleOut);
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we consumed the sample
return true;
}
}
bool workDecimateCenter(qint32 *x, qint32 *y)
{
// insert sample into ring-buffer
storeSample(*x, *y);
switch(m_state)
{
case 0:
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we don't have a new sample
return false;
default:
// save result
doFIR(x, y);
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we have a new sample
return true;
}
}
// downsample by 2, return lower half of original spectrum
bool workDecimateLowerHalf(Sample* sample)
{
switch(m_state)
{
case 0:
// insert sample into ring-buffer
storeSample((FixReal) -sample->imag(), (FixReal) sample->real());
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we don't have a new sample
return false;
case 1:
// insert sample into ring-buffer
storeSample((FixReal) -sample->real(), (FixReal) -sample->imag());
// save result
doFIR(sample);
// advance write-pointer
advancePointer();
// next state
m_state = 2;
// tell caller we have a new sample
return true;
case 2:
// insert sample into ring-buffer
storeSample((FixReal) sample->imag(), (FixReal) -sample->real());
// advance write-pointer
advancePointer();
// next state
m_state = 3;
// tell caller we don't have a new sample
return false;
default:
// insert sample into ring-buffer
storeSample((FixReal) sample->real(), (FixReal) sample->imag());
// save result
doFIR(sample);
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we have a new sample
return true;
}
}
// upsample by 2, from lower half of original spectrum - double buffer variant
bool workInterpolateLowerHalf(Sample* sampleIn, Sample *sampleOut)
{
Sample s;
switch(m_state)
{
case 0:
// insert sample into ring-buffer
storeSample(0, 0);
// save result
doFIR(&s);
sampleOut->setReal(s.imag());
sampleOut->setImag(-s.real());
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we didn't consume the sample
return false;
case 1:
// insert sample into ring-buffer
storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag());
// save result
doFIR(&s);
sampleOut->setReal(-s.real());
sampleOut->setImag(-s.imag());
// advance write-pointer
advancePointer();
// next state
m_state = 2;
// tell caller we consumed the sample
return true;
case 2:
// insert sample into ring-buffer
storeSample(0, 0);
// save result
doFIR(&s);
sampleOut->setReal(-s.imag());
sampleOut->setImag(s.real());
// advance write-pointer
advancePointer();
// next state
m_state = 3;
// tell caller we didn't consume the sample
return false;
default:
// insert sample into ring-buffer
storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag());
// save result
doFIR(&s);
sampleOut->setReal(s.real());
sampleOut->setImag(s.imag());
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we consumed the sample
return true;
}
}
// downsample by 2, return upper half of original spectrum
bool workDecimateUpperHalf(Sample* sample)
{
switch(m_state)
{
case 0:
// insert sample into ring-buffer
storeSample((FixReal) sample->imag(), (FixReal) -sample->real());
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we don't have a new sample
return false;
case 1:
// insert sample into ring-buffer
storeSample((FixReal) -sample->real(), (FixReal) -sample->imag());
// save result
doFIR(sample);
// advance write-pointer
advancePointer();
// next state
m_state = 2;
// tell caller we have a new sample
return true;
case 2:
// insert sample into ring-buffer
storeSample((FixReal) -sample->imag(), (FixReal) sample->real());
// advance write-pointer
advancePointer();
// next state
m_state = 3;
// tell caller we don't have a new sample
return false;
default:
// insert sample into ring-buffer
storeSample((FixReal) sample->real(), (FixReal) sample->imag());
// save result
doFIR(sample);
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we have a new sample
return true;
}
}
// upsample by 2, move original spectrum to upper half - double buffer variant
bool workInterpolateUpperHalf(Sample* sampleIn, Sample *sampleOut)
{
Sample s;
switch(m_state)
{
case 0:
// insert sample into ring-buffer
storeSample(0, 0);
// save result
doFIR(&s);
sampleOut->setReal(-s.imag());
sampleOut->setImag(s.real());
// advance write-pointer
advancePointer();
// next state
m_state = 1;
// tell caller we didn't consume the sample
return false;
case 1:
// insert sample into ring-buffer
storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag());
// save result
doFIR(&s);
sampleOut->setReal(-s.real());
sampleOut->setImag(-s.imag());
// advance write-pointer
advancePointer();
// next state
m_state = 2;
// tell caller we consumed the sample
return true;
case 2:
// insert sample into ring-buffer
storeSample(0, 0);
// save result
doFIR(&s);
sampleOut->setReal(s.imag());
sampleOut->setImag(-s.real());
// advance write-pointer
advancePointer();
// next state
m_state = 3;
// tell caller we didn't consume the sample
return false;
default:
// insert sample into ring-buffer
storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag());
// save result
doFIR(&s);
sampleOut->setReal(s.real());
sampleOut->setImag(s.imag());
// advance write-pointer
advancePointer();
// next state
m_state = 0;
// tell caller we consumed the sample
return true;
}
}
void myDecimate(const Sample* sample1, Sample* sample2)
{
storeSample((FixReal) sample1->real(), (FixReal) sample1->imag());
advancePointer();
storeSample((FixReal) sample2->real(), (FixReal) sample2->imag());
doFIR(sample2);
advancePointer();
}
void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2)
{
storeSample(x1, y1);
advancePointer();
storeSample(*x2, *y2);
doFIR(x2, y2);
advancePointer();
}
protected:
qint32 m_evenB[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique
qint32 m_oddB[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique
qint32 m_evenA[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique
qint32 m_oddA[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder]; // double buffer technique
int m_ptrA;
int m_ptrB;
int m_size;
int m_state;
void storeSample(const FixReal& sampleI, const FixReal& sampleQ)
{
if ((m_ptrB % 2) == 0)
{
m_evenB[0][m_ptrB/2] = sampleI;
m_evenB[1][m_ptrB/2] = sampleQ;
m_evenB[0][m_ptrB/2 + m_size] = sampleI;
m_evenB[1][m_ptrB/2 + m_size] = sampleQ;
m_evenA[0][m_ptrA/2] = sampleI;
m_evenA[1][m_ptrA/2] = sampleQ;
m_evenA[0][m_ptrA/2 + m_size] = sampleI;
m_evenA[1][m_ptrA/2 + m_size] = sampleQ;
}
else
{
m_oddB[0][m_ptrB/2] = sampleI;
m_oddB[1][m_ptrB/2] = sampleQ;
m_oddB[0][m_ptrB/2 + m_size] = sampleI;
m_oddB[1][m_ptrB/2 + m_size] = sampleQ;
m_oddA[0][m_ptrA/2] = sampleI;
m_oddA[1][m_ptrA/2] = sampleQ;
m_oddA[0][m_ptrA/2 + m_size] = sampleI;
m_oddA[1][m_ptrA/2 + m_size] = sampleQ;
}
}
void storeSample(qint32 x, qint32 y)
{
if ((m_ptrB % 2) == 0)
{
m_evenB[0][m_ptrB/2] = x;
m_evenB[1][m_ptrB/2] = y;
m_evenB[0][m_ptrB/2 + m_size] = x;
m_evenB[1][m_ptrB/2 + m_size] = y;
m_evenA[0][m_ptrA/2] = x;
m_evenA[1][m_ptrA/2] = y;
m_evenA[0][m_ptrA/2 + m_size] = x;
m_evenA[1][m_ptrA/2 + m_size] = y;
}
else
{
m_oddB[0][m_ptrB/2] = x;
m_oddB[1][m_ptrB/2] = y;
m_oddB[0][m_ptrB/2 + m_size] = x;
m_oddB[1][m_ptrB/2 + m_size] = y;
m_oddA[0][m_ptrA/2] = x;
m_oddA[1][m_ptrA/2] = y;
m_oddA[0][m_ptrA/2 + m_size] = x;
m_oddA[1][m_ptrA/2 + m_size] = y;
}
}
void advancePointer()
{
m_ptrA = (m_ptrA - 1 + 2*m_size) % (2*m_size);
m_ptrB = (m_ptrB + 1) % (2*m_size);
}
void doFIR(Sample* sample)
{
int a = m_ptrA/2; // tip pointer
int b = m_ptrB/2 + 1; // tail pointer
qint32 iAcc = 0;
qint32 qAcc = 0;
#if defined(USE_AVX2) || defined(USE_SSE4_1) || defined(USE_NEON)
IntHalfbandFilterEO2Intrisics<HBFilterOrder>::work(
m_ptrA,
m_ptrB,
m_evenA,
m_evenB,
m_oddA,
m_oddB,
iAcc,
qAcc
);
#else
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
{
if ((m_ptrB % 2) == 0)
{
iAcc += (m_evenA[0][a] + m_evenB[0][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
qAcc += (m_evenA[1][a] + m_evenB[1][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
}
else
{
iAcc += (m_oddA[0][a] + m_oddB[0][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
qAcc += (m_oddA[1][a] + m_oddB[1][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
}
a += 1;
b += 1;
}
#endif
if ((m_ptrB % 2) == 0)
{
iAcc += ((qint32)m_oddB[0][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_oddB[1][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
}
else
{
iAcc += ((qint32)m_evenB[0][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_evenB[1][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
}
sample->setReal(iAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
sample->setImag(qAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
}
void doFIR(qint32 *x, qint32 *y)
{
int a = m_ptrA/2; // tip pointer
int b = m_ptrB/2 + 1; // tail pointer
qint32 iAcc = 0;
qint32 qAcc = 0;
#if defined(USE_AVX2) || defined(USE_SSE4_1) || defined(USE_NEON)
IntHalfbandFilterEO2Intrisics<HBFilterOrder>::work(
m_ptrA,
m_ptrB,
m_evenA,
m_evenB,
m_oddA,
m_oddB,
iAcc,
qAcc
);
#else
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
{
if ((m_ptrB % 2) == 0)
{
iAcc += (m_evenA[0][a] + m_evenB[0][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
qAcc += (m_evenA[1][a] + m_evenB[1][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
}
else
{
iAcc += (m_oddA[0][a] + m_oddB[0][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
qAcc += (m_oddA[1][a] + m_oddB[1][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[i];
}
a += 1;
b += 1;
}
#endif
if ((m_ptrB % 2) == 0)
{
iAcc += ((qint32)m_oddB[0][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_oddB[1][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
}
else
{
iAcc += ((qint32)m_evenB[0][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
qAcc += ((qint32)m_evenB[1][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
}
*x = iAcc >> (HBFIRFilterTraits<HBFilterOrder>::hbShift -1); // HB_SHIFT incorrect do not loose the gained bit
*y = qAcc >> (HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
}
};
template<uint32_t HBFilterOrder>
IntHalfbandFilterEO2<HBFilterOrder>::IntHalfbandFilterEO2()
{
m_size = HBFIRFilterTraits<HBFilterOrder>::hbOrder/2;
for (int i = 0; i < 2*m_size; i++)
{
m_evenB[0][i] = 0;
m_evenB[1][i] = 0;
m_oddB[0][i] = 0;
m_oddB[1][i] = 0;
}
m_ptrA = 0;
m_ptrB = 0;
m_state = 0;
}
#endif /* SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ */

View File

@ -1,547 +0,0 @@
///////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016 F4EXB //
// written by Edouard Griffiths //
// //
// Integer half-band FIR based interpolator and decimator //
// This is the even/odd double buffer variant. Really useful only when SIMD is //
// used //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation as version 3 of the License, or //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License V3 for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
///////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDE_INTHALFBANDFILTEREO2I_H_
#define INCLUDE_INTHALFBANDFILTEREO2I_H_
#include <stdint.h>
#if defined(USE_AVX2)
#include <immintrin.h>
#elif defined(USE_SSE4_1)
#include <smmintrin.h>
#elif defined(USE_NEON)
#include <arm_neon.h>
#endif
#include "hbfiltertraits.h"
template<uint32_t HBFilterOrder>
class IntHalfbandFilterEO2Intrisics
{
public:
static void work(
int ptrA,
int ptrB,
int32_t evenA[2][HBFilterOrder],
int32_t evenB[2][HBFilterOrder],
int32_t oddA[2][HBFilterOrder],
int32_t oddB[2][HBFilterOrder],
int32_t& iAcc, int32_t& qAcc)
{
#if defined(USE_SSE4_1)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
__m128i sumQ = _mm_setzero_si128();
__m128i sa, sb;
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
{
if ((ptrB % 2) == 0)
{
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
}
else
{
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
}
a += 4;
b += 4;
++h;
}
// horizontal add of four 32 bit partial sums
sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8));
sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4));
iAcc = _mm_cvtsi128_si32(sumI);
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
qAcc = _mm_cvtsi128_si32(sumQ);
#elif defined(USE_NEON)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
int32x4_t sumI = vdupq_n_s32(0);
int32x4_t sumQ = vdupq_n_s32(0);
int32x4_t sa, sb, sh;
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
{
sh = vld1q_s32(&HBFIRFilterTraits<HBFilterOrder>::hbCoeffs[4*i]);
if ((ptrB % 2) == 0)
{
sa = vld1q_s32(&(evenA[0][a]));
sb = vld1q_s32(&(evenB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
sa = vld1q_s32(&(evenA[1][a]));
sb = vld1q_s32(&(evenB[1][b]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa, sb), sh);
}
else
{
sa = vld1q_s32(&(oddA[0][a]));
sb = vld1q_s32(&(oddB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
sa = vld1q_s32(&(oddA[1][a]));
sb = vld1q_s32(&(oddB[1][b]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa, sb), sh);
}
a += 4;
b += 4;
}
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
iAcc = vget_lane_s32(sumI2, 0);
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
qAcc = vget_lane_s32(sumQ2, 0);
#endif
}
};
template<>
class IntHalfbandFilterEO2Intrisics<48>
{
public:
static void work(
int ptrA,
int ptrB,
int32_t evenA[2][48],
int32_t evenB[2][48],
int32_t oddA[2][48],
int32_t oddB[2][48],
int32_t& iAcc, int32_t& qAcc)
{
#if defined(USE_SSE4_1)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
const __m128i* h = (const __m128i*) HBFIRFilterTraits<48>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
__m128i sumQ = _mm_setzero_si128();
__m128i sa, sb;
if ((ptrB % 2) == 0)
{
// 0
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
h++;
// 1
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+4]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+4]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+4]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+4]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
h++;
// 2
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+8]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+8]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+8]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+8]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
}
else
{
// 0
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
h++;
// 1
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+4]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+4]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+4]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+4]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
h++;
// 2
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+8]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+8]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+8]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+8]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h));
}
// horizontal add of four 32 bit partial sums
sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8));
sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4));
iAcc = _mm_cvtsi128_si32(sumI);
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
qAcc = _mm_cvtsi128_si32(sumQ);
#elif defined(USE_NEON)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
int32x4_t sumI = vdupq_n_s32(0);
int32x4_t sumQ = vdupq_n_s32(0);
int32x4x3_t sh = vld3q_s32((int32_t const *) &HBFIRFilterTraits<48>::hbCoeffs[0]);
int32x4x3_t sa, sb;
if ((ptrB % 2) == 0)
{
sa = vld3q_s32((int32_t const *) &(evenA[0][a]));
sb = vld3q_s32((int32_t const *) &(evenB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]);
sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]);
sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]);
sa = vld3q_s32((int32_t const *) &(evenA[1][a]));
sb = vld3q_s32((int32_t const *) &(evenB[1][b]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]);
}
else
{
sa = vld3q_s32((int32_t const *) &(oddA[0][a]));
sb = vld3q_s32((int32_t const *) &(oddB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]);
sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]);
sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]);
sa = vld3q_s32((int32_t const *) &(oddA[1][a]));
sb = vld3q_s32((int32_t const *) &(oddB[1][b]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]);
}
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
iAcc = vget_lane_s32(sumI2, 0);
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
qAcc = vget_lane_s32(sumQ2, 0);
#endif
}
};
template<>
class IntHalfbandFilterEO2Intrisics<96>
{
public:
static void work(
int ptrA,
int ptrB,
int32_t evenA[2][96],
int32_t evenB[2][96],
int32_t oddA[2][96],
int32_t oddB[2][96],
int32_t& iAcc, int32_t& qAcc)
{
#if defined(USE_AVX2)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
const __m256i* h = (const __m256i*) HBFIRFilterTraits<96>::hbCoeffs;
__m256i sumI = _mm256_setzero_si256();
__m256i sumQ = _mm256_setzero_si256();
__m256i sa, sb;
if ((ptrB % 2) == 0)
{
// I
sa = _mm256_loadu_si256((__m256i*) &(evenA[0][a]));
sb = _mm256_loadu_si256((__m256i*) &(evenB[0][b]));
sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0]));
sa = _mm256_loadu_si256((__m256i*) &(evenA[0][a+8]));
sb = _mm256_loadu_si256((__m256i*) &(evenB[0][b+8]));
sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1]));
sa = _mm256_loadu_si256((__m256i*) &(evenA[0][a+16]));
sb = _mm256_loadu_si256((__m256i*) &(evenB[0][b+16]));
sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2]));
// Q
sa = _mm256_loadu_si256((__m256i*) &(evenA[1][a]));
sb = _mm256_loadu_si256((__m256i*) &(evenB[1][b]));
sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0]));
sa = _mm256_loadu_si256((__m256i*) &(evenA[1][a+8]));
sb = _mm256_loadu_si256((__m256i*) &(evenB[1][b+8]));
sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1]));
sa = _mm256_loadu_si256((__m256i*) &(evenA[1][a+16]));
sb = _mm256_loadu_si256((__m256i*) &(evenB[1][b+16]));
sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2]));
}
else
{
// I
sa = _mm256_loadu_si256((__m256i*) &(oddA[0][a]));
sb = _mm256_loadu_si256((__m256i*) &(oddB[0][b]));
sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0]));
sa = _mm256_loadu_si256((__m256i*) &(oddA[0][a+8]));
sb = _mm256_loadu_si256((__m256i*) &(oddB[0][b+8]));
sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1]));
sa = _mm256_loadu_si256((__m256i*) &(oddA[0][a+16]));
sb = _mm256_loadu_si256((__m256i*) &(oddB[0][b+16]));
sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2]));
// Q
sa = _mm256_loadu_si256((__m256i*) &(oddA[1][a]));
sb = _mm256_loadu_si256((__m256i*) &(oddB[1][b]));
sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0]));
sa = _mm256_loadu_si256((__m256i*) &(oddA[1][a+8]));
sb = _mm256_loadu_si256((__m256i*) &(oddB[1][b+8]));
sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1]));
sa = _mm256_loadu_si256((__m256i*) &(oddA[1][a+16]));
sb = _mm256_loadu_si256((__m256i*) &(oddB[1][b+16]));
sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2]));
}
// horizontal add
__m128i vloI = _mm256_castsi256_si128(sumI);
vloI = _mm_add_epi32(vloI, _mm_srli_si128(vloI, 8));
vloI = _mm_add_epi32(vloI, _mm_srli_si128(vloI, 4));
iAcc = _mm_cvtsi128_si32(vloI);
__m128i vhiI = _mm256_extracti128_si256(sumI, 1);
vhiI = _mm_add_epi32(vhiI, _mm_srli_si128(vhiI, 8));
vhiI = _mm_add_epi32(vhiI, _mm_srli_si128(vhiI, 4));
iAcc += _mm_cvtsi128_si32(vhiI);
__m128i vloQ = _mm256_castsi256_si128(sumQ);
vloQ = _mm_add_epi32(vloQ, _mm_srli_si128(vloQ, 8));
vloQ = _mm_add_epi32(vloQ, _mm_srli_si128(vloQ, 4));
qAcc = _mm_cvtsi128_si32(vloQ);
__m128i vhiQ = _mm256_extracti128_si256(sumQ, 1);
vhiQ = _mm_add_epi32(vhiQ, _mm_srli_si128(vhiQ, 8));
vhiQ = _mm_add_epi32(vhiQ, _mm_srli_si128(vhiQ, 4));
qAcc += _mm_cvtsi128_si32(vhiQ);
#elif defined(USE_SSE4_1)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
const __m128i* h = (const __m128i*) HBFIRFilterTraits<96>::hbCoeffs;
__m128i sumI = _mm_setzero_si128();
__m128i sumQ = _mm_setzero_si128();
__m128i sa, sb;
if ((ptrB % 2) == 0)
{
// I
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0]));
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+4]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+4]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1]));
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+8]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+8]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2]));
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+12]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+12]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3]));
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+16]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+16]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4]));
sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+20]));
sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+20]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5]));
// Q
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0]));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+4]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+4]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1]));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+8]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+8]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2]));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+12]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+12]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3]));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+16]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+16]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4]));
sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+20]));
sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+20]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5]));
}
else
{
// I
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0]));
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+4]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+4]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1]));
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+8]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+8]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2]));
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+12]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+12]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3]));
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+16]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+16]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4]));
sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+20]));
sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+20]));
sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5]));
// Q
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0]));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+4]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+4]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1]));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+8]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+8]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2]));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+12]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+12]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3]));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+16]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+16]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4]));
sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+20]));
sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+20]));
sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5]));
}
// horizontal add of four 32 bit partial sums
sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8));
sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4));
iAcc = _mm_cvtsi128_si32(sumI);
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
qAcc = _mm_cvtsi128_si32(sumQ);
#elif defined(USE_NEON)
int a = ptrA/2; // tip pointer
int b = ptrB/2 + 1; // tail pointer
int32x4_t sumI = vdupq_n_s32(0);
int32x4_t sumQ = vdupq_n_s32(0);
int32x4x3_t sh = vld3q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[0]);
int32x4x4_t s4a, s4b, s4h;
int32x4x2_t s2a, s2b, s2h;
if ((ptrB % 2) == 0)
{
s4h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[0]);
s4a = vld4q_s32((int32_t const *) &(evenA[0][a]));
s4b = vld4q_s32((int32_t const *) &(evenB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]);
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]);
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]);
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]);
s4a = vld4q_s32((int32_t const *) &(evenA[1][a]));
s4b = vld4q_s32((int32_t const *) &(evenB[1][b]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]);
s2h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[16]);
s2a = vld2q_s32((int32_t const *) &(evenA[0][a+16]));
s2b = vld2q_s32((int32_t const *) &(evenB[0][b+16]));
sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]);
sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]);
s2a = vld2q_s32((int32_t const *) &(evenA[1][a+16]));
s2b = vld2q_s32((int32_t const *) &(evenB[1][b+16]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]);
}
else
{
s4h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[0]);
s4a = vld4q_s32((int32_t const *) &(oddA[0][a]));
s4b = vld4q_s32((int32_t const *) &(oddB[0][b]));
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]);
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]);
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]);
sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]);
s4a = vld4q_s32((int32_t const *) &(oddA[1][a]));
s4b = vld4q_s32((int32_t const *) &(oddB[1][b]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]);
s2h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[16]);
s2a = vld2q_s32((int32_t const *) &(oddA[0][a+16]));
s2b = vld2q_s32((int32_t const *) &(oddB[0][b+16]));
sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]);
sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]);
s2a = vld2q_s32((int32_t const *) &(oddA[1][a+16]));
s2b = vld2q_s32((int32_t const *) &(oddB[1][b+16]));
sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]);
sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]);
}
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
iAcc = vget_lane_s32(sumI2, 0);
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
qAcc = vget_lane_s32(sumQ2, 0);
#endif
}
};
#endif /* INCLUDE_INTHALFBANDFILTEREO2I_H_ */

View File

@ -92,7 +92,7 @@ public:
} }
} }
bool workDecimateCenter(qint32 *x, qint32 *y) bool workDecimateCenter(int32_t *x, int32_t *y)
{ {
// insert sample into ring-buffer // insert sample into ring-buffer
storeSample(*x, *y); storeSample(*x, *y);
@ -393,7 +393,7 @@ public:
advancePointer(); advancePointer();
} }
void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2) void myDecimate(int32_t x1, int32_t y1, int32_t *x2, int32_t *y2)
{ {
storeSample(x1, y1); storeSample(x1, y1);
advancePointer(); advancePointer();
@ -404,15 +404,15 @@ public:
} }
protected: protected:
qint32 m_samplesDB[2*HBFilterOrder][2]; // double buffer technique with even/odd amnd I/Q stride int32_t m_samplesDB[2*HBFilterOrder][2]; // double buffer technique with even/odd amnd I/Q stride
qint32 m_samplesAligned[HBFilterOrder][2] __attribute__ ((aligned (16))); int32_t m_samplesAligned[HBFilterOrder][2] __attribute__ ((aligned (16)));
int m_ptr; int m_ptr;
int m_size; int m_size;
int m_state; int m_state;
qint32 m_iEvenAcc; int32_t m_iEvenAcc;
qint32 m_qEvenAcc; int32_t m_qEvenAcc;
qint32 m_iOddAcc; int32_t m_iOddAcc;
qint32 m_qOddAcc; int32_t m_qOddAcc;
void storeSample(const FixReal& sampleI, const FixReal& sampleQ) void storeSample(const FixReal& sampleI, const FixReal& sampleQ)
@ -423,7 +423,7 @@ protected:
m_samplesDB[m_ptr + m_size][1] = sampleQ; m_samplesDB[m_ptr + m_size][1] = sampleQ;
} }
void storeSample(qint32 x, qint32 y) void storeSample(int32_t x, int32_t y)
{ {
m_samplesDB[m_ptr][0] = x; m_samplesDB[m_ptr][0] = x;
m_samplesDB[m_ptr][1] = y; m_samplesDB[m_ptr][1] = y;
@ -447,7 +447,7 @@ protected:
m_iOddAcc = 0; m_iOddAcc = 0;
m_qOddAcc = 0; m_qOddAcc = 0;
#ifdef USE_SSE4_1 #ifdef USE_SSE4_1
// memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(qint32)); // memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(int32_t));
IntHalfbandFilterSTIntrinsics<HBFilterOrder>::workNA( IntHalfbandFilterSTIntrinsics<HBFilterOrder>::workNA(
m_ptr + 1, m_ptr + 1,
m_samplesDB, m_samplesDB,
@ -469,10 +469,10 @@ protected:
b += 2; b += 2;
} }
#endif #endif
m_iEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_iEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
m_qEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_qEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
m_iOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_iOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
m_qOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_qOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
sample->setReal(m_iEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1); sample->setReal(m_iEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
sample->setImag(m_qEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1); sample->setImag(m_qEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1);
@ -484,7 +484,7 @@ protected:
} }
} }
void doFIR(qint32 *x, qint32 *y) void doFIR(int32_t *x, int32_t *y)
{ {
// calculate on odd values // calculate on odd values
@ -496,7 +496,7 @@ protected:
m_qOddAcc = 0; m_qOddAcc = 0;
#ifdef USE_SSE4_1 #ifdef USE_SSE4_1
// memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(qint32)); // memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(int32_t));
IntHalfbandFilterSTIntrinsics<HBFilterOrder>::workNA( IntHalfbandFilterSTIntrinsics<HBFilterOrder>::workNA(
m_ptr + 1, m_ptr + 1,
m_samplesDB, m_samplesDB,
@ -518,10 +518,10 @@ protected:
b += 2; b += 2;
} }
#endif #endif
m_iEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_iEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
m_qEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_qEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
m_iOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_iOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
m_qOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1); m_qOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits<HBFilterOrder>::hbShift - 1);
*x = m_iEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1; *x = m_iEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1;
*y = m_qEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1; *y = m_qEvenAcc >> HBFIRFilterTraits<HBFilterOrder>::hbShift -1;

View File

@ -145,8 +145,6 @@ HEADERS += mainwindow.h\
dsp/inthalfbandfilterdb.h\ dsp/inthalfbandfilterdb.h\
dsp/inthalfbandfiltereo1.h\ dsp/inthalfbandfiltereo1.h\
dsp/inthalfbandfiltereo1i.h\ dsp/inthalfbandfiltereo1i.h\
dsp/inthalfbandfiltereo2.h\
dsp/inthalfbandfiltereo2i.h\
dsp/inthalfbandfilterst.h\ dsp/inthalfbandfilterst.h\
dsp/inthalfbandfiltersti.h\ dsp/inthalfbandfiltersti.h\
dsp/kissfft.h\ dsp/kissfft.h\