mirror of
https://github.com/f4exb/sdrangel.git
synced 2024-11-04 16:01:14 -05:00
129 lines
3.2 KiB
C++
129 lines
3.2 KiB
C++
#ifndef INCLUDE_INTERPOLATOR_H
|
|
#define INCLUDE_INTERPOLATOR_H
|
|
|
|
#include <immintrin.h>
|
|
#include "dsp/dsptypes.h"
|
|
#include "util/export.h"
|
|
#include <stdio.h>
|
|
#ifndef WIN32
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
class SDRANGELOVE_API Interpolator {
|
|
public:
|
|
Interpolator();
|
|
~Interpolator();
|
|
|
|
void create(int phaseSteps, double sampleRate, double cutoff);
|
|
void free();
|
|
|
|
// Original code allowed for upsampling, but was never used that way
|
|
bool interpolate(Real* distance, const Complex& next, Complex* result)
|
|
{
|
|
advanceFilter(next);
|
|
*distance -= 1.0;
|
|
if (*distance >= 1.0)
|
|
return false;
|
|
doInterpolate((int)floor(*distance * (Real)m_phaseSteps), result);
|
|
return true;
|
|
}
|
|
|
|
|
|
private:
|
|
float* m_taps;
|
|
float* m_alignedTaps;
|
|
float* m_taps2;
|
|
float* m_alignedTaps2;
|
|
std::vector<Complex> m_samples;
|
|
int m_ptr;
|
|
int m_phaseSteps;
|
|
int m_nTaps;
|
|
|
|
void createTaps(int nTaps, double sampleRate, double cutoff, std::vector<Real>* taps);
|
|
|
|
void advanceFilter(const Complex& next)
|
|
{
|
|
m_ptr--;
|
|
if(m_ptr < 0)
|
|
m_ptr = m_nTaps - 1;
|
|
m_samples[m_ptr] = next;
|
|
}
|
|
|
|
void doInterpolate(int phase, Complex* result)
|
|
{
|
|
if (phase < 0)
|
|
phase = 0;
|
|
#if 1
|
|
// beware of the ringbuffer
|
|
if(m_ptr == 0) {
|
|
// only one straight block
|
|
const float* src = (const float*)&m_samples[0];
|
|
const __m128* filter = (const __m128*)&m_alignedTaps[phase * m_nTaps * 2];
|
|
__m128 sum = _mm_setzero_ps();
|
|
int todo = m_nTaps / 2;
|
|
|
|
for(int i = 0; i < todo; i++) {
|
|
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(src), *filter));
|
|
src += 4;
|
|
filter += 1;
|
|
}
|
|
|
|
// add upper half to lower half and store
|
|
_mm_storel_pi((__m64*)result, _mm_add_ps(sum, _mm_shuffle_ps(sum, _mm_setzero_ps(), _MM_SHUFFLE(1, 0, 3, 2))));
|
|
} else {
|
|
// two blocks
|
|
const float* src = (const float*)&m_samples[m_ptr];
|
|
const __m128* filter = (const __m128*)&m_alignedTaps[phase * m_nTaps * 2];
|
|
__m128 sum = _mm_setzero_ps();
|
|
|
|
// first block
|
|
int block = m_nTaps - m_ptr;
|
|
int todo = block / 2;
|
|
if(block & 1)
|
|
todo++;
|
|
for(int i = 0; i < todo; i++) {
|
|
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(src), *filter));
|
|
src += 4;
|
|
filter += 1;
|
|
}
|
|
if(block & 1) {
|
|
// one sample beyond the end -> switch coefficient table
|
|
filter = (const __m128*)&m_alignedTaps2[phase * m_nTaps * 2 + todo * 4 - 4];
|
|
}
|
|
// second block
|
|
src = (const float*)&m_samples[0];
|
|
block = m_ptr;
|
|
todo = block / 2;
|
|
for(int i = 0; i < todo; i++) {
|
|
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(src), *filter));
|
|
src += 4;
|
|
filter += 1;
|
|
}
|
|
if(block & 1) {
|
|
// one sample remaining
|
|
sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadl_pi(_mm_setzero_ps(), (const __m64*)src), filter[0]));
|
|
}
|
|
|
|
// add upper half to lower half and store
|
|
_mm_storel_pi((__m64*)result, _mm_add_ps(sum, _mm_shuffle_ps(sum, _mm_setzero_ps(), _MM_SHUFFLE(1, 0, 3, 2))));
|
|
}
|
|
#else
|
|
int sample = m_ptr;
|
|
const Real* coeff = &m_alignedTaps[phase * m_nTaps * 2];
|
|
Real rAcc = 0;
|
|
Real iAcc = 0;
|
|
|
|
for(int i = 0; i < m_nTaps; i++) {
|
|
rAcc += *coeff * m_samples[sample].real();
|
|
iAcc += *coeff * m_samples[sample].imag();
|
|
sample = (sample + 1) % m_nTaps;
|
|
coeff += 2;
|
|
}
|
|
*result = Complex(rAcc, iAcc);
|
|
#endif
|
|
|
|
}
|
|
};
|
|
|
|
#endif // INCLUDE_INTERPOLATOR_H
|