diff --git a/CMakeLists.txt b/CMakeLists.txt index e32f9c1cc..5e47d2b91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ set(sdrbase_SOURCES sdrbase/dsp/dspcommands.cpp sdrbase/dsp/dspengine.cpp sdrbase/dsp/fftengine.cpp + sdrbase/dsp/fftfilt.cxx sdrbase/dsp/fftwindow.cpp sdrbase/dsp/interpolator.cpp sdrbase/dsp/inthalfbandfilter.cpp @@ -107,17 +108,21 @@ set(sdrbase_HEADERS include-gpl/dsp/channelizer.h include/dsp/channelmarker.h + include-gpl/dsp/complex.h include-gpl/dsp/dspcommands.h include-gpl/dsp/dspengine.h include/dsp/dsptypes.h include-gpl/dsp/fftengine.h + include-gpl/dsp/fftfilt.h include-gpl/dsp/fftwengine.h include-gpl/dsp/fftwindow.h + include-gpl/dsp/gfft.h include-gpl/dsp/interpolator.h include-gpl/dsp/inthalfbandfilter.h include/dsp/kissfft.h include-gpl/dsp/kissengine.h include-gpl/dsp/lowpass.h + include-gpl/dsp/misc.h include-gpl/dsp/movingaverage.h include-gpl/dsp/nco.h sdrbase/dsp/pidcontroller.h diff --git a/include-gpl/dsp/complex.h b/include-gpl/dsp/complex.h new file mode 100644 index 000000000..2653a591c --- /dev/null +++ b/include-gpl/dsp/complex.h @@ -0,0 +1,43 @@ +// ---------------------------------------------------------------------------- +// complex.h -- Complex arithmetic +// +// Copyright (C) 2006-2008 +// Dave Freese, W1HKJ +// Copyright (C) 2008 +// Stelios Bounanos, M0GLD +// +// This file is part of fldigi. +// +// Fldigi is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Fldigi is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with fldigi. If not, see . +// ---------------------------------------------------------------------------- + +#ifndef _COMPLEX_H +#define _COMPLEX_H + +#include +#include + +typedef std::complex cmplx; + +inline cmplx cmac (const cmplx *a, const cmplx *b, int ptr, int len) { + cmplx z; + ptr %= len; + for (int i = 0; i < len; i++) { + z += a[i] * b[ptr]; + ptr = (ptr + 1) % len; + } + return z; +} + +#endif diff --git a/include-gpl/dsp/fftfilt.h b/include-gpl/dsp/fftfilt.h new file mode 100644 index 000000000..e5d009383 --- /dev/null +++ b/include-gpl/dsp/fftfilt.h @@ -0,0 +1,54 @@ +/* + * fftfilt.h -- Fast convolution FIR filter +*/ + +#ifndef _FFTFILT_H +#define _FFTFILT_H + +#include "complex.h" +#include "gfft.h" + +//---------------------------------------------------------------------- + +class fftfilt { +enum {NONE, BLACKMAN, HAMMING, HANNING}; + +protected: + int flen; + int flen2; + g_fft *fft; + g_fft *ift; + cmplx *ht; + cmplx *filter; + cmplx *timedata; + cmplx *freqdata; + cmplx *ovlbuf; + cmplx *output; + int inptr; + int pass; + int window; + + inline float fsinc(float fc, int i, int len) { + return (i == len/2) ? 2.0 * fc: + sin(2 * M_PI * fc * (i - len/2)) / (M_PI * (i - len/2)); + } + inline float _blackman(int i, int len) { + return (0.42 - + 0.50 * cos(2.0 * M_PI * i / len) + + 0.08 * cos(4.0 * M_PI * i / len)); + } + void init_filter(); + +public: + fftfilt(float f1, float f2, int len); + fftfilt(float f, int len); + ~fftfilt(); +// f1 < f2 ==> bandpass +// f1 > f2 ==> band reject + void create_filter(float f1, float f2); + void rtty_filter(float); + + int run(const cmplx& in, cmplx **out); +}; + +#endif diff --git a/include-gpl/dsp/gfft.h b/include-gpl/dsp/gfft.h new file mode 100644 index 000000000..a5fba335b --- /dev/null +++ b/include-gpl/dsp/gfft.h @@ -0,0 +1,3376 @@ +//============================================================================== +// g_fft.h: +// +// FFT library +// Copyright (C) 2013 +// Dave Freese, W1HKJ +// +// based on public domain code by John Green +// original version is available at +// http://hyperarchive.lcs.mit.edu/ +// /HyperArchive/Archive/dev/src/ffts-for-risc-2-c.hqx +// +// ported to C++ for fldigi by Dave Freese, W1HKJ +// +// This file is part of fldigi. +// +// Fldigi is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Fldigi is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with fldigi. If not, see . +//============================================================================== + +#ifndef CGREEN_FFT_H +#define CGREEN_FFT_H + +#include + +template +class g_fft { +#define FFT_RECIPLN2 1.442695040888963407359924681001892137426 // 1.0/log(2) + +// some useful conversions between a number and its power of 2 +#define LOG2(a) (FFT_RECIPLN2*log(a)) // floating point logarithm base 2 +#define POW2(m) ((unsigned int) 1 << (m)) // integer power of 2 for m<32 + +// fft's with M bigger than this bust primary cache +#define MCACHE (11 - (sizeof(FFT_TYPE) / 8)) + +// some math constants to 40 decimal places +#define FFT_PI 3.141592653589793238462643383279502884197 // pi +#define FFT_ROOT2 1.414213562373095048801688724209698078569 // sqrt(2) +#define FFT_COSPID8 0.9238795325112867561281831893967882868224 // cos(pi/8) +#define FFT_SINPID8 0.3826834323650897717284599840303988667613 // sin(pi/8) +private: + int FFT_size; + int FFT_N; + FFT_TYPE *FFT_table_1[32]; + short int *FFT_table_2[32]; + + FFT_TYPE *Utbl; + short *BRLow; + + void fftInit(); + int ConvertFFTSize(int); + +// base fft methods + void riffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow); + void ifrstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl); + void rifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void rifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void rifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void rifft1pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void rffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow); + void frstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl); + void rfft8pt(FFT_TYPE *ioptr); + void rfft4pt(FFT_TYPE *ioptr); + void rfft2pt(FFT_TYPE *ioptr); + void rfft1pt(FFT_TYPE *ioptr); + void iffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow); + void ifftrecurs(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride, + int NDiffU, int StageCnt); + void ibfstages(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride, + int NDiffU, int StageCnt); + void ibfR4(FFT_TYPE *ioptr, int M, int NDiffU); + void ibfR2(FFT_TYPE *ioptr, int M, int NDiffU); + void ifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void ifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void ifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale); + void scbitrevR2(FFT_TYPE *ioptr, int M, short *BRLow, FFT_TYPE scale); + void ffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow); + void fftrecurs(FFT_TYPE *ioptr, int M, + FFT_TYPE *Utbl, int Ustride, int NDiffU, + int StageCnt); + void bfstages(FFT_TYPE *ioptr, int M, + FFT_TYPE *Utbl, int Ustride, + int NDiffU, int StageCnt); + void bfR4(FFT_TYPE *ioptr, int M, int NDiffU); + void bfR2(FFT_TYPE *ioptr, int M, int NDiffU); + void fft8pt(FFT_TYPE *ioptr); + void fft4pt(FFT_TYPE *ioptr); + void fft2pt(FFT_TYPE *ioptr); + void bitrevR2(FFT_TYPE *ioptr, int M, short *BRLow); + void fftBRInit(int M, short *BRLow); + void fftCosInit(int M, FFT_TYPE *Utbl); + +public: + g_fft(int M = 8192) { + if (M < 16) M = 16; + if (M > 268435456) M = 268435456; + FFT_size = M; + fftInit(); + } + ~g_fft() { + for (int i = 0; i < 32; i++) { + if (FFT_table_1[i] != 0) delete [] FFT_table_1[i]; + if (FFT_table_2[i] != 0) delete [] FFT_table_2[i]; + } + } + + void ComplexFFT(std::complex *buf); + void InverseComplexFFT(std::complex *buf); + void RealFFT(std::complex *buf); + void InverseRealFFT(std::complex *buf); + FFT_TYPE GetInverseComplexFFTScale(); + FFT_TYPE GetInverseRealFFTScale(); +}; + +//------------------------------------------------------------------------------ +// Compute Utbl, the cosine table for ffts +// of size (pow(2,M)/4 +1) +// INPUTS +// M = log2 of fft size +// OUTPUTS +// *Utbl = cosine table +//------------------------------------------------------------------------------ +template +void g_fft::fftCosInit(int M, FFT_TYPE *Utbl) +{ + unsigned int fftN = POW2(M); + unsigned int i1; + + Utbl[0] = FFT_TYPE(1.0); + for (i1 = 1; i1 < fftN/4; i1++) + Utbl[i1] = (FFT_TYPE)cos((2.0 * FFT_PI * (float)i1) / (float)fftN); + Utbl[fftN/4] = FFT_TYPE(0.0); +} + +//------------------------------------------------------------------------------ +// Compute BRLow, the bit reversed table for ffts +// of size pow(2,M/2 -1) +// INPUTS +// M = log2 of fft size +// OUTPUTS +// *BRLow = bit reversed counter table +//------------------------------------------------------------------------------ +template +void g_fft::fftBRInit(int M, short *BRLow) +{ + int Mroot_1 = M / 2 - 1; + int Nroot_1 = POW2(Mroot_1); + int i1; + int bitsum; + int bitmask; + int bit; + + for (i1 = 0; i1 < Nroot_1; i1++) { + bitsum = 0; + bitmask = 1; + for (bit = 1; bit <= Mroot_1; bitmask <<= 1, bit++) + if (i1 & bitmask) + bitsum = bitsum + (Nroot_1 >> bit); + BRLow[i1] = bitsum; + } +} + +//------------------------------------------------------------------------------ +// parts of ffts1 +// bit reverse and first radix 2 stage of forward or inverse fft +//------------------------------------------------------------------------------ +template +void g_fft::bitrevR2(FFT_TYPE *ioptr, int M, short *BRLow) +{ + FFT_TYPE f0r; + FFT_TYPE f0i; + FFT_TYPE f1r; + FFT_TYPE f1i; + FFT_TYPE f2r; + FFT_TYPE f2i; + FFT_TYPE f3r; + FFT_TYPE f3i; + FFT_TYPE f4r; + FFT_TYPE f4i; + FFT_TYPE f5r; + FFT_TYPE f5i; + FFT_TYPE f6r; + FFT_TYPE f6i; + FFT_TYPE f7r; + FFT_TYPE f7i; + FFT_TYPE t0r; + FFT_TYPE t0i; + FFT_TYPE t1r; + FFT_TYPE t1i; + FFT_TYPE *p0r; + FFT_TYPE *p1r; + FFT_TYPE *IOP; + FFT_TYPE *iolimit; + int Colstart; + int iCol; + unsigned int posA; + unsigned int posAi; + unsigned int posB; + unsigned int posBi; + + const unsigned int Nrems2 = POW2((M + 3) / 2); + const unsigned int Nroot_1_ColInc = POW2(M) - Nrems2; + const unsigned int Nroot_1 = POW2(M / 2 - 1) - 1; + const unsigned int ColstartShift = (M + 1) / 2 + 1; + + posA = POW2(M); // 1/2 of POW2(M) complex + posAi = posA + 1; + posB = posA + 2; + posBi = posB + 1; + + iolimit = ioptr + Nrems2; + for (; ioptr < iolimit; ioptr += POW2(M / 2 + 1)) { + for (Colstart = Nroot_1; Colstart >= 0; Colstart--) { + iCol = Nroot_1; + p0r = ioptr + Nroot_1_ColInc + BRLow[Colstart] * 4; + IOP = ioptr + (Colstart << ColstartShift); + p1r = IOP + BRLow[iCol] * 4; + f0r = *(p0r); + f0i = *(p0r + 1); + f1r = *(p0r + posA); + f1i = *(p0r + posAi); + for (; iCol > Colstart;) { + f2r = *(p0r + 2); + f2i = *(p0r + (2 + 1)); + f3r = *(p0r + posB); + f3i = *(p0r + posBi); + f4r = *(p1r); + f4i = *(p1r + 1); + f5r = *(p1r + posA); + f5i = *(p1r + posAi); + f6r = *(p1r + 2); + f6i = *(p1r + (2 + 1)); + f7r = *(p1r + posB); + f7i = *(p1r + posBi); + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + f0r = f4r + f5r; + f0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + f2r = f6r + f7r; + f2i = f6i + f7i; + f7r = f6r - f7r; + f7i = f6i - f7i; + + *(p1r) = t0r; + *(p1r + 1) = t0i; + *(p1r + 2) = f1r; + *(p1r + (2 + 1)) = f1i; + *(p1r + posA) = t1r; + *(p1r + posAi) = t1i; + *(p1r + posB) = f3r; + *(p1r + posBi) = f3i; + *(p0r) = f0r; + *(p0r + 1) = f0i; + *(p0r + 2) = f5r; + *(p0r + (2 + 1)) = f5i; + *(p0r + posA) = f2r; + *(p0r + posAi) = f2i; + *(p0r + posB) = f7r; + *(p0r + posBi) = f7i; + + p0r -= Nrems2; + f0r = *(p0r); + f0i = *(p0r + 1); + f1r = *(p0r + posA); + f1i = *(p0r + posAi); + iCol -= 1; + p1r = IOP + BRLow[iCol] * 4; + } + f2r = *(p0r + 2); + f2i = *(p0r + (2 + 1)); + f3r = *(p0r + posB); + f3i = *(p0r + posBi); + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + + *(p0r) = t0r; + *(p0r + 1) = t0i; + *(p0r + 2) = f1r; + *(p0r + (2 + 1)) = f1i; + *(p0r + posA) = t1r; + *(p0r + posAi) = t1i; + *(p0r + posB) = f3r; + *(p0r + posBi) = f3i; + } + } +} + +//------------------------------------------------------------------------------ +// RADIX 2 fft +//------------------------------------------------------------------------------ +template +void g_fft::fft2pt(FFT_TYPE *ioptr) +{ + FFT_TYPE f0r, f0i, f1r, f1i; + FFT_TYPE t0r, t0i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[2]; + f1i = ioptr[3]; + +// Butterflys +// f0 - - - t0 +// f1 - 1 - f1 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + +// store result + ioptr[0] = t0r; + ioptr[1] = t0i; + ioptr[2] = f1r; + ioptr[3] = f1i; +} + +//------------------------------------------------------------------------------ +// RADIX 4 fft +//------------------------------------------------------------------------------ +template +void g_fft::fft4pt(FFT_TYPE *ioptr) +{ + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE t0r, t0i, t1r, t1i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[4]; + f1i = ioptr[5]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f3r = ioptr[6]; + f3i = ioptr[7]; + +// Butterflys +// f0 - - t0 - - f0 +// f1 - 1 - f1 - - f1 +// f2 - - f2 - 1 - f2 +// f3 - 1 - t1 - -i - f3 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; + +// store result + ioptr[0] = f0r; + ioptr[1] = f0i; + ioptr[2] = f1r; + ioptr[3] = f1i; + ioptr[4] = f2r; + ioptr[5] = f2i; + ioptr[6] = f3r; + ioptr[7] = f3i; +} + +//------------------------------------------------------------------------------ +// RADIX 8 fft +//------------------------------------------------------------------------------ +template +void g_fft::fft8pt(FFT_TYPE *ioptr) +{ + FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4) + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = 2.0; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[8]; + f1i = ioptr[9]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f7r = ioptr[14]; + f7i = ioptr[15]; + +// Butterflys +// f0 - - t0 - - f0 - - f0 +// f1 - 1 - f1 - - f1 - - f1 +// f2 - - f2 - 1 - f2 - - f2 +// f3 - 1 - t1 - -i - f3 - - f3 +// f4 - - t0 - - f4 - 1 - t0 +// f5 - 1 - f5 - - f5 - w3 - f4 +// f6 - - f6 - 1 - f6 - -i - t1 +// f7 - 1 - t1 - -i - f7 - iw3- f6 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; + + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; + + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; + + f7r = f5r - t1i; + f7i = f5i + t1r; + f5r = f5r + t1i; + f5i = f5i - t1r; + + t0r = f0r - f4r; + t0i = f0i - f4i; + f0r = f0r + f4r; + f0i = f0i + f4i; + + t1r = f2r - f6i; + t1i = f2i + f6r; + f2r = f2r + f6i; + f2i = f2i - f6r; + + f4r = f1r - f5r * w0r - f5i * w0r; + f4i = f1i + f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; + + f6r = f3r + f7r * w0r - f7i * w0r; + f6i = f3i + f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; + +// store result + ioptr[0] = f0r; + ioptr[1] = f0i; + ioptr[2] = f1r; + ioptr[3] = f1i; + ioptr[4] = f2r; + ioptr[5] = f2i; + ioptr[6] = f3r; + ioptr[7] = f3i; + ioptr[8] = t0r; + ioptr[9] = t0i; + ioptr[10] = f4r; + ioptr[11] = f4i; + ioptr[12] = t1r; + ioptr[13] = t1i; + ioptr[14] = f6r; + ioptr[15] = f6i; +} + +//------------------------------------------------------------------------------ +// 2nd radix 2 stage +//------------------------------------------------------------------------------ +template +void g_fft::bfR2(FFT_TYPE *ioptr, int M, int NDiffU) +{ + unsigned int pos; + unsigned int posi; + unsigned int pinc; + unsigned int pnext; + unsigned int NSameU; + unsigned int SameUCnt; + + FFT_TYPE *pstrt; + FFT_TYPE *p0r, *p1r, *p2r, *p3r; + + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + + pinc = NDiffU * 2; // 2 floats per complex + pnext = pinc * 4; + pos = 2; + posi = pos + 1; + NSameU = POW2(M) / 4 / NDiffU; // 4 Us at a time + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt + pinc; + p2r = p1r + pinc; + p3r = p2r + pinc; + +// Butterflys +// f0 - - f4 +// f1 - 1 - f5 +// f2 - - f6 +// f3 - 1 - f7 +// Butterflys +// f0 - - f4 +// f1 - 1 - f5 +// f2 - - f6 +// f3 - 1 - f7 + + for (SameUCnt = NSameU; SameUCnt > 0; SameUCnt--) { + + f0r = *p0r; + f1r = *p1r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2r = *p2r; + f3r = *p3r; + f2i = *(p2r + 1); + f3i = *(p3r + 1); + + f4r = f0r + f1r; + f4i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; + + f6r = f2r + f3r; + f6i = f2i + f3i; + f7r = f2r - f3r; + f7i = f2i - f3i; + + *p0r = f4r; + *(p0r + 1) = f4i; + *p1r = f5r; + *(p1r + 1) = f5i; + *p2r = f6r; + *(p2r + 1) = f6i; + *p3r = f7r; + *(p3r + 1) = f7i; + + f0r = *(p0r + pos); + f1i = *(p1r + posi); + f0i = *(p0r + posi); + f1r = *(p1r + pos); + f2r = *(p2r + pos); + f3i = *(p3r + posi); + f2i = *(p2r + posi); + f3r = *(p3r + pos); + + f4r = f0r + f1i; + f4i = f0i - f1r; + f5r = f0r - f1i; + f5i = f0i + f1r; + + f6r = f2r + f3i; + f6i = f2i - f3r; + f7r = f2r - f3i; + f7i = f2i + f3r; + + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + *(p2r + pos) = f6r; + *(p2r + posi) = f6i; + *(p3r + pos) = f7r; + *(p3r + posi) = f7i; + + p0r += pnext; + p1r += pnext; + p2r += pnext; + p3r += pnext; + } +} + +//------------------------------------------------------------------------------ +// 1 radix 4 stage +//------------------------------------------------------------------------------ +template +void g_fft::bfR4(FFT_TYPE *ioptr, int M, int NDiffU) +{ + unsigned int pos; + unsigned int posi; + unsigned int pinc; + unsigned int pnext; + unsigned int pnexti; + unsigned int NSameU; + unsigned int SameUCnt; + + FFT_TYPE *pstrt; + FFT_TYPE *p0r, *p1r, *p2r, *p3r; + + FFT_TYPE w1r = 1.0 / FFT_ROOT2; // cos(pi/4) + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t1r, t1i; + const FFT_TYPE Two = 2.0; + + pinc = NDiffU * 2; // 2 floats per complex + pnext = pinc * 4; + pnexti = pnext + 1; + pos = 2; + posi = pos + 1; + NSameU = POW2(M) / 4 / NDiffU; // 4 pts per butterfly + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt + pinc; + p2r = p1r + pinc; + p3r = p2r + pinc; + +// Butterflys +// f0 - - f0 - - f4 +// f1 - 1 - f5 - - f5 +// f2 - - f6 - 1 - f6 +// f3 - 1 - f3 - -i - f7 +// Butterflys +// f0 - - f4 - - f4 +// f1 - -i - t1 - - f5 +// f2 - - f2 - w1 - f6 +// f3 - -i - f7 - iw1- f7 + + f0r = *p0r; + f1r = *p1r; + f2r = *p2r; + f3r = *p3r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2i = *(p2r + 1); + f3i = *(p3r + 1); + + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; + + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + + for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) { + + f7r = f5r - f3i; + f7i = f5i + f3r; + f5r = f5r + f3i; + f5i = f5i - f3r; + + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; + + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); + + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; + + f7r = f2r - f3i; + f7i = f2i + f3r; + f2r = f2r + f3i; + f2i = f2i - f3r; + + f4r = f0r + f1i; + f4i = f0i - f1r; + t1r = f0r - f1i; + t1i = f0i + f1r; + + f5r = t1r - f7r * w1r + f7i * w1r; + f5i = t1i - f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; + + f6r = f4r - f2r * w1r - f2i * w1r; + f6i = f4i + f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; + + f3r = *(p3r + pnext); + f0r = *(p0r + pnext); + f3i = *(p3r + pnexti); + f0i = *(p0r + pnexti); + f2r = *(p2r + pnext); + f2i = *(p2r + pnexti); + f1r = *(p1r + pnext); + f1i = *(p1r + pnexti); + + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; + + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; + + p3r += pnext; + p0r += pnext; + p1r += pnext; + p2r += pnext; + } + f7r = f5r - f3i; + f7i = f5i + f3r; + f5r = f5r + f3i; + f5i = f5i - f3r; + + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; + + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); + + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; + + f7r = f2r - f3i; + f7i = f2i + f3r; + f2r = f2r + f3i; + f2i = f2i - f3r; + + f4r = f0r + f1i; + f4i = f0i - f1r; + t1r = f0r - f1i; + t1i = f0i + f1r; + + f5r = t1r - f7r * w1r + f7i * w1r; + f5i = t1i - f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; + + f6r = f4r - f2r * w1r - f2i * w1r; + f6i = f4i + f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; + + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; +} + +//------------------------------------------------------------------------------ +// RADIX 8 Stages +//------------------------------------------------------------------------------ +template +void g_fft::bfstages(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride, + int NDiffU, int StageCnt) +{ + unsigned int pos; + unsigned int posi; + unsigned int pinc; + unsigned int pnext; + unsigned int NSameU; + int Uinc; + int Uinc2; + int Uinc4; + unsigned int DiffUCnt; + unsigned int SameUCnt; + unsigned int U2toU3; + + FFT_TYPE *pstrt; + FFT_TYPE *p0r, *p1r, *p2r, *p3r; + FFT_TYPE *u0r, *u0i, *u1r, *u1i, *u2r, *u2i; + + FFT_TYPE w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i; + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = FFT_TYPE(2.0); + + pinc = NDiffU * 2; // 2 floats per complex + pnext = pinc * 8; + pos = pinc * 4; + posi = pos + 1; + NSameU = POW2(M) / 8 / NDiffU; // 8 pts per butterfly + Uinc = (int) NSameU * Ustride; + Uinc2 = Uinc * 2; + Uinc4 = Uinc * 4; + U2toU3 = (POW2(M) / 8) * Ustride; + for (; StageCnt > 0; StageCnt--) { + + u0r = &Utbl[0]; + u0i = &Utbl[POW2(M - 2) * Ustride]; + u1r = u0r; + u1i = u0i; + u2r = u0r; + u2i = u0i; + + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; + w2r = *u2r; + w2i = *u2i; + w3r = *(u2r + U2toU3); + w3i = *(u2i - U2toU3); + + pstrt = ioptr; + + p0r = pstrt; + p1r = pstrt + pinc; + p2r = p1r + pinc; + p3r = p2r + pinc; + +// Butterflys +// f0 - - t0 - - f0 - - f0 +// f1 - w0- f1 - - f1 - - f1 +// f2 - - f2 - w1- f2 - - f4 +// f3 - w0- t1 - iw1- f3 - - f5 +// +// f4 - - t0 - - f4 - w2- t0 +// f5 - w0- f5 - - f5 - w3- t1 +// f6 - - f6 - w1- f6 - iw2- f6 +// f7 - w0- t1 - iw1- f7 - iw3- f7 + + for (DiffUCnt = NDiffU; DiffUCnt > 0; DiffUCnt--) { + f0r = *p0r; + f0i = *(p0r + 1); + f1r = *p1r; + f1i = *(p1r + 1); + for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) { + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); + + t0r = f0r + f1r * w0r + f1i * w0i; + t0i = f0i - f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; + + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); + + t1r = f2r - f3r * w0r - f3i * w0i; + t1i = f2i + f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; + + f0r = t0r + f2r * w1r + f2i * w1i; + f0i = t0i - f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; + + f3r = f1r + t1r * w1i - t1i * w1r; + f3i = f1i + t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; + + t0r = f4r + f5r * w0r + f5i * w0i; + t0i = f4i - f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; + + t1r = f6r - f7r * w0r - f7i * w0i; + t1i = f6i + f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; + + f4r = t0r + f6r * w1r + f6i * w1i; + f4i = t0i - f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; + + f7r = f5r + t1r * w1i - t1i * w1r; + f7i = f5i + t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; + + t0r = f0r - f4r * w2r - f4i * w2i; + t0i = f0i + f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; + + t1r = f1r - f5r * w3r - f5i * w3i; + t1i = f1i + f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; + + *(p0r + pos) = t0r; + *(p1r + pos) = t1r; + *(p0r + posi) = t0i; + *(p1r + posi) = t1i; + *p0r = f0r; + *p1r = f1r; + *(p0r + 1) = f0i; + *(p1r + 1) = f1i; + + p0r += pnext; + f0r = *p0r; + f0i = *(p0r + 1); + + p1r += pnext; + + f1r = *p1r; + f1i = *(p1r + 1); + + f4r = f2r - f6r * w2i + f6i * w2r; + f4i = f2i - f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; + + f5r = f3r - f7r * w3i + f7i * w3r; + f5i = f3i - f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; + + *p2r = f4r; + *p3r = f5r; + *(p2r + 1) = f4i; + *(p3r + 1) = f5i; + *(p2r + pos) = f6r; + *(p3r + pos) = f7r; + *(p2r + posi) = f6i; + *(p3r + posi) = f7i; + + p2r += pnext; + p3r += pnext; + } + + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); + + t0r = f0r + f1r * w0r + f1i * w0i; + t0i = f0i - f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; + + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); + + t1r = f2r - f3r * w0r - f3i * w0i; + t1i = f2i + f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; + + f0r = t0r + f2r * w1r + f2i * w1i; + f0i = t0i - f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; + + f3r = f1r + t1r * w1i - t1i * w1r; + f3i = f1i + t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; + + if ((int) DiffUCnt == NDiffU / 2) + Uinc4 = -Uinc4; + + u0r += Uinc4; + u0i -= Uinc4; + u1r += Uinc2; + u1i -= Uinc2; + u2r += Uinc; + u2i -= Uinc; + + pstrt += 2; + + t0r = f4r + f5r * w0r + f5i * w0i; + t0i = f4i - f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; + + t1r = f6r - f7r * w0r - f7i * w0i; + t1i = f6i + f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; + + f4r = t0r + f6r * w1r + f6i * w1i; + f4i = t0i - f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; + + f7r = f5r + t1r * w1i - t1i * w1r; + f7i = f5i + t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; + + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; + + if ((int) DiffUCnt <= NDiffU / 2) + w0r = -w0r; + + t0r = f0r - f4r * w2r - f4i * w2i; + t0i = f0i + f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; + + f4r = f2r - f6r * w2i + f6i * w2r; + f4i = f2i - f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; + + *(p0r + pos) = t0r; + *p2r = f4r; + *(p0r + posi) = t0i; + *(p2r + 1) = f4i; + w2r = *u2r; + w2i = *u2i; + *p0r = f0r; + *(p2r + pos) = f6r; + *(p0r + 1) = f0i; + *(p2r + posi) = f6i; + + p0r = pstrt; + p2r = pstrt + pinc + pinc; + + t1r = f1r - f5r * w3r - f5i * w3i; + t1i = f1i + f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; + + f5r = f3r - f7r * w3i + f7i * w3r; + f5i = f3i - f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; + + *(p1r + pos) = t1r; + *p3r = f5r; + *(p1r + posi) = t1i; + *(p3r + 1) = f5i; + w3r = *(u2r + U2toU3); + w3i = *(u2i - U2toU3); + *p1r = f1r; + *(p3r + pos) = f7r; + *(p1r + 1) = f1i; + *(p3r + posi) = f7i; + + p1r = pstrt + pinc; + p3r = p2r + pinc; + } + NSameU /= 8; + Uinc /= 8; + Uinc2 /= 8; + Uinc4 = Uinc * 4; + NDiffU *= 8; + pinc *= 8; + pnext *= 8; + pos *= 8; + posi = pos + 1; + } +} + +template +void g_fft::fftrecurs(FFT_TYPE *ioptr, int M, + FFT_TYPE *Utbl, int Ustride, int NDiffU, + int StageCnt) +{ +// recursive bfstages calls to maximize on chip cache efficiency + int i1; + + if (M <= (int) MCACHE) // fits on chip ? + bfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); // RADIX 8 Stages + else { + for (i1 = 0; i1 < 8; i1++) { + fftrecurs(&ioptr[i1 * POW2(M - 3) * 2], M - 3, Utbl, 8 * Ustride, + NDiffU, StageCnt - 1); // RADIX 8 Stages + } + bfstages(ioptr, M, Utbl, Ustride, POW2(M - 3), 1); // RADIX 8 Stage + } +} + +//------------------------------------------------------------------------------ +// Compute in-place complex fft on the rows of the input array +// INPUTS +// *ioptr = input data array +// M = log2 of fft size (ex M=10 for 1024 point fft) +// *Utbl = cosine table +// *BRLow = bit reversed counter table +// OUTPUTS +// *ioptr = output data array +//------------------------------------------------------------------------------ +template +void g_fft::ffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow) +{ + int StageCnt; + int NDiffU; + + switch (M) { + case 0: + break; + case 1: + fft2pt(ioptr); // a 2 pt fft + break; + case 2: + fft4pt(ioptr); // a 4 pt fft + break; + case 3: + fft8pt(ioptr); // an 8 pt fft + break; + default: + bitrevR2(ioptr, M, BRLow); // bit reverse and first radix 2 stage + StageCnt = (M - 1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete + if ((M - 1 - (StageCnt * 3)) == 1) { + bfR2(ioptr, M, NDiffU); // 1 radix 2 stage + NDiffU *= 2; + } + if ((M - 1 - (StageCnt * 3)) == 2) { + bfR4(ioptr, M, NDiffU); // 1 radix 4 stage + NDiffU *= 4; + } + if (M <= (int) MCACHE) + bfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages + else + fftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages + } +} + +//------------------------------------------------------------------------------ +// parts of iffts1 +// scaled bit reverse and first radix 2 stage forward or inverse fft +//------------------------------------------------------------------------------ +template +void g_fft::scbitrevR2(FFT_TYPE *ioptr, int M, short *BRLow, FFT_TYPE scale) +{ + FFT_TYPE f0r; + FFT_TYPE f0i; + FFT_TYPE f1r; + FFT_TYPE f1i; + FFT_TYPE f2r; + FFT_TYPE f2i; + FFT_TYPE f3r; + FFT_TYPE f3i; + FFT_TYPE f4r; + FFT_TYPE f4i; + FFT_TYPE f5r; + FFT_TYPE f5i; + FFT_TYPE f6r; + FFT_TYPE f6i; + FFT_TYPE f7r; + FFT_TYPE f7i; + FFT_TYPE t0r; + FFT_TYPE t0i; + FFT_TYPE t1r; + FFT_TYPE t1i; + FFT_TYPE *p0r; + FFT_TYPE *p1r; + FFT_TYPE *IOP; + FFT_TYPE *iolimit; + int Colstart; + int iCol; + unsigned int posA; + unsigned int posAi; + unsigned int posB; + unsigned int posBi; + + const unsigned int Nrems2 = POW2((M + 3) / 2); + const unsigned int Nroot_1_ColInc = POW2(M) - Nrems2; + const unsigned int Nroot_1 = POW2(M / 2 - 1) - 1; + const unsigned int ColstartShift = (M + 1) / 2 + 1; + + posA = POW2(M); // 1/2 of POW2(M) complexes + posAi = posA + 1; + posB = posA + 2; + posBi = posB + 1; + + iolimit = ioptr + Nrems2; + for (; ioptr < iolimit; ioptr += POW2(M / 2 + 1)) { + for (Colstart = Nroot_1; Colstart >= 0; Colstart--) { + iCol = Nroot_1; + p0r = ioptr + Nroot_1_ColInc + BRLow[Colstart] * 4; + IOP = ioptr + (Colstart << ColstartShift); + p1r = IOP + BRLow[iCol] * 4; + f0r = *(p0r); + f0i = *(p0r + 1); + f1r = *(p0r + posA); + f1i = *(p0r + posAi); + for (; iCol > Colstart;) { + f2r = *(p0r + 2); + f2i = *(p0r + (2 + 1)); + f3r = *(p0r + posB); + f3i = *(p0r + posBi); + f4r = *(p1r); + f4i = *(p1r + 1); + f5r = *(p1r + posA); + f5i = *(p1r + posAi); + f6r = *(p1r + 2); + f6i = *(p1r + (2 + 1)); + f7r = *(p1r + posB); + f7i = *(p1r + posBi); + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + f0r = f4r + f5r; + f0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + f2r = f6r + f7r; + f2i = f6i + f7i; + f7r = f6r - f7r; + f7i = f6i - f7i; + + *(p1r) = scale * t0r; + *(p1r + 1) = scale * t0i; + *(p1r + 2) = scale * f1r; + *(p1r + (2 + 1)) = scale * f1i; + *(p1r + posA) = scale * t1r; + *(p1r + posAi) = scale * t1i; + *(p1r + posB) = scale * f3r; + *(p1r + posBi) = scale * f3i; + *(p0r) = scale * f0r; + *(p0r + 1) = scale * f0i; + *(p0r + 2) = scale * f5r; + *(p0r + (2 + 1)) = scale * f5i; + *(p0r + posA) = scale * f2r; + *(p0r + posAi) = scale * f2i; + *(p0r + posB) = scale * f7r; + *(p0r + posBi) = scale * f7i; + + p0r -= Nrems2; + f0r = *(p0r); + f0i = *(p0r + 1); + f1r = *(p0r + posA); + f1i = *(p0r + posAi); + iCol -= 1; + p1r = IOP + BRLow[iCol] * 4; + } + f2r = *(p0r + 2); + f2i = *(p0r + (2 + 1)); + f3r = *(p0r + posB); + f3i = *(p0r + posBi); + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + t1r = f2r + f3r; + t1i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + + *(p0r) = scale * t0r; + *(p0r + 1) = scale * t0i; + *(p0r + 2) = scale * f1r; + *(p0r + (2 + 1)) = scale * f1i; + *(p0r + posA) = scale * t1r; + *(p0r + posAi) = scale * t1i; + *(p0r + posB) = scale * f3r; + *(p0r + posBi) = scale * f3i; + } + } +} + +//------------------------------------------------------------------------------ +// RADIX 2 ifft +//------------------------------------------------------------------------------ +template +void g_fft::ifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE f0r, f0i, f1r, f1i; + FFT_TYPE t0r, t0i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[2]; + f1i = ioptr[3]; + +// Butterflys +// f0 - - t0 +// f1 - 1 - f1 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + +// store result + ioptr[0] = scale * t0r; + ioptr[1] = scale * t0i; + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; +} + +//------------------------------------------------------------------------------ +// RADIX 4 ifft +//------------------------------------------------------------------------------ +template +void g_fft::ifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE t0r, t0i, t1r, t1i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[4]; + f1i = ioptr[5]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f3r = ioptr[6]; + f3i = ioptr[7]; + +// Butterflys +// f0 - - t0 - - f0 +// f1 - 1 - f1 - - f1 +// f2 - - f2 - 1 - f2 +// f3 - 1 - t1 - i - f3 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; + +// store result + ioptr[0] = scale * f0r; + ioptr[1] = scale * f0i; + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; + ioptr[4] = scale * f2r; + ioptr[5] = scale * f2i; + ioptr[6] = scale * f3r; + ioptr[7] = scale * f3i; +} + +//------------------------------------------------------------------------------ +// RADIX 8 ifft +//------------------------------------------------------------------------------ +template +void g_fft::ifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4) + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = 2.0; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[8]; + f1i = ioptr[9]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f7r = ioptr[14]; + f7i = ioptr[15]; + +// Butterflys +// f0 - - t0 - - f0 - - f0 +// f1 - 1 - f1 - - f1 - - f1 +// f2 - - f2 - 1 - f2 - - f2 +// f3 - 1 - t1 - i - f3 - - f3 +// f4 - - t0 - - f4 - 1 - t0 +// f5 - 1 - f5 - - f5 - w3 - f4 +// f6 - - f6 - 1 - f6 - i - t1 +// f7 - 1 - t1 - i - f7 - iw3- f6 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; + + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; + + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; + + f7r = f5r + t1i; + f7i = f5i - t1r; + f5r = f5r - t1i; + f5i = f5i + t1r; + + t0r = f0r - f4r; + t0i = f0i - f4i; + f0r = f0r + f4r; + f0i = f0i + f4i; + + t1r = f2r + f6i; + t1i = f2i - f6r; + f2r = f2r - f6i; + f2i = f2i + f6r; + + f4r = f1r - f5r * w0r + f5i * w0r; + f4i = f1i - f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; + + f6r = f3r + f7r * w0r + f7i * w0r; + f6i = f3i - f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; + +// store result + ioptr[0] = scale * f0r; + ioptr[1] = scale * f0i; + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; + ioptr[4] = scale * f2r; + ioptr[5] = scale * f2i; + ioptr[6] = scale * f3r; + ioptr[7] = scale * f3i; + ioptr[8] = scale * t0r; + ioptr[9] = scale * t0i; + ioptr[10] = scale * f4r; + ioptr[11] = scale * f4i; + ioptr[12] = scale * t1r; + ioptr[13] = scale * t1i; + ioptr[14] = scale * f6r; + ioptr[15] = scale * f6i; +} + +//------------------------------------------------------------------------------ +// 2nd radix 2 stage +//------------------------------------------------------------------------------ +template +void g_fft::ibfR2(FFT_TYPE *ioptr, int M, int NDiffU) +{ + unsigned int pos; + unsigned int posi; + unsigned int pinc; + unsigned int pnext; + unsigned int NSameU; + unsigned int SameUCnt; + + FFT_TYPE *pstrt; + FFT_TYPE *p0r, *p1r, *p2r, *p3r; + + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + + pinc = NDiffU * 2; // 2 floats per complex + pnext = pinc * 4; + pos = 2; + posi = pos + 1; + NSameU = POW2(M) / 4 / NDiffU; // 4 Us at a time + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt + pinc; + p2r = p1r + pinc; + p3r = p2r + pinc; + +// Butterflys +// f0 - - f4 +// f1 - 1 - f5 +// f2 - - f6 +// f3 - 1 - f7 +// Butterflys +// f0 - - f4 +// f1 - 1 - f5 +// f2 - - f6 +// f3 - 1 - f7 + + for (SameUCnt = NSameU; SameUCnt > 0; SameUCnt--) { + + f0r = *p0r; + f1r = *p1r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2r = *p2r; + f3r = *p3r; + f2i = *(p2r + 1); + f3i = *(p3r + 1); + + f4r = f0r + f1r; + f4i = f0i + f1i; + f5r = f0r - f1r; + f5i = f0i - f1i; + + f6r = f2r + f3r; + f6i = f2i + f3i; + f7r = f2r - f3r; + f7i = f2i - f3i; + + *p0r = f4r; + *(p0r + 1) = f4i; + *p1r = f5r; + *(p1r + 1) = f5i; + *p2r = f6r; + *(p2r + 1) = f6i; + *p3r = f7r; + *(p3r + 1) = f7i; + + f0r = *(p0r + pos); + f1i = *(p1r + posi); + f0i = *(p0r + posi); + f1r = *(p1r + pos); + f2r = *(p2r + pos); + f3i = *(p3r + posi); + f2i = *(p2r + posi); + f3r = *(p3r + pos); + + f4r = f0r - f1i; + f4i = f0i + f1r; + f5r = f0r + f1i; + f5i = f0i - f1r; + + f6r = f2r - f3i; + f6i = f2i + f3r; + f7r = f2r + f3i; + f7i = f2i - f3r; + + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + *(p2r + pos) = f6r; + *(p2r + posi) = f6i; + *(p3r + pos) = f7r; + *(p3r + posi) = f7i; + + p0r += pnext; + p1r += pnext; + p2r += pnext; + p3r += pnext; + } +} + +//------------------------------------------------------------------------------ +// 1 radix 4 stage +//------------------------------------------------------------------------------ +template +void g_fft::ibfR4(FFT_TYPE *ioptr, int M, int NDiffU) +{ + unsigned int pos; + unsigned int posi; + unsigned int pinc; + unsigned int pnext; + unsigned int pnexti; + unsigned int NSameU; + unsigned int SameUCnt; + + FFT_TYPE *pstrt; + FFT_TYPE *p0r, *p1r, *p2r, *p3r; + + FFT_TYPE w1r = 1.0 / FFT_ROOT2; // cos(pi/4) + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t1r, t1i; + const FFT_TYPE Two = 2.0; + + pinc = NDiffU * 2; // 2 floats per complex + pnext = pinc * 4; + pnexti = pnext + 1; + pos = 2; + posi = pos + 1; + NSameU = POW2(M) / 4 / NDiffU; // 4 pts per butterfly + pstrt = ioptr; + p0r = pstrt; + p1r = pstrt + pinc; + p2r = p1r + pinc; + p3r = p2r + pinc; + +// Butterflys +// f0 - - f0 - - f4 +// f1 - 1 - f5 - - f5 +// f2 - - f6 - 1 - f6 +// f3 - 1 - f3 - -i - f7 +// Butterflys +// f0 - - f4 - - f4 +// f1 - -i - t1 - - f5 +// f2 - - f2 - w1 - f6 +// f3 - -i - f7 - iw1- f7 + + f0r = *p0r; + f1r = *p1r; + f2r = *p2r; + f3r = *p3r; + f0i = *(p0r + 1); + f1i = *(p1r + 1); + f2i = *(p2r + 1); + f3i = *(p3r + 1); + + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; + + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + + for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) { + + f7r = f5r + f3i; + f7i = f5i - f3r; + f5r = f5r - f3i; + f5i = f5i + f3r; + + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; + + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); + + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; + + f7r = f2r + f3i; + f7i = f2i - f3r; + f2r = f2r - f3i; + f2i = f2i + f3r; + + f4r = f0r - f1i; + f4i = f0i + f1r; + t1r = f0r + f1i; + t1i = f0i - f1r; + + f5r = t1r - f7r * w1r - f7i * w1r; + f5i = t1i + f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; + + f6r = f4r - f2r * w1r + f2i * w1r; + f6i = f4i - f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; + + f3r = *(p3r + pnext); + f0r = *(p0r + pnext); + f3i = *(p3r + pnexti); + f0i = *(p0r + pnexti); + f2r = *(p2r + pnext); + f2i = *(p2r + pnexti); + f1r = *(p1r + pnext); + f1i = *(p1r + pnexti); + + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; + + f6r = f2r + f3r; + f6i = f2i + f3i; + f3r = f2r - f3r; + f3i = f2i - f3i; + + f5r = f0r - f1r; + f5i = f0i - f1i; + f0r = f0r + f1r; + f0i = f0i + f1i; + + p3r += pnext; + p0r += pnext; + p1r += pnext; + p2r += pnext; + } + f7r = f5r + f3i; + f7i = f5i - f3r; + f5r = f5r - f3i; + f5i = f5i + f3r; + + f4r = f0r + f6r; + f4i = f0i + f6i; + f6r = f0r - f6r; + f6i = f0i - f6i; + + f2r = *(p2r + pos); + f2i = *(p2r + posi); + f1r = *(p1r + pos); + f1i = *(p1r + posi); + f3i = *(p3r + posi); + f0r = *(p0r + pos); + f3r = *(p3r + pos); + f0i = *(p0r + posi); + + *p3r = f7r; + *p0r = f4r; + *(p3r + 1) = f7i; + *(p0r + 1) = f4i; + *p1r = f5r; + *p2r = f6r; + *(p1r + 1) = f5i; + *(p2r + 1) = f6i; + + f7r = f2r + f3i; + f7i = f2i - f3r; + f2r = f2r - f3i; + f2i = f2i + f3r; + + f4r = f0r - f1i; + f4i = f0i + f1r; + t1r = f0r + f1i; + t1i = f0i - f1r; + + f5r = t1r - f7r * w1r - f7i * w1r; + f5i = t1i + f7r * w1r - f7i * w1r; + f7r = t1r * Two - f5r; + f7i = t1i * Two - f5i; + + f6r = f4r - f2r * w1r + f2i * w1r; + f6i = f4i - f2r * w1r - f2i * w1r; + f4r = f4r * Two - f6r; + f4i = f4i * Two - f6i; + + *(p2r + pos) = f6r; + *(p1r + pos) = f5r; + *(p2r + posi) = f6i; + *(p1r + posi) = f5i; + *(p3r + pos) = f7r; + *(p0r + pos) = f4r; + *(p3r + posi) = f7i; + *(p0r + posi) = f4i; +} + +//------------------------------------------------------------------------------ +// RADIX 8 Stages +//------------------------------------------------------------------------------ +template +void g_fft::ibfstages(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride, + int NDiffU, int StageCnt) +{ + unsigned int pos; + unsigned int posi; + unsigned int pinc; + unsigned int pnext; + unsigned int NSameU; + int Uinc; + int Uinc2; + int Uinc4; + unsigned int DiffUCnt; + unsigned int SameUCnt; + unsigned int U2toU3; + + FFT_TYPE *pstrt; + FFT_TYPE *p0r, *p1r, *p2r, *p3r; + FFT_TYPE *u0r, *u0i, *u1r, *u1i, *u2r, *u2i; + + FFT_TYPE w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i; + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = 2.0; + + pinc = NDiffU * 2; // 2 floats per complex + pnext = pinc * 8; + pos = pinc * 4; + posi = pos + 1; + NSameU = POW2(M) / 8 / NDiffU; // 8 pts per butterfly + Uinc = (int) NSameU * Ustride; + Uinc2 = Uinc * 2; + Uinc4 = Uinc * 4; + U2toU3 = (POW2(M) / 8) * Ustride; + for (; StageCnt > 0; StageCnt--) { + + u0r = &Utbl[0]; + u0i = &Utbl[POW2(M - 2) * Ustride]; + u1r = u0r; + u1i = u0i; + u2r = u0r; + u2i = u0i; + + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; + w2r = *u2r; + w2i = *u2i; + w3r = *(u2r + U2toU3); + w3i = *(u2i - U2toU3); + + pstrt = ioptr; + + p0r = pstrt; + p1r = pstrt + pinc; + p2r = p1r + pinc; + p3r = p2r + pinc; + +// Butterflys +// f0 - - t0 - - f0 - - f0 +// f1 - w0- f1 - - f1 - - f1 +// f2 - - f2 - w1- f2 - - f4 +// f3 - w0- t1 - iw1- f3 - - f5 +// f4 - - t0 - - f4 - w2- t0 +// f5 - w0- f5 - - f5 - w3- t1 +// f6 - - f6 - w1- f6 - iw2- f6 +// f7 - w0- t1 - iw1- f7 - iw3- f7 + + for (DiffUCnt = NDiffU; DiffUCnt > 0; DiffUCnt--) { + f0r = *p0r; + f0i = *(p0r + 1); + f1r = *p1r; + f1i = *(p1r + 1); + for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) { + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); + + t0r = f0r + f1r * w0r - f1i * w0i; + t0i = f0i + f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; + + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); + + t1r = f2r - f3r * w0r + f3i * w0i; + t1i = f2i - f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; + + f0r = t0r + f2r * w1r - f2i * w1i; + f0i = t0i + f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; + + f3r = f1r + t1r * w1i + t1i * w1r; + f3i = f1i - t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; + + t0r = f4r + f5r * w0r - f5i * w0i; + t0i = f4i + f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; + + t1r = f6r - f7r * w0r + f7i * w0i; + t1i = f6i - f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; + + f4r = t0r + f6r * w1r - f6i * w1i; + f4i = t0i + f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; + + f7r = f5r + t1r * w1i + t1i * w1r; + f7i = f5i - t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; + + t0r = f0r - f4r * w2r + f4i * w2i; + t0i = f0i - f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; + + t1r = f1r - f5r * w3r + f5i * w3i; + t1i = f1i - f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; + + *(p0r + pos) = t0r; + *(p0r + posi) = t0i; + *p0r = f0r; + *(p0r + 1) = f0i; + + p0r += pnext; + f0r = *p0r; + f0i = *(p0r + 1); + + *(p1r + pos) = t1r; + *(p1r + posi) = t1i; + *p1r = f1r; + *(p1r + 1) = f1i; + + p1r += pnext; + + f1r = *p1r; + f1i = *(p1r + 1); + + f4r = f2r - f6r * w2i - f6i * w2r; + f4i = f2i + f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; + + f5r = f3r - f7r * w3i - f7i * w3r; + f5i = f3i + f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; + + *p2r = f4r; + *(p2r + 1) = f4i; + *(p2r + pos) = f6r; + *(p2r + posi) = f6i; + + p2r += pnext; + + *p3r = f5r; + *(p3r + 1) = f5i; + *(p3r + pos) = f7r; + *(p3r + posi) = f7i; + + p3r += pnext; + } + + f2r = *p2r; + f2i = *(p2r + 1); + f3r = *p3r; + f3i = *(p3r + 1); + + t0r = f0r + f1r * w0r - f1i * w0i; + t0i = f0i + f1r * w0i + f1i * w0r; + f1r = f0r * Two - t0r; + f1i = f0i * Two - t0i; + + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + + f6r = *(p2r + pos); + f6i = *(p2r + posi); + f7r = *(p3r + pos); + f7i = *(p3r + posi); + + t1r = f2r - f3r * w0r + f3i * w0i; + t1i = f2i - f3r * w0i - f3i * w0r; + f2r = f2r * Two - t1r; + f2i = f2i * Two - t1i; + + f0r = t0r + f2r * w1r - f2i * w1i; + f0i = t0i + f2r * w1i + f2i * w1r; + f2r = t0r * Two - f0r; + f2i = t0i * Two - f0i; + + f3r = f1r + t1r * w1i + t1i * w1r; + f3i = f1i - t1r * w1r + t1i * w1i; + f1r = f1r * Two - f3r; + f1i = f1i * Two - f3i; + + if ((int) DiffUCnt == NDiffU / 2) + Uinc4 = -Uinc4; + + u0r += Uinc4; + u0i -= Uinc4; + u1r += Uinc2; + u1i -= Uinc2; + u2r += Uinc; + u2i -= Uinc; + + pstrt += 2; + + t0r = f4r + f5r * w0r - f5i * w0i; + t0i = f4i + f5r * w0i + f5i * w0r; + f5r = f4r * Two - t0r; + f5i = f4i * Two - t0i; + + t1r = f6r - f7r * w0r + f7i * w0i; + t1i = f6i - f7r * w0i - f7i * w0r; + f6r = f6r * Two - t1r; + f6i = f6i * Two - t1i; + + f4r = t0r + f6r * w1r - f6i * w1i; + f4i = t0i + f6r * w1i + f6i * w1r; + f6r = t0r * Two - f4r; + f6i = t0i * Two - f4i; + + f7r = f5r + t1r * w1i + t1i * w1r; + f7i = f5i - t1r * w1r + t1i * w1i; + f5r = f5r * Two - f7r; + f5i = f5i * Two - f7i; + + w0r = *u0r; + w0i = *u0i; + w1r = *u1r; + w1i = *u1i; + + if ((int) DiffUCnt <= NDiffU / 2) + w0r = -w0r; + + t0r = f0r - f4r * w2r + f4i * w2i; + t0i = f0i - f4r * w2i - f4i * w2r; + f0r = f0r * Two - t0r; + f0i = f0i * Two - t0i; + + f4r = f2r - f6r * w2i - f6i * w2r; + f4i = f2i + f6r * w2r - f6i * w2i; + f6r = f2r * Two - f4r; + f6i = f2i * Two - f4i; + + *(p0r + pos) = t0r; + *p2r = f4r; + *(p0r + posi) = t0i; + *(p2r + 1) = f4i; + w2r = *u2r; + w2i = *u2i; + *p0r = f0r; + *(p2r + pos) = f6r; + *(p0r + 1) = f0i; + *(p2r + posi) = f6i; + + p0r = pstrt; + p2r = pstrt + pinc + pinc; + + t1r = f1r - f5r * w3r + f5i * w3i; + t1i = f1i - f5r * w3i - f5i * w3r; + f1r = f1r * Two - t1r; + f1i = f1i * Two - t1i; + + f5r = f3r - f7r * w3i - f7i * w3r; + f5i = f3i + f7r * w3r - f7i * w3i; + f7r = f3r * Two - f5r; + f7i = f3i * Two - f5i; + + *(p1r + pos) = t1r; + *p3r = f5r; + *(p1r + posi) = t1i; + *(p3r + 1) = f5i; + w3r = *(u2r + U2toU3); + w3i = *(u2i - U2toU3); + *p1r = f1r; + *(p3r + pos) = f7r; + *(p1r + 1) = f1i; + *(p3r + posi) = f7i; + + p1r = pstrt + pinc; + p3r = p2r + pinc; + } + NSameU /= 8; + Uinc /= 8; + Uinc2 /= 8; + Uinc4 = Uinc * 4; + NDiffU *= 8; + pinc *= 8; + pnext *= 8; + pos *= 8; + posi = pos + 1; + } +} + +//------------------------------------------------------------------------------ +// recursive bfstages calls to maximize on chip cache efficiency +//------------------------------------------------------------------------------ +template +void g_fft::ifftrecurs(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride, + int NDiffU, int StageCnt) +{ + int i1; + + if (M <= (int) MCACHE) + ibfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); // RADIX 8 Stages + else { + for (i1 = 0; i1 < 8; i1++) { + ifftrecurs(&ioptr[i1 * POW2(M - 3) * 2], M - 3, Utbl, 8 * Ustride, + NDiffU, StageCnt - 1); // RADIX 8 Stages + } + ibfstages(ioptr, M, Utbl, Ustride, POW2(M - 3), 1); // RADIX 8 Stage + } +} + +//------------------------------------------------------------------------------ +// Compute in-place inverse complex fft on the rows of the input array +// INPUTS +// *ioptr = input data array +// M = log2 of fft size +// *Utbl = cosine table +// *BRLow = bit reversed counter table +// OUTPUTS +// *ioptr = output data array +//------------------------------------------------------------------------------ +template +void g_fft::iffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow) +{ + int StageCnt; + int NDiffU; + const FFT_TYPE scale = 1.0 / POW2(M); + + switch (M) { + case 0: + break; + case 1: + ifft2pt(ioptr, scale); // a 2 pt fft + break; + case 2: + ifft4pt(ioptr, scale); // a 4 pt fft + break; + case 3: + ifft8pt(ioptr, scale); // an 8 pt fft + break; + default: +// bit reverse and first radix 2 stage + scbitrevR2(ioptr, M, BRLow, scale); + StageCnt = (M - 1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete + if ((M - 1 - (StageCnt * 3)) == 1) { + ibfR2(ioptr, M, NDiffU); // 1 radix 2 stage + NDiffU *= 2; + } + if ((M - 1 - (StageCnt * 3)) == 2) { + ibfR4(ioptr, M, NDiffU); // 1 radix 4 stage + NDiffU *= 4; + } + if (M <= (int) MCACHE) + ibfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages + else + ifftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages + } +} + +//------------------------------------------------------------------------------ +// parts of rffts1 +// RADIX 2 rfft +//------------------------------------------------------------------------------ +template +void g_fft::rfft1pt(FFT_TYPE *ioptr) +{ + FFT_TYPE f0r, f0i; + FFT_TYPE t0r, t0i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + +// finish rfft + t0r = f0r + f0i; + t0i = f0r - f0i; + +// store result + ioptr[0] = t0r; + ioptr[1] = t0i; +} + +//------------------------------------------------------------------------------ +// RADIX 4 rfft +//------------------------------------------------------------------------------ +template +void g_fft::rfft2pt(FFT_TYPE *ioptr) +{ + FFT_TYPE f0r, f0i, f1r, f1i; + FFT_TYPE t0r, t0i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[2]; + f1i = ioptr[3]; + +// Butterflys +// f0 - - t0 +// f1 - 1 - f1 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f1i - f0i; +// finish rfft + f0r = t0r + t0i; + f0i = t0r - t0i; + +// store result + ioptr[0] = f0r; + ioptr[1] = f0i; + ioptr[2] = f1r; + ioptr[3] = f1i; +} + +//------------------------------------------------------------------------------ +// RADIX 8 rfft +//------------------------------------------------------------------------------ +template +void g_fft::rfft4pt(FFT_TYPE *ioptr) +{ + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE t0r, t0i, t1r, t1i; + FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4) + const FFT_TYPE Two = 2.0; + const FFT_TYPE scale = 0.5; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[4]; + f1i = ioptr[5]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f3r = ioptr[6]; + f3i = ioptr[7]; + +// Butterflys +// f0 - - t0 - - f0 +// f1 - 1 - f1 - - f1 +// f2 - - f2 - 1 - f2 +// f3 - 1 - t1 - -i - f3 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = f2i - t0i; // neg for rfft + + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; + +// finish rfft + t0r = f0r + f0i; // compute Re(x[0]) + t0i = f0r - f0i; // compute Re(x[N/2]) + + t1r = f1r + f3r; + t1i = f1i - f3i; + f0r = f1i + f3i; + f0i = f3r - f1r; + + f1r = t1r + w0r * f0r + w0r * f0i; + f1i = t1i - w0r * f0r + w0r * f0i; + f3r = Two * t1r - f1r; + f3i = f1i - Two * t1i; + +// store result + ioptr[4] = f2r; + ioptr[5] = f2i; + ioptr[0] = t0r; + ioptr[1] = t0i; + + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; + ioptr[6] = scale * f3r; + ioptr[7] = scale * f3i; +} + +//------------------------------------------------------------------------------ +// RADIX 16 rfft +//------------------------------------------------------------------------------ +template +void g_fft::rfft8pt(FFT_TYPE *ioptr) +{ + FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4) + FFT_TYPE w1r = FFT_COSPID8; // cos(pi/8) + FFT_TYPE w1i = FFT_SINPID8; // sin(pi/8) + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = 2.0; + const FFT_TYPE scale = 0.5; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + f1r = ioptr[8]; + f1i = ioptr[9]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f7r = ioptr[14]; + f7i = ioptr[15]; + +// Butterflys +// f0 - - t0 - - f0 - - f0 +// f1 - 1 - f1 - - f1 - - f1 +// f2 - - f2 - 1 - f2 - - f2 +// f3 - 1 - t1 - -i - f3 - - f3 +// f4 - - t0 - - f4 - 1 - t0 +// f5 - 1 - f5 - - f5 - w3 - f4 +// f6 - - f6 - 1 - f6 - -i - t1 +// f7 - 1 - t1 - -i - f7 - iw3- f6 + + t0r = f0r + f1r; + t0i = f0i + f1i; + f1r = f0r - f1r; + f1i = f0i - f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r - t1i; + f3i = f1i + t1r; + f1r = f1r + t1i; + f1i = f1i - t1r; + + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; + + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; + + f7r = f5r - t1i; + f7i = f5i + t1r; + f5r = f5r + t1i; + f5i = f5i - t1r; + + t0r = f0r - f4r; + t0i = f4i - f0i; // neg for rfft + f0r = f0r + f4r; + f0i = f0i + f4i; + + t1r = f2r - f6i; + t1i = f2i + f6r; + f2r = f2r + f6i; + f2i = f2i - f6r; + + f4r = f1r - f5r * w0r - f5i * w0r; + f4i = f1i + f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; + + f6r = f3r + f7r * w0r - f7i * w0r; + f6i = f3i + f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; + +// finish rfft + f5r = f0r + f0i; // compute Re(x[0]) + f5i = f0r - f0i; // compute Re(x[N/2]) + + f0r = f2r + t1r; + f0i = f2i - t1i; + f7r = f2i + t1i; + f7i = t1r - f2r; + + f2r = f0r + w0r * f7r + w0r * f7i; + f2i = f0i - w0r * f7r + w0r * f7i; + t1r = Two * f0r - f2r; + t1i = f2i - Two * f0i; + + f0r = f1r + f6r; + f0i = f1i - f6i; + f7r = f1i + f6i; + f7i = f6r - f1r; + + f1r = f0r + w1r * f7r + w1i * f7i; + f1i = f0i - w1i * f7r + w1r * f7i; + f6r = Two * f0r - f1r; + f6i = f1i - Two * f0i; + + f0r = f3r + f4r; + f0i = f3i - f4i; + f7r = f3i + f4i; + f7i = f4r - f3r; + + f3r = f0r + w1i * f7r + w1r * f7i; + f3i = f0i - w1r * f7r + w1i * f7i; + f4r = Two * f0r - f3r; + f4i = f3i - Two * f0i; + +// store result + ioptr[8] = t0r; + ioptr[9] = t0i; + ioptr[0] = f5r; + ioptr[1] = f5i; + + ioptr[4] = scale * f2r; + ioptr[5] = scale * f2i; + ioptr[12] = scale * t1r; + ioptr[13] = scale * t1i; + + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; + ioptr[6] = scale * f3r; + ioptr[7] = scale * f3i; + ioptr[10] = scale * f4r; + ioptr[11] = scale * f4i; + ioptr[14] = scale * f6r; + ioptr[15] = scale * f6i; +} + +//------------------------------------------------------------------------------ +// Finish RFFT +//------------------------------------------------------------------------------ +template +void g_fft::frstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl) +{ + unsigned int pos; + unsigned int posi; + unsigned int diffUcnt; + + FFT_TYPE *p0r, *p1r; + FFT_TYPE *u0r, *u0i; + + FFT_TYPE w0r, w0i; + FFT_TYPE f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = 2.0; + + pos = POW2(M - 1); + posi = pos + 1; + + p0r = ioptr; + p1r = ioptr + pos / 2; + + u0r = Utbl + POW2(M - 3); + + w0r = *u0r, f0r = *(p0r); + f0i = *(p0r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + + t0r = Two * f0r + Two * f0i; // compute Re(x[0]) + t0i = Two * f0r - Two * f0i; // compute Re(x[N/2]) + t1r = f4r + f4r; + t1i = -f4i - f4i; + + f0r = f1r + f5r; + f0i = f1i - f5i; + f4r = f1i + f5i; + f4i = f5r - f1r; + + f1r = f0r + w0r * f4r + w0r * f4i; + f1i = f0i - w0r * f4r + w0r * f4i; + f5r = Two * f0r - f1r; + f5i = f1i - Two * f0i; + + *(p0r) = t0r; + *(p0r + 1) = t0i; + *(p0r + pos) = t1r; + *(p0r + posi) = t1i; + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + + u0r = Utbl + 1; + u0i = Utbl + (POW2(M - 2) - 1); + + w0r = *u0r, w0i = *u0i; + + p0r = (ioptr + 2); + p1r = (ioptr + (POW2(M - 2) - 1) * 2); + +// Butterflys +// f0 - t0 - - f0 +// f5 - t1 - w0 - f5 +// f1 - t0 - - f1 +// f4 - t1 -iw0 - f4 + + for (diffUcnt = POW2(M - 3) - 1; diffUcnt > 0; diffUcnt--) { + + f0r = *(p0r); + f0i = *(p0r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + + t0r = f0r + f5r; + t0i = f0i - f5i; + t1r = f0i + f5i; + t1i = f5r - f0r; + + f0r = t0r + w0r * t1r + w0i * t1i; + f0i = t0i - w0i * t1r + w0r * t1i; + f5r = Two * t0r - f0r; + f5i = f0i - Two * t0i; + + t0r = f1r + f4r; + t0i = f1i - f4i; + t1r = f1i + f4i; + t1i = f4r - f1r; + + f1r = t0r + w0i * t1r + w0r * t1i; + f1i = t0i - w0r * t1r + w0i * t1i; + f4r = Two * t0r - f1r; + f4i = f1i - Two * t0i; + + *(p0r) = f0r; + *(p0r + 1) = f0i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + + w0r = *++u0r; + w0i = *--u0i; + + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + + p0r += 2; + p1r -= 2; + } +} + +//------------------------------------------------------------------------------ +// Compute in-place real fft on the rows of the input array +// The result is the complex spectra of the positive frequencies +// except the location for the first complex number contains the real +// values for DC and Nyquest +// INPUTS +// *ioptr = real input data array +// M = log2 of fft size +// *Utbl = cosine table +// *BRLow = bit reversed counter table +// OUTPUTS +// *ioptr = output data array in the following order +// Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), +// ... Re(x[N/2-1]), Im(x[N/2-1]). +//------------------------------------------------------------------------------ +template +void g_fft::rffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow) +{ + FFT_TYPE scale; + int StageCnt; + int NDiffU; + + M = M - 1; + switch (M) { + case -1: + break; + case 0: + rfft1pt(ioptr); // a 2 pt fft + break; + case 1: + rfft2pt(ioptr); // a 4 pt fft + break; + case 2: + rfft4pt(ioptr); // an 8 pt fft + break; + case 3: + rfft8pt(ioptr); // a 16 pt fft + break; + default: + scale = 0.5; +// bit reverse and first radix 2 stage + scbitrevR2(ioptr, M, BRLow, scale); + StageCnt = (M - 1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete + if ((M - 1 - (StageCnt * 3)) == 1) { + bfR2(ioptr, M, NDiffU); // 1 radix 2 stage + NDiffU *= 2; + } + if ((M - 1 - (StageCnt * 3)) == 2) { + bfR4(ioptr, M, NDiffU); // 1 radix 4 stage + NDiffU *= 4; + } + if (M <= (int) MCACHE) + bfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages + else + fftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages + frstage(ioptr, M + 1, Utbl); + } +} + +//------------------------------------------------------------------------------ +// parts of riffts1 +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +// RADIX 2 rifft +//------------------------------------------------------------------------------ +template +void g_fft::rifft1pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE f0r, f0i; + FFT_TYPE t0r, t0i; + +// bit reversed load + f0r = ioptr[0]; + f0i = ioptr[1]; + +// finish rfft + t0r = f0r + f0i; + t0i = f0r - f0i; + +// store result + ioptr[0] = scale * t0r; + ioptr[1] = scale * t0i; +} + +//------------------------------------------------------------------------------ +// RADIX 4 rifft +//------------------------------------------------------------------------------ +template +void g_fft::rifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE f0r, f0i, f1r, f1i; + FFT_TYPE t0r, t0i; + const FFT_TYPE Two = FFT_TYPE(2.0); + +// bit reversed load + t0r = ioptr[0]; + t0i = ioptr[1]; + f1r = Two * ioptr[2]; + f1i = Two * ioptr[3]; + +// start rifft + f0r = t0r + t0i; + f0i = t0r - t0i; + +// Butterflys +// f0 - - t0 +// f1 - 1 - f1 + + t0r = f0r + f1r; + t0i = f0i - f1i; + f1r = f0r - f1r; + f1i = f0i + f1i; + +// store result + ioptr[0] = scale * t0r; + ioptr[1] = scale * t0i; + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; +} + +//------------------------------------------------------------------------------ +// RADIX 8 rifft +//------------------------------------------------------------------------------ +template +void g_fft::rifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE t0r, t0i, t1r, t1i; + FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4) + const FFT_TYPE Two = FFT_TYPE(2.0); + +// bit reversed load + t0r = ioptr[0]; + t0i = ioptr[1]; + f2r = ioptr[2]; + f2i = ioptr[3]; + f1r = Two * ioptr[4]; + f1i = Two * ioptr[5]; + f3r = ioptr[6]; + f3i = ioptr[7]; + +// start rfft + f0r = t0r + t0i; // compute Re(x[0]) + f0i = t0r - t0i; // compute Re(x[N/2]) + + t1r = f2r + f3r; + t1i = f2i - f3i; + t0r = f2r - f3r; + t0i = f2i + f3i; + + f2r = t1r - w0r * t0r - w0r * t0i; + f2i = t1i + w0r * t0r - w0r * t0i; + f3r = Two * t1r - f2r; + f3i = f2i - Two * t1i; + +// Butterflys +// f0 - - t0 - - f0 +// f1 - 1 - f1 - - f1 +// f2 - - f2 - 1 - f2 +// f3 - 1 - t1 - i - f3 + + t0r = f0r + f1r; + t0i = f0i - f1i; + f1r = f0r - f1r; + f1i = f0i + f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; + +// store result + ioptr[0] = scale * f0r; + ioptr[1] = scale * f0i; + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; + ioptr[4] = scale * f2r; + ioptr[5] = scale * f2i; + ioptr[6] = scale * f3r; + ioptr[7] = scale * f3i; +} + +//------------------------------------------------------------------------------ +// RADIX 16 rifft +//------------------------------------------------------------------------------ +template +void g_fft::rifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale) +{ + FFT_TYPE w0r = (FFT_TYPE) (1.0 / FFT_ROOT2); // cos(pi/4) + FFT_TYPE w1r = FFT_COSPID8; // cos(pi/8) + FFT_TYPE w1i = FFT_SINPID8; // sin(pi/8) + FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i; + FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = FFT_TYPE(2.0); + +// bit reversed load + t0r = ioptr[0]; + t0i = ioptr[1]; + f4r = ioptr[2]; + f4i = ioptr[3]; + f2r = ioptr[4]; + f2i = ioptr[5]; + f6r = ioptr[6]; + f6i = ioptr[7]; + f1r = Two * ioptr[8]; + f1i = Two * ioptr[9]; + f5r = ioptr[10]; + f5i = ioptr[11]; + f3r = ioptr[12]; + f3i = ioptr[13]; + f7r = ioptr[14]; + f7i = ioptr[15]; + +// start rfft + f0r = t0r + t0i; // compute Re(x[0]) + f0i = t0r - t0i; // compute Re(x[N/2]) + + t0r = f2r + f3r; + t0i = f2i - f3i; + t1r = f2r - f3r; + t1i = f2i + f3i; + + f2r = t0r - w0r * t1r - w0r * t1i; + f2i = t0i + w0r * t1r - w0r * t1i; + f3r = Two * t0r - f2r; + f3i = f2i - Two * t0i; + + t0r = f4r + f7r; + t0i = f4i - f7i; + t1r = f4r - f7r; + t1i = f4i + f7i; + + f4r = t0r - w1i * t1r - w1r * t1i; + f4i = t0i + w1r * t1r - w1i * t1i; + f7r = Two * t0r - f4r; + f7i = f4i - Two * t0i; + + t0r = f6r + f5r; + t0i = f6i - f5i; + t1r = f6r - f5r; + t1i = f6i + f5i; + + f6r = t0r - w1r * t1r - w1i * t1i; + f6i = t0i + w1i * t1r - w1r * t1i; + f5r = Two * t0r - f6r; + f5i = f6i - Two * t0i; + +// Butterflys +// f0 - - t0 - - f0 - - f0 +// f1* - 1 - f1 - - f1 - - f1 +// f2 - - f2 - 1 - f2 - - f2 +// f3 - 1 - t1 - i - f3 - - f3 +// f4 - - t0 - - f4 - 1 - t0 +// f5 - 1 - f5 - - f5 - w3 - f4 +// f6 - - f6 - 1 - f6 - i - t1 +// f7 - 1 - t1 - i - f7 - iw3- f6 + + t0r = f0r + f1r; + t0i = f0i - f1i; + f1r = f0r - f1r; + f1i = f0i + f1i; + + t1r = f2r - f3r; + t1i = f2i - f3i; + f2r = f2r + f3r; + f2i = f2i + f3i; + + f0r = t0r + f2r; + f0i = t0i + f2i; + f2r = t0r - f2r; + f2i = t0i - f2i; + + f3r = f1r + t1i; + f3i = f1i - t1r; + f1r = f1r - t1i; + f1i = f1i + t1r; + + t0r = f4r + f5r; + t0i = f4i + f5i; + f5r = f4r - f5r; + f5i = f4i - f5i; + + t1r = f6r - f7r; + t1i = f6i - f7i; + f6r = f6r + f7r; + f6i = f6i + f7i; + + f4r = t0r + f6r; + f4i = t0i + f6i; + f6r = t0r - f6r; + f6i = t0i - f6i; + + f7r = f5r + t1i; + f7i = f5i - t1r; + f5r = f5r - t1i; + f5i = f5i + t1r; + + t0r = f0r - f4r; + t0i = f0i - f4i; + f0r = f0r + f4r; + f0i = f0i + f4i; + + t1r = f2r + f6i; + t1i = f2i - f6r; + f2r = f2r - f6i; + f2i = f2i + f6r; + + f4r = f1r - f5r * w0r + f5i * w0r; + f4i = f1i - f5r * w0r - f5i * w0r; + f1r = f1r * Two - f4r; + f1i = f1i * Two - f4i; + + f6r = f3r + f7r * w0r + f7i * w0r; + f6i = f3i - f7r * w0r + f7i * w0r; + f3r = f3r * Two - f6r; + f3i = f3i * Two - f6i; + +// store result + ioptr[0] = scale * f0r; + ioptr[1] = scale * f0i; + ioptr[2] = scale * f1r; + ioptr[3] = scale * f1i; + ioptr[4] = scale * f2r; + ioptr[5] = scale * f2i; + ioptr[6] = scale * f3r; + ioptr[7] = scale * f3i; + ioptr[8] = scale * t0r; + ioptr[9] = scale * t0i; + ioptr[10] = scale * f4r; + ioptr[11] = scale * f4i; + ioptr[12] = scale * t1r; + ioptr[13] = scale * t1i; + ioptr[14] = scale * f6r; + ioptr[15] = scale * f6i; +} + +//------------------------------------------------------------------------------ +// Start RIFFT +//------------------------------------------------------------------------------ +template +void g_fft::ifrstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl) +{ + unsigned int pos; + unsigned int posi; + unsigned int diffUcnt; + + FFT_TYPE *p0r, *p1r; + FFT_TYPE *u0r, *u0i; + + FFT_TYPE w0r, w0i; + FFT_TYPE f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i; + FFT_TYPE t0r, t0i, t1r, t1i; + const FFT_TYPE Two = FFT_TYPE(2.0); + + pos = POW2(M - 1); + posi = pos + 1; + + p0r = ioptr; + p1r = ioptr + pos / 2; + + u0r = Utbl + POW2(M - 3); + + w0r = *u0r, f0r = *(p0r); + f0i = *(p0r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + + t0r = f0r + f0i; + t0i = f0r - f0i; + t1r = f4r + f4r; + t1i = -f4i - f4i; + + f0r = f1r + f5r; + f0i = f1i - f5i; + f4r = f1r - f5r; + f4i = f1i + f5i; + + f1r = f0r - w0r * f4r - w0r * f4i; + f1i = f0i + w0r * f4r - w0r * f4i; + f5r = Two * f0r - f1r; + f5i = f1i - Two * f0i; + + *(p0r) = t0r; + *(p0r + 1) = t0i; + *(p0r + pos) = t1r; + *(p0r + posi) = t1i; + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + + u0r = Utbl + 1; + u0i = Utbl + (POW2(M - 2) - 1); + + w0r = *u0r, w0i = *u0i; + + p0r = (ioptr + 2); + p1r = (ioptr + (POW2(M - 2) - 1) * 2); + +// Butterflys +// f0 - t0 - f0 +// f1 - t1 -w0- f1 +// f2 - t0 - f2 +// f3 - t1 -iw0- f3 + + for (diffUcnt = POW2(M - 3) - 1; diffUcnt > 0; diffUcnt--) { + + f0r = *(p0r); + f0i = *(p0r + 1); + f5r = *(p1r + pos); + f5i = *(p1r + posi); + f1r = *(p1r); + f1i = *(p1r + 1); + f4r = *(p0r + pos); + f4i = *(p0r + posi); + + t0r = f0r + f5r; + t0i = f0i - f5i; + t1r = f0r - f5r; + t1i = f0i + f5i; + + f0r = t0r - w0i * t1r - w0r * t1i; + f0i = t0i + w0r * t1r - w0i * t1i; + f5r = Two * t0r - f0r; + f5i = f0i - Two * t0i; + + t0r = f1r + f4r; + t0i = f1i - f4i; + t1r = f1r - f4r; + t1i = f1i + f4i; + + f1r = t0r - w0r * t1r - w0i * t1i; + f1i = t0i + w0i * t1r - w0r * t1i; + f4r = Two * t0r - f1r; + f4i = f1i - Two * t0i; + + *(p0r) = f0r; + *(p0r + 1) = f0i; + *(p1r + pos) = f5r; + *(p1r + posi) = f5i; + + w0r = *++u0r; + w0i = *--u0i; + + *(p1r) = f1r; + *(p1r + 1) = f1i; + *(p0r + pos) = f4r; + *(p0r + posi) = f4i; + + p0r += 2; + p1r -= 2; + } +} + +//------------------------------------------------------------------------------ +// Compute in-place real ifft on the rows of the input array +// data order as from rffts1 +// INPUTS +// *ioptr = input data array in the following order +// M = log2 of fft size +// Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), +// Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). +// *Utbl = cosine table +// *BRLow = bit reversed counter table +// OUTPUTS +// *ioptr = real output data array +//------------------------------------------------------------------------------ +template +void g_fft::riffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow) +{ + FFT_TYPE scale; + int StageCnt; + int NDiffU; + + scale = (FFT_TYPE)(1.0 / (float)((int)POW2(M))); + M = M - 1; + switch (M) { + case -1: + break; + case 0: + rifft1pt(ioptr, scale); // a 2 pt fft + break; + case 1: + rifft2pt(ioptr, scale); // a 4 pt fft + break; + case 2: + rifft4pt(ioptr, scale); // an 8 pt fft + break; + case 3: + rifft8pt(ioptr, scale); // a 16 pt fft + break; + default: + ifrstage(ioptr, M + 1, Utbl); +// bit reverse and first radix 2 stage + scbitrevR2(ioptr, M, BRLow, scale); + StageCnt = (M - 1) / 3; // number of radix 8 stages + NDiffU = 2; // one radix 2 stage already complete + if ((M - 1 - (StageCnt * 3)) == 1) { + ibfR2(ioptr, M, NDiffU); // 1 radix 2 stage + NDiffU *= 2; + } + if ((M - 1 - (StageCnt * 3)) == 2) { + ibfR4(ioptr, M, NDiffU); // 1 radix 4 stage + NDiffU *= 4; + } + if (M <= (int) MCACHE) + ibfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages + else + ifftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages + } +} + +//============================================================================== +// End of original C functions +// +// Wrapper methods for simple class access +//============================================================================== + +//------------------------------------------------------------------------------ +// malloc and init cosine and bit reversed tables for a given size +// fft, ifft, rfft, rifft +// INPUTS +// M = log2 of fft size (ex M=10 for 1024 point fft) +// OUTPUTS +// private cosine and bit reversed tables +//------------------------------------------------------------------------------ +template +void g_fft::fftInit() +{ + for (int i = 0; i < 32; i++) { + FFT_table_1[i] = (FFT_TYPE*)0; + FFT_table_2[i] = (short int*)0; + } + + FFT_N = ConvertFFTSize(FFT_size); + +// create and initialize cos table + FFT_table_1[FFT_N] = new FFT_TYPE[(POW2(FFT_N) / 4 + 1)]; + fftCosInit(FFT_N, FFT_table_1[FFT_N]); + +// create and initialize bit reverse tables + FFT_table_2[FFT_N/2] = new short[POW2(FFT_N/2 - 1)]; + fftBRInit(FFT_N, FFT_table_2[FFT_N/2]); + + FFT_table_2[(FFT_N - 1) / 2] = new short[POW2((FFT_N - 1) / 2 - 1)]; + fftBRInit(FFT_N - 1, FFT_table_2[(FFT_N - 1) / 2]); + + Utbl = ((FFT_TYPE**) FFT_table_1)[FFT_N]; + BRLow = ((short**) FFT_table_2)[FFT_N / 2]; + +} + +//------------------------------------------------------------------------------ +// convert from N to LOG2(N) +//------------------------------------------------------------------------------ +template +int g_fft::ConvertFFTSize(int N) +{ + if (N <= 0) N = -N; + + switch (N) { + case 0x00000001: return 0; // 1 + case 0x00000002: return 1; // 2 + case 0x00000004: return 2; // 4 + case 0x00000008: return 3; // 8 + case 0x00000010: return 4; // 16 + case 0x00000020: return 5; // 32 + case 0x00000040: return 6; // 64 + case 0x00000080: return 7; // 128 + case 0x00000100: return 8; // 256 + case 0x00000200: return 9; // 512 + case 0x00000400: return 10; // 1024 + case 0x00000800: return 11; // 2048 + case 0x00001000: return 12; // 4096 + case 0x00002000: return 13; // 8192 + case 0x00004000: return 14; // 16384 + case 0x00008000: return 15; // 32768 + case 0x00010000: return 16; // 65536 + case 0x00020000: return 17; // 131072 + case 0x00040000: return 18; // 262144 + case 0x00080000: return 19; // 525288 + case 0x00100000: return 20; // 1048576 + case 0x00200000: return 21; // 2097152 + case 0x00400000: return 22; // 4194304 + case 0x00800000: return 23; // 8388608 + case 0x01000000: return 24; // 16777216 + case 0x02000000: return 25; // 33554432 + case 0x04000000: return 26; // 67108864 + case 0x08000000: return 27; // 134217728 + case 0x10000000: return 28; // 268435456 + } + return 0; +} + +//------------------------------------------------------------------------------ +// Compute in-place complex FFT +// FFTsize: FFT length in samples +// buf: array of FFTsize*2 FFT_TYPE values, +// in interleaved real/imaginary format +//------------------------------------------------------------------------------ +template +void g_fft::ComplexFFT(std::complex *buf) +{ + void *ptr = buf; + FFT_TYPE *nbuf = static_cast(ptr); + ffts1(nbuf, FFT_N, Utbl, BRLow); +} + +//------------------------------------------------------------------------------ +// Compute in-place inverse complex FFT +// FFTsize: FFT length in samples +// buf: array of FFTsize*2 FFT_TYPE values, +// in interleaved real/imaginary format +// Output should be scaled by the return value of +// GetInverseComplexFFTScale(fft_struct, FFTsize). +//------------------------------------------------------------------------------ +template +void g_fft::InverseComplexFFT(std::complex *buf) +{ + void *ptr = buf; + FFT_TYPE *nbuf = static_cast(ptr); + iffts1(nbuf, FFT_N, Utbl, BRLow); +} + +//------------------------------------------------------------------------------ +// Compute in-place real FFT +// FFTsize: FFT length in samples +// buf: array of FFTsize FFT_TYPE values; output is in interleaved +// real/imaginary format, except for buf[1] which is the real +// part for the Nyquist frequency +//------------------------------------------------------------------------------ +template +void g_fft::RealFFT(std::complex *buf) +{ + void *ptr = buf; + FFT_TYPE *nbuf = static_cast(ptr); + rffts1(nbuf, FFT_N, Utbl, BRLow); +} + +//------------------------------------------------------------------------------ +// Compute in-place inverse real FFT +// FFTsize: FFT length in samples +// buf: array of FFTsize FFT_TYPE values; input is expected to be in +// interleaved real/imaginary format, except for buf[1] which +// is the real part for the Nyquist frequency +// Output should be scaled by the return value of +// GetInverseRealFFTScale(fft_struct, FFTsize). +//------------------------------------------------------------------------------ +template +void g_fft::InverseRealFFT(std::complex *buf) +{ + void *ptr = buf; + FFT_TYPE *nbuf = static_cast(ptr); + riffts1(nbuf, FFT_N, Utbl, BRLow); +} + +//------------------------------------------------------------------------------ +// Returns the amplitude scale that should be applied to the result of +// an inverse complex FFT with a length of 'FFTsize' samples. +//------------------------------------------------------------------------------ +template +FFT_TYPE g_fft::GetInverseComplexFFTScale() +{ + return FFT_TYPE(1.0); +} + +//------------------------------------------------------------------------------ +// Returns the amplitude scale that should be applied to the result of +// an inverse real FFT with a length of 'FFTsize' samples. +//------------------------------------------------------------------------------ +template +FFT_TYPE g_fft::GetInverseRealFFTScale() +{ + return FFT_TYPE(1.0); +} + +#endif + diff --git a/include-gpl/dsp/misc.h b/include-gpl/dsp/misc.h new file mode 100644 index 000000000..f0c73f1f0 --- /dev/null +++ b/include-gpl/dsp/misc.h @@ -0,0 +1,75 @@ +// ---------------------------------------------------------------------------- +// misc.h -- Miscellaneous helper functions +// +// Copyright (C) 2006-2008 +// Dave Freese, W1HKJ +// +// This file is part of fldigi. These filters were adapted from code contained +// in the gmfsk source code distribution. +// +// Fldigi is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Fldigi is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with fldigi. If not, see . +// ---------------------------------------------------------------------------- + +#ifndef _MISC_H +#define _MISC_H + +#include + +inline float sinc(float x) +{ + return (fabs(x) < 1e-10) ? 1.0 : (sin(M_PI * x) / (M_PI * x)); +} + +inline float cosc(float x) +{ + return (fabs(x) < 1e-10) ? 0.0 : ((1.0 - cos(M_PI * x)) / (M_PI * x)); +} + +inline float clamp(float x, float min, float max) +{ + return (x < min) ? min : ((x > max) ? max : x); +} + +/// This is always called with an int weight +inline float decayavg(float average, float input, int weight) +{ + if (weight <= 1) return input; + return ( ( input - average ) / (float)weight ) + average ; +} + +// following are defined inline to provide best performance +inline float blackman(float x) +{ + return (0.42 - 0.50 * cos(2 * M_PI * x) + 0.08 * cos(4 * M_PI * x)); +} + +inline float hamming(float x) +{ + return 0.54 - 0.46 * cos(2 * M_PI * x); +} + +inline float hanning(float x) +{ + return 0.5 - 0.5 * cos(2 * M_PI * x); +} + +inline float rcos( float t, float T, float alpha=1.0 ) +{ + if( t == 0 ) return 1.0; + float taT = T / (2.0 * alpha); + if( fabs(t) == taT ) return ((alpha/2.0) * sin(M_PI/(2.0*alpha))); + return (sin(M_PI*t/T)/(M_PI*t/T))*cos(alpha*M_PI*t/T)/(1.0-(t/taT)*(t/taT)); +} + +#endif diff --git a/sdrbase/dsp/fftfilt.cxx b/sdrbase/dsp/fftfilt.cxx new file mode 100644 index 000000000..2f7437548 --- /dev/null +++ b/sdrbase/dsp/fftfilt.cxx @@ -0,0 +1,172 @@ +// ---------------------------------------------------------------------------- +// fftfilt.cxx -- Fast convolution Overlap-Add filter +// +// Filter implemented using overlap-add FFT convolution method +// h(t) characterized by Windowed-Sinc impulse response +// +// Reference: +// "The Scientist and Engineer's Guide to Digital Signal Processing" +// by Dr. Steven W. Smith, http://www.dspguide.com +// Chapters 16, 18 and 21 +// +// Copyright (C) 2006-2008 Dave Freese, W1HKJ +// +// This file is part of fldigi. +// +// Fldigi is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Fldigi is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with fldigi. If not, see . +// ---------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +//------------------------------------------------------------------------------ +// initialize the filter +// create forward and reverse FFTs +//------------------------------------------------------------------------------ + +// Only need a single instance of g_fft, used for both forward and reverse +void fftfilt::init_filter() +{ + flen2 = flen >> 1; + fft = new g_fft(flen); + + filter = new cmplx[flen]; + timedata = new cmplx[flen]; + freqdata = new cmplx[flen]; + output = new cmplx[flen]; + ovlbuf = new cmplx[flen2]; + ht = new cmplx[flen]; + + memset(filter, 0, flen * sizeof(cmplx)); + memset(timedata, 0, flen * sizeof(cmplx)); + memset(freqdata, 0, flen * sizeof(cmplx)); + memset(output, 0, flen * sizeof(cmplx)); + memset(ovlbuf, 0, flen2 * sizeof(cmplx)); + memset(ht, 0, flen * sizeof(cmplx)); + + inptr = 0; +} + +//------------------------------------------------------------------------------ +// fft filter +// f1 < f2 ==> band pass filter +// f1 > f2 ==> band reject filter +// f1 == 0 ==> low pass filter +// f2 == 0 ==> high pass filter +//------------------------------------------------------------------------------ +fftfilt::fftfilt(float f1, float f2, int len) +{ + flen = len; + init_filter(); + create_filter(f1, f2); +} + +fftfilt::~fftfilt() +{ + if (fft) delete fft; + + if (filter) delete [] filter; + if (timedata) delete [] timedata; + if (freqdata) delete [] freqdata; + if (output) delete [] output; + if (ovlbuf) delete [] ovlbuf; + if (ht) delete [] ht; +} + +void fftfilt::create_filter(float f1, float f2) +{ +// initialize the filter to zero + memset(ht, 0, flen * sizeof(cmplx)); + +// create the filter shape coefficients by fft +// filter values initialized to the ht response h(t) + bool b_lowpass, b_highpass;//, window; + b_lowpass = (f2 != 0); + b_highpass = (f1 != 0); + + for (int i = 0; i < flen2; i++) { + ht[i] = 0; +//combine lowpass / highpass +// lowpass @ f2 + if (b_lowpass) ht[i] += fsinc(f2, i, flen2); +// highighpass @ f1 + if (b_highpass) ht[i] -= fsinc(f1, i, flen2); + } +// highpass is delta[flen2/2] - h(t) + if (b_highpass && f2 < f1) ht[flen2 / 2] += 1; + + for (int i = 0; i < flen2; i++) + ht[i] *= _blackman(i, flen2); + +// this may change since green fft is in place fft + memcpy(filter, ht, flen * sizeof(cmplx)); + +// ht is flen complex points with imaginary all zero +// first half describes h(t), second half all zeros +// perform the cmplx forward fft to obtain H(w) +// filter is flen/2 complex values + + fft->ComplexFFT(filter); + +// normalize the output filter for unity gain + float scale = 0, mag; + for (int i = 0; i < flen2; i++) { + mag = abs(filter[i]); + if (mag > scale) scale = mag; + } + if (scale != 0) { + for (int i = 0; i < flen; i++) + filter[i] /= scale; + } +} + +// Filter with fast convolution (overlap-add algorithm). +int fftfilt::run(const cmplx & in, cmplx **out) +{ + timedata[inptr++] = in; + + if (inptr < flen2) + return 0; + inptr = 0; + + memcpy(freqdata, timedata, flen * sizeof(cmplx)); + fft->ComplexFFT(freqdata); + + for (int i = 0; i < flen; i++) + freqdata[i] *= filter[i]; + + fft->InverseComplexFFT(freqdata); + + // overlap and add + for (int i = 0; i < flen2; i++) { + output[i] = ovlbuf[i] + freqdata[i]; + ovlbuf[i] = freqdata[i+flen2]; + } + + *out = output; + return flen2; +} +