diff --git a/CMakeLists.txt b/CMakeLists.txt
index e32f9c1cc..5e47d2b91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,7 @@ set(sdrbase_SOURCES
sdrbase/dsp/dspcommands.cpp
sdrbase/dsp/dspengine.cpp
sdrbase/dsp/fftengine.cpp
+ sdrbase/dsp/fftfilt.cxx
sdrbase/dsp/fftwindow.cpp
sdrbase/dsp/interpolator.cpp
sdrbase/dsp/inthalfbandfilter.cpp
@@ -107,17 +108,21 @@ set(sdrbase_HEADERS
include-gpl/dsp/channelizer.h
include/dsp/channelmarker.h
+ include-gpl/dsp/complex.h
include-gpl/dsp/dspcommands.h
include-gpl/dsp/dspengine.h
include/dsp/dsptypes.h
include-gpl/dsp/fftengine.h
+ include-gpl/dsp/fftfilt.h
include-gpl/dsp/fftwengine.h
include-gpl/dsp/fftwindow.h
+ include-gpl/dsp/gfft.h
include-gpl/dsp/interpolator.h
include-gpl/dsp/inthalfbandfilter.h
include/dsp/kissfft.h
include-gpl/dsp/kissengine.h
include-gpl/dsp/lowpass.h
+ include-gpl/dsp/misc.h
include-gpl/dsp/movingaverage.h
include-gpl/dsp/nco.h
sdrbase/dsp/pidcontroller.h
diff --git a/include-gpl/dsp/complex.h b/include-gpl/dsp/complex.h
new file mode 100644
index 000000000..2653a591c
--- /dev/null
+++ b/include-gpl/dsp/complex.h
@@ -0,0 +1,43 @@
+// ----------------------------------------------------------------------------
+// complex.h -- Complex arithmetic
+//
+// Copyright (C) 2006-2008
+// Dave Freese, W1HKJ
+// Copyright (C) 2008
+// Stelios Bounanos, M0GLD
+//
+// This file is part of fldigi.
+//
+// Fldigi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Fldigi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with fldigi. If not, see .
+// ----------------------------------------------------------------------------
+
+#ifndef _COMPLEX_H
+#define _COMPLEX_H
+
+#include
+#include
+
+typedef std::complex cmplx;
+
+inline cmplx cmac (const cmplx *a, const cmplx *b, int ptr, int len) {
+ cmplx z;
+ ptr %= len;
+ for (int i = 0; i < len; i++) {
+ z += a[i] * b[ptr];
+ ptr = (ptr + 1) % len;
+ }
+ return z;
+}
+
+#endif
diff --git a/include-gpl/dsp/fftfilt.h b/include-gpl/dsp/fftfilt.h
new file mode 100644
index 000000000..e5d009383
--- /dev/null
+++ b/include-gpl/dsp/fftfilt.h
@@ -0,0 +1,54 @@
+/*
+ * fftfilt.h -- Fast convolution FIR filter
+*/
+
+#ifndef _FFTFILT_H
+#define _FFTFILT_H
+
+#include "complex.h"
+#include "gfft.h"
+
+//----------------------------------------------------------------------
+
+class fftfilt {
+enum {NONE, BLACKMAN, HAMMING, HANNING};
+
+protected:
+ int flen;
+ int flen2;
+ g_fft *fft;
+ g_fft *ift;
+ cmplx *ht;
+ cmplx *filter;
+ cmplx *timedata;
+ cmplx *freqdata;
+ cmplx *ovlbuf;
+ cmplx *output;
+ int inptr;
+ int pass;
+ int window;
+
+ inline float fsinc(float fc, int i, int len) {
+ return (i == len/2) ? 2.0 * fc:
+ sin(2 * M_PI * fc * (i - len/2)) / (M_PI * (i - len/2));
+ }
+ inline float _blackman(int i, int len) {
+ return (0.42 -
+ 0.50 * cos(2.0 * M_PI * i / len) +
+ 0.08 * cos(4.0 * M_PI * i / len));
+ }
+ void init_filter();
+
+public:
+ fftfilt(float f1, float f2, int len);
+ fftfilt(float f, int len);
+ ~fftfilt();
+// f1 < f2 ==> bandpass
+// f1 > f2 ==> band reject
+ void create_filter(float f1, float f2);
+ void rtty_filter(float);
+
+ int run(const cmplx& in, cmplx **out);
+};
+
+#endif
diff --git a/include-gpl/dsp/gfft.h b/include-gpl/dsp/gfft.h
new file mode 100644
index 000000000..a5fba335b
--- /dev/null
+++ b/include-gpl/dsp/gfft.h
@@ -0,0 +1,3376 @@
+//==============================================================================
+// g_fft.h:
+//
+// FFT library
+// Copyright (C) 2013
+// Dave Freese, W1HKJ
+//
+// based on public domain code by John Green
+// original version is available at
+// http://hyperarchive.lcs.mit.edu/
+// /HyperArchive/Archive/dev/src/ffts-for-risc-2-c.hqx
+//
+// ported to C++ for fldigi by Dave Freese, W1HKJ
+//
+// This file is part of fldigi.
+//
+// Fldigi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Fldigi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with fldigi. If not, see .
+//==============================================================================
+
+#ifndef CGREEN_FFT_H
+#define CGREEN_FFT_H
+
+#include
+
+template
+class g_fft {
+#define FFT_RECIPLN2 1.442695040888963407359924681001892137426 // 1.0/log(2)
+
+// some useful conversions between a number and its power of 2
+#define LOG2(a) (FFT_RECIPLN2*log(a)) // floating point logarithm base 2
+#define POW2(m) ((unsigned int) 1 << (m)) // integer power of 2 for m<32
+
+// fft's with M bigger than this bust primary cache
+#define MCACHE (11 - (sizeof(FFT_TYPE) / 8))
+
+// some math constants to 40 decimal places
+#define FFT_PI 3.141592653589793238462643383279502884197 // pi
+#define FFT_ROOT2 1.414213562373095048801688724209698078569 // sqrt(2)
+#define FFT_COSPID8 0.9238795325112867561281831893967882868224 // cos(pi/8)
+#define FFT_SINPID8 0.3826834323650897717284599840303988667613 // sin(pi/8)
+private:
+ int FFT_size;
+ int FFT_N;
+ FFT_TYPE *FFT_table_1[32];
+ short int *FFT_table_2[32];
+
+ FFT_TYPE *Utbl;
+ short *BRLow;
+
+ void fftInit();
+ int ConvertFFTSize(int);
+
+// base fft methods
+ void riffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow);
+ void ifrstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl);
+ void rifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void rifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void rifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void rifft1pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void rffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow);
+ void frstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl);
+ void rfft8pt(FFT_TYPE *ioptr);
+ void rfft4pt(FFT_TYPE *ioptr);
+ void rfft2pt(FFT_TYPE *ioptr);
+ void rfft1pt(FFT_TYPE *ioptr);
+ void iffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow);
+ void ifftrecurs(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride,
+ int NDiffU, int StageCnt);
+ void ibfstages(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride,
+ int NDiffU, int StageCnt);
+ void ibfR4(FFT_TYPE *ioptr, int M, int NDiffU);
+ void ibfR2(FFT_TYPE *ioptr, int M, int NDiffU);
+ void ifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void ifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void ifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale);
+ void scbitrevR2(FFT_TYPE *ioptr, int M, short *BRLow, FFT_TYPE scale);
+ void ffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow);
+ void fftrecurs(FFT_TYPE *ioptr, int M,
+ FFT_TYPE *Utbl, int Ustride, int NDiffU,
+ int StageCnt);
+ void bfstages(FFT_TYPE *ioptr, int M,
+ FFT_TYPE *Utbl, int Ustride,
+ int NDiffU, int StageCnt);
+ void bfR4(FFT_TYPE *ioptr, int M, int NDiffU);
+ void bfR2(FFT_TYPE *ioptr, int M, int NDiffU);
+ void fft8pt(FFT_TYPE *ioptr);
+ void fft4pt(FFT_TYPE *ioptr);
+ void fft2pt(FFT_TYPE *ioptr);
+ void bitrevR2(FFT_TYPE *ioptr, int M, short *BRLow);
+ void fftBRInit(int M, short *BRLow);
+ void fftCosInit(int M, FFT_TYPE *Utbl);
+
+public:
+ g_fft(int M = 8192) {
+ if (M < 16) M = 16;
+ if (M > 268435456) M = 268435456;
+ FFT_size = M;
+ fftInit();
+ }
+ ~g_fft() {
+ for (int i = 0; i < 32; i++) {
+ if (FFT_table_1[i] != 0) delete [] FFT_table_1[i];
+ if (FFT_table_2[i] != 0) delete [] FFT_table_2[i];
+ }
+ }
+
+ void ComplexFFT(std::complex *buf);
+ void InverseComplexFFT(std::complex *buf);
+ void RealFFT(std::complex *buf);
+ void InverseRealFFT(std::complex *buf);
+ FFT_TYPE GetInverseComplexFFTScale();
+ FFT_TYPE GetInverseRealFFTScale();
+};
+
+//------------------------------------------------------------------------------
+// Compute Utbl, the cosine table for ffts
+// of size (pow(2,M)/4 +1)
+// INPUTS
+// M = log2 of fft size
+// OUTPUTS
+// *Utbl = cosine table
+//------------------------------------------------------------------------------
+template
+void g_fft::fftCosInit(int M, FFT_TYPE *Utbl)
+{
+ unsigned int fftN = POW2(M);
+ unsigned int i1;
+
+ Utbl[0] = FFT_TYPE(1.0);
+ for (i1 = 1; i1 < fftN/4; i1++)
+ Utbl[i1] = (FFT_TYPE)cos((2.0 * FFT_PI * (float)i1) / (float)fftN);
+ Utbl[fftN/4] = FFT_TYPE(0.0);
+}
+
+//------------------------------------------------------------------------------
+// Compute BRLow, the bit reversed table for ffts
+// of size pow(2,M/2 -1)
+// INPUTS
+// M = log2 of fft size
+// OUTPUTS
+// *BRLow = bit reversed counter table
+//------------------------------------------------------------------------------
+template
+void g_fft::fftBRInit(int M, short *BRLow)
+{
+ int Mroot_1 = M / 2 - 1;
+ int Nroot_1 = POW2(Mroot_1);
+ int i1;
+ int bitsum;
+ int bitmask;
+ int bit;
+
+ for (i1 = 0; i1 < Nroot_1; i1++) {
+ bitsum = 0;
+ bitmask = 1;
+ for (bit = 1; bit <= Mroot_1; bitmask <<= 1, bit++)
+ if (i1 & bitmask)
+ bitsum = bitsum + (Nroot_1 >> bit);
+ BRLow[i1] = bitsum;
+ }
+}
+
+//------------------------------------------------------------------------------
+// parts of ffts1
+// bit reverse and first radix 2 stage of forward or inverse fft
+//------------------------------------------------------------------------------
+template
+void g_fft::bitrevR2(FFT_TYPE *ioptr, int M, short *BRLow)
+{
+ FFT_TYPE f0r;
+ FFT_TYPE f0i;
+ FFT_TYPE f1r;
+ FFT_TYPE f1i;
+ FFT_TYPE f2r;
+ FFT_TYPE f2i;
+ FFT_TYPE f3r;
+ FFT_TYPE f3i;
+ FFT_TYPE f4r;
+ FFT_TYPE f4i;
+ FFT_TYPE f5r;
+ FFT_TYPE f5i;
+ FFT_TYPE f6r;
+ FFT_TYPE f6i;
+ FFT_TYPE f7r;
+ FFT_TYPE f7i;
+ FFT_TYPE t0r;
+ FFT_TYPE t0i;
+ FFT_TYPE t1r;
+ FFT_TYPE t1i;
+ FFT_TYPE *p0r;
+ FFT_TYPE *p1r;
+ FFT_TYPE *IOP;
+ FFT_TYPE *iolimit;
+ int Colstart;
+ int iCol;
+ unsigned int posA;
+ unsigned int posAi;
+ unsigned int posB;
+ unsigned int posBi;
+
+ const unsigned int Nrems2 = POW2((M + 3) / 2);
+ const unsigned int Nroot_1_ColInc = POW2(M) - Nrems2;
+ const unsigned int Nroot_1 = POW2(M / 2 - 1) - 1;
+ const unsigned int ColstartShift = (M + 1) / 2 + 1;
+
+ posA = POW2(M); // 1/2 of POW2(M) complex
+ posAi = posA + 1;
+ posB = posA + 2;
+ posBi = posB + 1;
+
+ iolimit = ioptr + Nrems2;
+ for (; ioptr < iolimit; ioptr += POW2(M / 2 + 1)) {
+ for (Colstart = Nroot_1; Colstart >= 0; Colstart--) {
+ iCol = Nroot_1;
+ p0r = ioptr + Nroot_1_ColInc + BRLow[Colstart] * 4;
+ IOP = ioptr + (Colstart << ColstartShift);
+ p1r = IOP + BRLow[iCol] * 4;
+ f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f1r = *(p0r + posA);
+ f1i = *(p0r + posAi);
+ for (; iCol > Colstart;) {
+ f2r = *(p0r + 2);
+ f2i = *(p0r + (2 + 1));
+ f3r = *(p0r + posB);
+ f3i = *(p0r + posBi);
+ f4r = *(p1r);
+ f4i = *(p1r + 1);
+ f5r = *(p1r + posA);
+ f5i = *(p1r + posAi);
+ f6r = *(p1r + 2);
+ f6i = *(p1r + (2 + 1));
+ f7r = *(p1r + posB);
+ f7i = *(p1r + posBi);
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+ t1r = f2r + f3r;
+ t1i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+ f0r = f4r + f5r;
+ f0i = f4i + f5i;
+ f5r = f4r - f5r;
+ f5i = f4i - f5i;
+ f2r = f6r + f7r;
+ f2i = f6i + f7i;
+ f7r = f6r - f7r;
+ f7i = f6i - f7i;
+
+ *(p1r) = t0r;
+ *(p1r + 1) = t0i;
+ *(p1r + 2) = f1r;
+ *(p1r + (2 + 1)) = f1i;
+ *(p1r + posA) = t1r;
+ *(p1r + posAi) = t1i;
+ *(p1r + posB) = f3r;
+ *(p1r + posBi) = f3i;
+ *(p0r) = f0r;
+ *(p0r + 1) = f0i;
+ *(p0r + 2) = f5r;
+ *(p0r + (2 + 1)) = f5i;
+ *(p0r + posA) = f2r;
+ *(p0r + posAi) = f2i;
+ *(p0r + posB) = f7r;
+ *(p0r + posBi) = f7i;
+
+ p0r -= Nrems2;
+ f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f1r = *(p0r + posA);
+ f1i = *(p0r + posAi);
+ iCol -= 1;
+ p1r = IOP + BRLow[iCol] * 4;
+ }
+ f2r = *(p0r + 2);
+ f2i = *(p0r + (2 + 1));
+ f3r = *(p0r + posB);
+ f3i = *(p0r + posBi);
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+ t1r = f2r + f3r;
+ t1i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+
+ *(p0r) = t0r;
+ *(p0r + 1) = t0i;
+ *(p0r + 2) = f1r;
+ *(p0r + (2 + 1)) = f1i;
+ *(p0r + posA) = t1r;
+ *(p0r + posAi) = t1i;
+ *(p0r + posB) = f3r;
+ *(p0r + posBi) = f3i;
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// RADIX 2 fft
+//------------------------------------------------------------------------------
+template
+void g_fft::fft2pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i;
+ FFT_TYPE t0r, t0i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[2];
+ f1i = ioptr[3];
+
+// Butterflys
+// f0 - - - t0
+// f1 - 1 - f1
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+// store result
+ ioptr[0] = t0r;
+ ioptr[1] = t0i;
+ ioptr[2] = f1r;
+ ioptr[3] = f1i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 4 fft
+//------------------------------------------------------------------------------
+template
+void g_fft::fft4pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[4];
+ f1i = ioptr[5];
+ f2r = ioptr[2];
+ f2i = ioptr[3];
+ f3r = ioptr[6];
+ f3i = ioptr[7];
+
+// Butterflys
+// f0 - - t0 - - f0
+// f1 - 1 - f1 - - f1
+// f2 - - f2 - 1 - f2
+// f3 - 1 - t1 - -i - f3
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r - t1i;
+ f3i = f1i + t1r;
+ f1r = f1r + t1i;
+ f1i = f1i - t1r;
+
+// store result
+ ioptr[0] = f0r;
+ ioptr[1] = f0i;
+ ioptr[2] = f1r;
+ ioptr[3] = f1i;
+ ioptr[4] = f2r;
+ ioptr[5] = f2i;
+ ioptr[6] = f3r;
+ ioptr[7] = f3i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 8 fft
+//------------------------------------------------------------------------------
+template
+void g_fft::fft8pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[8];
+ f1i = ioptr[9];
+ f2r = ioptr[4];
+ f2i = ioptr[5];
+ f3r = ioptr[12];
+ f3i = ioptr[13];
+ f4r = ioptr[2];
+ f4i = ioptr[3];
+ f5r = ioptr[10];
+ f5i = ioptr[11];
+ f6r = ioptr[6];
+ f6i = ioptr[7];
+ f7r = ioptr[14];
+ f7i = ioptr[15];
+
+// Butterflys
+// f0 - - t0 - - f0 - - f0
+// f1 - 1 - f1 - - f1 - - f1
+// f2 - - f2 - 1 - f2 - - f2
+// f3 - 1 - t1 - -i - f3 - - f3
+// f4 - - t0 - - f4 - 1 - t0
+// f5 - 1 - f5 - - f5 - w3 - f4
+// f6 - - f6 - 1 - f6 - -i - t1
+// f7 - 1 - t1 - -i - f7 - iw3- f6
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r - t1i;
+ f3i = f1i + t1r;
+ f1r = f1r + t1i;
+ f1i = f1i - t1r;
+
+ t0r = f4r + f5r;
+ t0i = f4i + f5i;
+ f5r = f4r - f5r;
+ f5i = f4i - f5i;
+
+ t1r = f6r - f7r;
+ t1i = f6i - f7i;
+ f6r = f6r + f7r;
+ f6i = f6i + f7i;
+
+ f4r = t0r + f6r;
+ f4i = t0i + f6i;
+ f6r = t0r - f6r;
+ f6i = t0i - f6i;
+
+ f7r = f5r - t1i;
+ f7i = f5i + t1r;
+ f5r = f5r + t1i;
+ f5i = f5i - t1r;
+
+ t0r = f0r - f4r;
+ t0i = f0i - f4i;
+ f0r = f0r + f4r;
+ f0i = f0i + f4i;
+
+ t1r = f2r - f6i;
+ t1i = f2i + f6r;
+ f2r = f2r + f6i;
+ f2i = f2i - f6r;
+
+ f4r = f1r - f5r * w0r - f5i * w0r;
+ f4i = f1i + f5r * w0r - f5i * w0r;
+ f1r = f1r * Two - f4r;
+ f1i = f1i * Two - f4i;
+
+ f6r = f3r + f7r * w0r - f7i * w0r;
+ f6i = f3i + f7r * w0r + f7i * w0r;
+ f3r = f3r * Two - f6r;
+ f3i = f3i * Two - f6i;
+
+// store result
+ ioptr[0] = f0r;
+ ioptr[1] = f0i;
+ ioptr[2] = f1r;
+ ioptr[3] = f1i;
+ ioptr[4] = f2r;
+ ioptr[5] = f2i;
+ ioptr[6] = f3r;
+ ioptr[7] = f3i;
+ ioptr[8] = t0r;
+ ioptr[9] = t0i;
+ ioptr[10] = f4r;
+ ioptr[11] = f4i;
+ ioptr[12] = t1r;
+ ioptr[13] = t1i;
+ ioptr[14] = f6r;
+ ioptr[15] = f6i;
+}
+
+//------------------------------------------------------------------------------
+// 2nd radix 2 stage
+//------------------------------------------------------------------------------
+template
+void g_fft::bfR2(FFT_TYPE *ioptr, int M, int NDiffU)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int pinc;
+ unsigned int pnext;
+ unsigned int NSameU;
+ unsigned int SameUCnt;
+
+ FFT_TYPE *pstrt;
+ FFT_TYPE *p0r, *p1r, *p2r, *p3r;
+
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+
+ pinc = NDiffU * 2; // 2 floats per complex
+ pnext = pinc * 4;
+ pos = 2;
+ posi = pos + 1;
+ NSameU = POW2(M) / 4 / NDiffU; // 4 Us at a time
+ pstrt = ioptr;
+ p0r = pstrt;
+ p1r = pstrt + pinc;
+ p2r = p1r + pinc;
+ p3r = p2r + pinc;
+
+// Butterflys
+// f0 - - f4
+// f1 - 1 - f5
+// f2 - - f6
+// f3 - 1 - f7
+// Butterflys
+// f0 - - f4
+// f1 - 1 - f5
+// f2 - - f6
+// f3 - 1 - f7
+
+ for (SameUCnt = NSameU; SameUCnt > 0; SameUCnt--) {
+
+ f0r = *p0r;
+ f1r = *p1r;
+ f0i = *(p0r + 1);
+ f1i = *(p1r + 1);
+ f2r = *p2r;
+ f3r = *p3r;
+ f2i = *(p2r + 1);
+ f3i = *(p3r + 1);
+
+ f4r = f0r + f1r;
+ f4i = f0i + f1i;
+ f5r = f0r - f1r;
+ f5i = f0i - f1i;
+
+ f6r = f2r + f3r;
+ f6i = f2i + f3i;
+ f7r = f2r - f3r;
+ f7i = f2i - f3i;
+
+ *p0r = f4r;
+ *(p0r + 1) = f4i;
+ *p1r = f5r;
+ *(p1r + 1) = f5i;
+ *p2r = f6r;
+ *(p2r + 1) = f6i;
+ *p3r = f7r;
+ *(p3r + 1) = f7i;
+
+ f0r = *(p0r + pos);
+ f1i = *(p1r + posi);
+ f0i = *(p0r + posi);
+ f1r = *(p1r + pos);
+ f2r = *(p2r + pos);
+ f3i = *(p3r + posi);
+ f2i = *(p2r + posi);
+ f3r = *(p3r + pos);
+
+ f4r = f0r + f1i;
+ f4i = f0i - f1r;
+ f5r = f0r - f1i;
+ f5i = f0i + f1r;
+
+ f6r = f2r + f3i;
+ f6i = f2i - f3r;
+ f7r = f2r - f3i;
+ f7i = f2i + f3r;
+
+ *(p0r + pos) = f4r;
+ *(p0r + posi) = f4i;
+ *(p1r + pos) = f5r;
+ *(p1r + posi) = f5i;
+ *(p2r + pos) = f6r;
+ *(p2r + posi) = f6i;
+ *(p3r + pos) = f7r;
+ *(p3r + posi) = f7i;
+
+ p0r += pnext;
+ p1r += pnext;
+ p2r += pnext;
+ p3r += pnext;
+ }
+}
+
+//------------------------------------------------------------------------------
+// 1 radix 4 stage
+//------------------------------------------------------------------------------
+template
+void g_fft::bfR4(FFT_TYPE *ioptr, int M, int NDiffU)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int pinc;
+ unsigned int pnext;
+ unsigned int pnexti;
+ unsigned int NSameU;
+ unsigned int SameUCnt;
+
+ FFT_TYPE *pstrt;
+ FFT_TYPE *p0r, *p1r, *p2r, *p3r;
+
+ FFT_TYPE w1r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+
+ pinc = NDiffU * 2; // 2 floats per complex
+ pnext = pinc * 4;
+ pnexti = pnext + 1;
+ pos = 2;
+ posi = pos + 1;
+ NSameU = POW2(M) / 4 / NDiffU; // 4 pts per butterfly
+ pstrt = ioptr;
+ p0r = pstrt;
+ p1r = pstrt + pinc;
+ p2r = p1r + pinc;
+ p3r = p2r + pinc;
+
+// Butterflys
+// f0 - - f0 - - f4
+// f1 - 1 - f5 - - f5
+// f2 - - f6 - 1 - f6
+// f3 - 1 - f3 - -i - f7
+// Butterflys
+// f0 - - f4 - - f4
+// f1 - -i - t1 - - f5
+// f2 - - f2 - w1 - f6
+// f3 - -i - f7 - iw1- f7
+
+ f0r = *p0r;
+ f1r = *p1r;
+ f2r = *p2r;
+ f3r = *p3r;
+ f0i = *(p0r + 1);
+ f1i = *(p1r + 1);
+ f2i = *(p2r + 1);
+ f3i = *(p3r + 1);
+
+ f5r = f0r - f1r;
+ f5i = f0i - f1i;
+ f0r = f0r + f1r;
+ f0i = f0i + f1i;
+
+ f6r = f2r + f3r;
+ f6i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+
+ for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) {
+
+ f7r = f5r - f3i;
+ f7i = f5i + f3r;
+ f5r = f5r + f3i;
+ f5i = f5i - f3r;
+
+ f4r = f0r + f6r;
+ f4i = f0i + f6i;
+ f6r = f0r - f6r;
+ f6i = f0i - f6i;
+
+ f2r = *(p2r + pos);
+ f2i = *(p2r + posi);
+ f1r = *(p1r + pos);
+ f1i = *(p1r + posi);
+ f3i = *(p3r + posi);
+ f0r = *(p0r + pos);
+ f3r = *(p3r + pos);
+ f0i = *(p0r + posi);
+
+ *p3r = f7r;
+ *p0r = f4r;
+ *(p3r + 1) = f7i;
+ *(p0r + 1) = f4i;
+ *p1r = f5r;
+ *p2r = f6r;
+ *(p1r + 1) = f5i;
+ *(p2r + 1) = f6i;
+
+ f7r = f2r - f3i;
+ f7i = f2i + f3r;
+ f2r = f2r + f3i;
+ f2i = f2i - f3r;
+
+ f4r = f0r + f1i;
+ f4i = f0i - f1r;
+ t1r = f0r - f1i;
+ t1i = f0i + f1r;
+
+ f5r = t1r - f7r * w1r + f7i * w1r;
+ f5i = t1i - f7r * w1r - f7i * w1r;
+ f7r = t1r * Two - f5r;
+ f7i = t1i * Two - f5i;
+
+ f6r = f4r - f2r * w1r - f2i * w1r;
+ f6i = f4i + f2r * w1r - f2i * w1r;
+ f4r = f4r * Two - f6r;
+ f4i = f4i * Two - f6i;
+
+ f3r = *(p3r + pnext);
+ f0r = *(p0r + pnext);
+ f3i = *(p3r + pnexti);
+ f0i = *(p0r + pnexti);
+ f2r = *(p2r + pnext);
+ f2i = *(p2r + pnexti);
+ f1r = *(p1r + pnext);
+ f1i = *(p1r + pnexti);
+
+ *(p2r + pos) = f6r;
+ *(p1r + pos) = f5r;
+ *(p2r + posi) = f6i;
+ *(p1r + posi) = f5i;
+ *(p3r + pos) = f7r;
+ *(p0r + pos) = f4r;
+ *(p3r + posi) = f7i;
+ *(p0r + posi) = f4i;
+
+ f6r = f2r + f3r;
+ f6i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+
+ f5r = f0r - f1r;
+ f5i = f0i - f1i;
+ f0r = f0r + f1r;
+ f0i = f0i + f1i;
+
+ p3r += pnext;
+ p0r += pnext;
+ p1r += pnext;
+ p2r += pnext;
+ }
+ f7r = f5r - f3i;
+ f7i = f5i + f3r;
+ f5r = f5r + f3i;
+ f5i = f5i - f3r;
+
+ f4r = f0r + f6r;
+ f4i = f0i + f6i;
+ f6r = f0r - f6r;
+ f6i = f0i - f6i;
+
+ f2r = *(p2r + pos);
+ f2i = *(p2r + posi);
+ f1r = *(p1r + pos);
+ f1i = *(p1r + posi);
+ f3i = *(p3r + posi);
+ f0r = *(p0r + pos);
+ f3r = *(p3r + pos);
+ f0i = *(p0r + posi);
+
+ *p3r = f7r;
+ *p0r = f4r;
+ *(p3r + 1) = f7i;
+ *(p0r + 1) = f4i;
+ *p1r = f5r;
+ *p2r = f6r;
+ *(p1r + 1) = f5i;
+ *(p2r + 1) = f6i;
+
+ f7r = f2r - f3i;
+ f7i = f2i + f3r;
+ f2r = f2r + f3i;
+ f2i = f2i - f3r;
+
+ f4r = f0r + f1i;
+ f4i = f0i - f1r;
+ t1r = f0r - f1i;
+ t1i = f0i + f1r;
+
+ f5r = t1r - f7r * w1r + f7i * w1r;
+ f5i = t1i - f7r * w1r - f7i * w1r;
+ f7r = t1r * Two - f5r;
+ f7i = t1i * Two - f5i;
+
+ f6r = f4r - f2r * w1r - f2i * w1r;
+ f6i = f4i + f2r * w1r - f2i * w1r;
+ f4r = f4r * Two - f6r;
+ f4i = f4i * Two - f6i;
+
+ *(p2r + pos) = f6r;
+ *(p1r + pos) = f5r;
+ *(p2r + posi) = f6i;
+ *(p1r + posi) = f5i;
+ *(p3r + pos) = f7r;
+ *(p0r + pos) = f4r;
+ *(p3r + posi) = f7i;
+ *(p0r + posi) = f4i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 8 Stages
+//------------------------------------------------------------------------------
+template
+void g_fft::bfstages(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride,
+ int NDiffU, int StageCnt)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int pinc;
+ unsigned int pnext;
+ unsigned int NSameU;
+ int Uinc;
+ int Uinc2;
+ int Uinc4;
+ unsigned int DiffUCnt;
+ unsigned int SameUCnt;
+ unsigned int U2toU3;
+
+ FFT_TYPE *pstrt;
+ FFT_TYPE *p0r, *p1r, *p2r, *p3r;
+ FFT_TYPE *u0r, *u0i, *u1r, *u1i, *u2r, *u2i;
+
+ FFT_TYPE w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i;
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = FFT_TYPE(2.0);
+
+ pinc = NDiffU * 2; // 2 floats per complex
+ pnext = pinc * 8;
+ pos = pinc * 4;
+ posi = pos + 1;
+ NSameU = POW2(M) / 8 / NDiffU; // 8 pts per butterfly
+ Uinc = (int) NSameU * Ustride;
+ Uinc2 = Uinc * 2;
+ Uinc4 = Uinc * 4;
+ U2toU3 = (POW2(M) / 8) * Ustride;
+ for (; StageCnt > 0; StageCnt--) {
+
+ u0r = &Utbl[0];
+ u0i = &Utbl[POW2(M - 2) * Ustride];
+ u1r = u0r;
+ u1i = u0i;
+ u2r = u0r;
+ u2i = u0i;
+
+ w0r = *u0r;
+ w0i = *u0i;
+ w1r = *u1r;
+ w1i = *u1i;
+ w2r = *u2r;
+ w2i = *u2i;
+ w3r = *(u2r + U2toU3);
+ w3i = *(u2i - U2toU3);
+
+ pstrt = ioptr;
+
+ p0r = pstrt;
+ p1r = pstrt + pinc;
+ p2r = p1r + pinc;
+ p3r = p2r + pinc;
+
+// Butterflys
+// f0 - - t0 - - f0 - - f0
+// f1 - w0- f1 - - f1 - - f1
+// f2 - - f2 - w1- f2 - - f4
+// f3 - w0- t1 - iw1- f3 - - f5
+//
+// f4 - - t0 - - f4 - w2- t0
+// f5 - w0- f5 - - f5 - w3- t1
+// f6 - - f6 - w1- f6 - iw2- f6
+// f7 - w0- t1 - iw1- f7 - iw3- f7
+
+ for (DiffUCnt = NDiffU; DiffUCnt > 0; DiffUCnt--) {
+ f0r = *p0r;
+ f0i = *(p0r + 1);
+ f1r = *p1r;
+ f1i = *(p1r + 1);
+ for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) {
+ f2r = *p2r;
+ f2i = *(p2r + 1);
+ f3r = *p3r;
+ f3i = *(p3r + 1);
+
+ t0r = f0r + f1r * w0r + f1i * w0i;
+ t0i = f0i - f1r * w0i + f1i * w0r;
+ f1r = f0r * Two - t0r;
+ f1i = f0i * Two - t0i;
+
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+
+ f6r = *(p2r + pos);
+ f6i = *(p2r + posi);
+ f7r = *(p3r + pos);
+ f7i = *(p3r + posi);
+
+ t1r = f2r - f3r * w0r - f3i * w0i;
+ t1i = f2i + f3r * w0i - f3i * w0r;
+ f2r = f2r * Two - t1r;
+ f2i = f2i * Two - t1i;
+
+ f0r = t0r + f2r * w1r + f2i * w1i;
+ f0i = t0i - f2r * w1i + f2i * w1r;
+ f2r = t0r * Two - f0r;
+ f2i = t0i * Two - f0i;
+
+ f3r = f1r + t1r * w1i - t1i * w1r;
+ f3i = f1i + t1r * w1r + t1i * w1i;
+ f1r = f1r * Two - f3r;
+ f1i = f1i * Two - f3i;
+
+ t0r = f4r + f5r * w0r + f5i * w0i;
+ t0i = f4i - f5r * w0i + f5i * w0r;
+ f5r = f4r * Two - t0r;
+ f5i = f4i * Two - t0i;
+
+ t1r = f6r - f7r * w0r - f7i * w0i;
+ t1i = f6i + f7r * w0i - f7i * w0r;
+ f6r = f6r * Two - t1r;
+ f6i = f6i * Two - t1i;
+
+ f4r = t0r + f6r * w1r + f6i * w1i;
+ f4i = t0i - f6r * w1i + f6i * w1r;
+ f6r = t0r * Two - f4r;
+ f6i = t0i * Two - f4i;
+
+ f7r = f5r + t1r * w1i - t1i * w1r;
+ f7i = f5i + t1r * w1r + t1i * w1i;
+ f5r = f5r * Two - f7r;
+ f5i = f5i * Two - f7i;
+
+ t0r = f0r - f4r * w2r - f4i * w2i;
+ t0i = f0i + f4r * w2i - f4i * w2r;
+ f0r = f0r * Two - t0r;
+ f0i = f0i * Two - t0i;
+
+ t1r = f1r - f5r * w3r - f5i * w3i;
+ t1i = f1i + f5r * w3i - f5i * w3r;
+ f1r = f1r * Two - t1r;
+ f1i = f1i * Two - t1i;
+
+ *(p0r + pos) = t0r;
+ *(p1r + pos) = t1r;
+ *(p0r + posi) = t0i;
+ *(p1r + posi) = t1i;
+ *p0r = f0r;
+ *p1r = f1r;
+ *(p0r + 1) = f0i;
+ *(p1r + 1) = f1i;
+
+ p0r += pnext;
+ f0r = *p0r;
+ f0i = *(p0r + 1);
+
+ p1r += pnext;
+
+ f1r = *p1r;
+ f1i = *(p1r + 1);
+
+ f4r = f2r - f6r * w2i + f6i * w2r;
+ f4i = f2i - f6r * w2r - f6i * w2i;
+ f6r = f2r * Two - f4r;
+ f6i = f2i * Two - f4i;
+
+ f5r = f3r - f7r * w3i + f7i * w3r;
+ f5i = f3i - f7r * w3r - f7i * w3i;
+ f7r = f3r * Two - f5r;
+ f7i = f3i * Two - f5i;
+
+ *p2r = f4r;
+ *p3r = f5r;
+ *(p2r + 1) = f4i;
+ *(p3r + 1) = f5i;
+ *(p2r + pos) = f6r;
+ *(p3r + pos) = f7r;
+ *(p2r + posi) = f6i;
+ *(p3r + posi) = f7i;
+
+ p2r += pnext;
+ p3r += pnext;
+ }
+
+ f2r = *p2r;
+ f2i = *(p2r + 1);
+ f3r = *p3r;
+ f3i = *(p3r + 1);
+
+ t0r = f0r + f1r * w0r + f1i * w0i;
+ t0i = f0i - f1r * w0i + f1i * w0r;
+ f1r = f0r * Two - t0r;
+ f1i = f0i * Two - t0i;
+
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+
+ f6r = *(p2r + pos);
+ f6i = *(p2r + posi);
+ f7r = *(p3r + pos);
+ f7i = *(p3r + posi);
+
+ t1r = f2r - f3r * w0r - f3i * w0i;
+ t1i = f2i + f3r * w0i - f3i * w0r;
+ f2r = f2r * Two - t1r;
+ f2i = f2i * Two - t1i;
+
+ f0r = t0r + f2r * w1r + f2i * w1i;
+ f0i = t0i - f2r * w1i + f2i * w1r;
+ f2r = t0r * Two - f0r;
+ f2i = t0i * Two - f0i;
+
+ f3r = f1r + t1r * w1i - t1i * w1r;
+ f3i = f1i + t1r * w1r + t1i * w1i;
+ f1r = f1r * Two - f3r;
+ f1i = f1i * Two - f3i;
+
+ if ((int) DiffUCnt == NDiffU / 2)
+ Uinc4 = -Uinc4;
+
+ u0r += Uinc4;
+ u0i -= Uinc4;
+ u1r += Uinc2;
+ u1i -= Uinc2;
+ u2r += Uinc;
+ u2i -= Uinc;
+
+ pstrt += 2;
+
+ t0r = f4r + f5r * w0r + f5i * w0i;
+ t0i = f4i - f5r * w0i + f5i * w0r;
+ f5r = f4r * Two - t0r;
+ f5i = f4i * Two - t0i;
+
+ t1r = f6r - f7r * w0r - f7i * w0i;
+ t1i = f6i + f7r * w0i - f7i * w0r;
+ f6r = f6r * Two - t1r;
+ f6i = f6i * Two - t1i;
+
+ f4r = t0r + f6r * w1r + f6i * w1i;
+ f4i = t0i - f6r * w1i + f6i * w1r;
+ f6r = t0r * Two - f4r;
+ f6i = t0i * Two - f4i;
+
+ f7r = f5r + t1r * w1i - t1i * w1r;
+ f7i = f5i + t1r * w1r + t1i * w1i;
+ f5r = f5r * Two - f7r;
+ f5i = f5i * Two - f7i;
+
+ w0r = *u0r;
+ w0i = *u0i;
+ w1r = *u1r;
+ w1i = *u1i;
+
+ if ((int) DiffUCnt <= NDiffU / 2)
+ w0r = -w0r;
+
+ t0r = f0r - f4r * w2r - f4i * w2i;
+ t0i = f0i + f4r * w2i - f4i * w2r;
+ f0r = f0r * Two - t0r;
+ f0i = f0i * Two - t0i;
+
+ f4r = f2r - f6r * w2i + f6i * w2r;
+ f4i = f2i - f6r * w2r - f6i * w2i;
+ f6r = f2r * Two - f4r;
+ f6i = f2i * Two - f4i;
+
+ *(p0r + pos) = t0r;
+ *p2r = f4r;
+ *(p0r + posi) = t0i;
+ *(p2r + 1) = f4i;
+ w2r = *u2r;
+ w2i = *u2i;
+ *p0r = f0r;
+ *(p2r + pos) = f6r;
+ *(p0r + 1) = f0i;
+ *(p2r + posi) = f6i;
+
+ p0r = pstrt;
+ p2r = pstrt + pinc + pinc;
+
+ t1r = f1r - f5r * w3r - f5i * w3i;
+ t1i = f1i + f5r * w3i - f5i * w3r;
+ f1r = f1r * Two - t1r;
+ f1i = f1i * Two - t1i;
+
+ f5r = f3r - f7r * w3i + f7i * w3r;
+ f5i = f3i - f7r * w3r - f7i * w3i;
+ f7r = f3r * Two - f5r;
+ f7i = f3i * Two - f5i;
+
+ *(p1r + pos) = t1r;
+ *p3r = f5r;
+ *(p1r + posi) = t1i;
+ *(p3r + 1) = f5i;
+ w3r = *(u2r + U2toU3);
+ w3i = *(u2i - U2toU3);
+ *p1r = f1r;
+ *(p3r + pos) = f7r;
+ *(p1r + 1) = f1i;
+ *(p3r + posi) = f7i;
+
+ p1r = pstrt + pinc;
+ p3r = p2r + pinc;
+ }
+ NSameU /= 8;
+ Uinc /= 8;
+ Uinc2 /= 8;
+ Uinc4 = Uinc * 4;
+ NDiffU *= 8;
+ pinc *= 8;
+ pnext *= 8;
+ pos *= 8;
+ posi = pos + 1;
+ }
+}
+
+template
+void g_fft::fftrecurs(FFT_TYPE *ioptr, int M,
+ FFT_TYPE *Utbl, int Ustride, int NDiffU,
+ int StageCnt)
+{
+// recursive bfstages calls to maximize on chip cache efficiency
+ int i1;
+
+ if (M <= (int) MCACHE) // fits on chip ?
+ bfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); // RADIX 8 Stages
+ else {
+ for (i1 = 0; i1 < 8; i1++) {
+ fftrecurs(&ioptr[i1 * POW2(M - 3) * 2], M - 3, Utbl, 8 * Ustride,
+ NDiffU, StageCnt - 1); // RADIX 8 Stages
+ }
+ bfstages(ioptr, M, Utbl, Ustride, POW2(M - 3), 1); // RADIX 8 Stage
+ }
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place complex fft on the rows of the input array
+// INPUTS
+// *ioptr = input data array
+// M = log2 of fft size (ex M=10 for 1024 point fft)
+// *Utbl = cosine table
+// *BRLow = bit reversed counter table
+// OUTPUTS
+// *ioptr = output data array
+//------------------------------------------------------------------------------
+template
+void g_fft::ffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow)
+{
+ int StageCnt;
+ int NDiffU;
+
+ switch (M) {
+ case 0:
+ break;
+ case 1:
+ fft2pt(ioptr); // a 2 pt fft
+ break;
+ case 2:
+ fft4pt(ioptr); // a 4 pt fft
+ break;
+ case 3:
+ fft8pt(ioptr); // an 8 pt fft
+ break;
+ default:
+ bitrevR2(ioptr, M, BRLow); // bit reverse and first radix 2 stage
+ StageCnt = (M - 1) / 3; // number of radix 8 stages
+ NDiffU = 2; // one radix 2 stage already complete
+ if ((M - 1 - (StageCnt * 3)) == 1) {
+ bfR2(ioptr, M, NDiffU); // 1 radix 2 stage
+ NDiffU *= 2;
+ }
+ if ((M - 1 - (StageCnt * 3)) == 2) {
+ bfR4(ioptr, M, NDiffU); // 1 radix 4 stage
+ NDiffU *= 4;
+ }
+ if (M <= (int) MCACHE)
+ bfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages
+ else
+ fftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages
+ }
+}
+
+//------------------------------------------------------------------------------
+// parts of iffts1
+// scaled bit reverse and first radix 2 stage forward or inverse fft
+//------------------------------------------------------------------------------
+template
+void g_fft::scbitrevR2(FFT_TYPE *ioptr, int M, short *BRLow, FFT_TYPE scale)
+{
+ FFT_TYPE f0r;
+ FFT_TYPE f0i;
+ FFT_TYPE f1r;
+ FFT_TYPE f1i;
+ FFT_TYPE f2r;
+ FFT_TYPE f2i;
+ FFT_TYPE f3r;
+ FFT_TYPE f3i;
+ FFT_TYPE f4r;
+ FFT_TYPE f4i;
+ FFT_TYPE f5r;
+ FFT_TYPE f5i;
+ FFT_TYPE f6r;
+ FFT_TYPE f6i;
+ FFT_TYPE f7r;
+ FFT_TYPE f7i;
+ FFT_TYPE t0r;
+ FFT_TYPE t0i;
+ FFT_TYPE t1r;
+ FFT_TYPE t1i;
+ FFT_TYPE *p0r;
+ FFT_TYPE *p1r;
+ FFT_TYPE *IOP;
+ FFT_TYPE *iolimit;
+ int Colstart;
+ int iCol;
+ unsigned int posA;
+ unsigned int posAi;
+ unsigned int posB;
+ unsigned int posBi;
+
+ const unsigned int Nrems2 = POW2((M + 3) / 2);
+ const unsigned int Nroot_1_ColInc = POW2(M) - Nrems2;
+ const unsigned int Nroot_1 = POW2(M / 2 - 1) - 1;
+ const unsigned int ColstartShift = (M + 1) / 2 + 1;
+
+ posA = POW2(M); // 1/2 of POW2(M) complexes
+ posAi = posA + 1;
+ posB = posA + 2;
+ posBi = posB + 1;
+
+ iolimit = ioptr + Nrems2;
+ for (; ioptr < iolimit; ioptr += POW2(M / 2 + 1)) {
+ for (Colstart = Nroot_1; Colstart >= 0; Colstart--) {
+ iCol = Nroot_1;
+ p0r = ioptr + Nroot_1_ColInc + BRLow[Colstart] * 4;
+ IOP = ioptr + (Colstart << ColstartShift);
+ p1r = IOP + BRLow[iCol] * 4;
+ f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f1r = *(p0r + posA);
+ f1i = *(p0r + posAi);
+ for (; iCol > Colstart;) {
+ f2r = *(p0r + 2);
+ f2i = *(p0r + (2 + 1));
+ f3r = *(p0r + posB);
+ f3i = *(p0r + posBi);
+ f4r = *(p1r);
+ f4i = *(p1r + 1);
+ f5r = *(p1r + posA);
+ f5i = *(p1r + posAi);
+ f6r = *(p1r + 2);
+ f6i = *(p1r + (2 + 1));
+ f7r = *(p1r + posB);
+ f7i = *(p1r + posBi);
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+ t1r = f2r + f3r;
+ t1i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+ f0r = f4r + f5r;
+ f0i = f4i + f5i;
+ f5r = f4r - f5r;
+ f5i = f4i - f5i;
+ f2r = f6r + f7r;
+ f2i = f6i + f7i;
+ f7r = f6r - f7r;
+ f7i = f6i - f7i;
+
+ *(p1r) = scale * t0r;
+ *(p1r + 1) = scale * t0i;
+ *(p1r + 2) = scale * f1r;
+ *(p1r + (2 + 1)) = scale * f1i;
+ *(p1r + posA) = scale * t1r;
+ *(p1r + posAi) = scale * t1i;
+ *(p1r + posB) = scale * f3r;
+ *(p1r + posBi) = scale * f3i;
+ *(p0r) = scale * f0r;
+ *(p0r + 1) = scale * f0i;
+ *(p0r + 2) = scale * f5r;
+ *(p0r + (2 + 1)) = scale * f5i;
+ *(p0r + posA) = scale * f2r;
+ *(p0r + posAi) = scale * f2i;
+ *(p0r + posB) = scale * f7r;
+ *(p0r + posBi) = scale * f7i;
+
+ p0r -= Nrems2;
+ f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f1r = *(p0r + posA);
+ f1i = *(p0r + posAi);
+ iCol -= 1;
+ p1r = IOP + BRLow[iCol] * 4;
+ }
+ f2r = *(p0r + 2);
+ f2i = *(p0r + (2 + 1));
+ f3r = *(p0r + posB);
+ f3i = *(p0r + posBi);
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+ t1r = f2r + f3r;
+ t1i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+
+ *(p0r) = scale * t0r;
+ *(p0r + 1) = scale * t0i;
+ *(p0r + 2) = scale * f1r;
+ *(p0r + (2 + 1)) = scale * f1i;
+ *(p0r + posA) = scale * t1r;
+ *(p0r + posAi) = scale * t1i;
+ *(p0r + posB) = scale * f3r;
+ *(p0r + posBi) = scale * f3i;
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// RADIX 2 ifft
+//------------------------------------------------------------------------------
+template
+void g_fft::ifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i;
+ FFT_TYPE t0r, t0i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[2];
+ f1i = ioptr[3];
+
+// Butterflys
+// f0 - - t0
+// f1 - 1 - f1
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+// store result
+ ioptr[0] = scale * t0r;
+ ioptr[1] = scale * t0i;
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 4 ifft
+//------------------------------------------------------------------------------
+template
+void g_fft::ifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[4];
+ f1i = ioptr[5];
+ f2r = ioptr[2];
+ f2i = ioptr[3];
+ f3r = ioptr[6];
+ f3i = ioptr[7];
+
+// Butterflys
+// f0 - - t0 - - f0
+// f1 - 1 - f1 - - f1
+// f2 - - f2 - 1 - f2
+// f3 - 1 - t1 - i - f3
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r + t1i;
+ f3i = f1i - t1r;
+ f1r = f1r - t1i;
+ f1i = f1i + t1r;
+
+// store result
+ ioptr[0] = scale * f0r;
+ ioptr[1] = scale * f0i;
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+ ioptr[4] = scale * f2r;
+ ioptr[5] = scale * f2i;
+ ioptr[6] = scale * f3r;
+ ioptr[7] = scale * f3i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 8 ifft
+//------------------------------------------------------------------------------
+template
+void g_fft::ifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[8];
+ f1i = ioptr[9];
+ f2r = ioptr[4];
+ f2i = ioptr[5];
+ f3r = ioptr[12];
+ f3i = ioptr[13];
+ f4r = ioptr[2];
+ f4i = ioptr[3];
+ f5r = ioptr[10];
+ f5i = ioptr[11];
+ f6r = ioptr[6];
+ f6i = ioptr[7];
+ f7r = ioptr[14];
+ f7i = ioptr[15];
+
+// Butterflys
+// f0 - - t0 - - f0 - - f0
+// f1 - 1 - f1 - - f1 - - f1
+// f2 - - f2 - 1 - f2 - - f2
+// f3 - 1 - t1 - i - f3 - - f3
+// f4 - - t0 - - f4 - 1 - t0
+// f5 - 1 - f5 - - f5 - w3 - f4
+// f6 - - f6 - 1 - f6 - i - t1
+// f7 - 1 - t1 - i - f7 - iw3- f6
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r + t1i;
+ f3i = f1i - t1r;
+ f1r = f1r - t1i;
+ f1i = f1i + t1r;
+
+ t0r = f4r + f5r;
+ t0i = f4i + f5i;
+ f5r = f4r - f5r;
+ f5i = f4i - f5i;
+
+ t1r = f6r - f7r;
+ t1i = f6i - f7i;
+ f6r = f6r + f7r;
+ f6i = f6i + f7i;
+
+ f4r = t0r + f6r;
+ f4i = t0i + f6i;
+ f6r = t0r - f6r;
+ f6i = t0i - f6i;
+
+ f7r = f5r + t1i;
+ f7i = f5i - t1r;
+ f5r = f5r - t1i;
+ f5i = f5i + t1r;
+
+ t0r = f0r - f4r;
+ t0i = f0i - f4i;
+ f0r = f0r + f4r;
+ f0i = f0i + f4i;
+
+ t1r = f2r + f6i;
+ t1i = f2i - f6r;
+ f2r = f2r - f6i;
+ f2i = f2i + f6r;
+
+ f4r = f1r - f5r * w0r + f5i * w0r;
+ f4i = f1i - f5r * w0r - f5i * w0r;
+ f1r = f1r * Two - f4r;
+ f1i = f1i * Two - f4i;
+
+ f6r = f3r + f7r * w0r + f7i * w0r;
+ f6i = f3i - f7r * w0r + f7i * w0r;
+ f3r = f3r * Two - f6r;
+ f3i = f3i * Two - f6i;
+
+// store result
+ ioptr[0] = scale * f0r;
+ ioptr[1] = scale * f0i;
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+ ioptr[4] = scale * f2r;
+ ioptr[5] = scale * f2i;
+ ioptr[6] = scale * f3r;
+ ioptr[7] = scale * f3i;
+ ioptr[8] = scale * t0r;
+ ioptr[9] = scale * t0i;
+ ioptr[10] = scale * f4r;
+ ioptr[11] = scale * f4i;
+ ioptr[12] = scale * t1r;
+ ioptr[13] = scale * t1i;
+ ioptr[14] = scale * f6r;
+ ioptr[15] = scale * f6i;
+}
+
+//------------------------------------------------------------------------------
+// 2nd radix 2 stage
+//------------------------------------------------------------------------------
+template
+void g_fft::ibfR2(FFT_TYPE *ioptr, int M, int NDiffU)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int pinc;
+ unsigned int pnext;
+ unsigned int NSameU;
+ unsigned int SameUCnt;
+
+ FFT_TYPE *pstrt;
+ FFT_TYPE *p0r, *p1r, *p2r, *p3r;
+
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+
+ pinc = NDiffU * 2; // 2 floats per complex
+ pnext = pinc * 4;
+ pos = 2;
+ posi = pos + 1;
+ NSameU = POW2(M) / 4 / NDiffU; // 4 Us at a time
+ pstrt = ioptr;
+ p0r = pstrt;
+ p1r = pstrt + pinc;
+ p2r = p1r + pinc;
+ p3r = p2r + pinc;
+
+// Butterflys
+// f0 - - f4
+// f1 - 1 - f5
+// f2 - - f6
+// f3 - 1 - f7
+// Butterflys
+// f0 - - f4
+// f1 - 1 - f5
+// f2 - - f6
+// f3 - 1 - f7
+
+ for (SameUCnt = NSameU; SameUCnt > 0; SameUCnt--) {
+
+ f0r = *p0r;
+ f1r = *p1r;
+ f0i = *(p0r + 1);
+ f1i = *(p1r + 1);
+ f2r = *p2r;
+ f3r = *p3r;
+ f2i = *(p2r + 1);
+ f3i = *(p3r + 1);
+
+ f4r = f0r + f1r;
+ f4i = f0i + f1i;
+ f5r = f0r - f1r;
+ f5i = f0i - f1i;
+
+ f6r = f2r + f3r;
+ f6i = f2i + f3i;
+ f7r = f2r - f3r;
+ f7i = f2i - f3i;
+
+ *p0r = f4r;
+ *(p0r + 1) = f4i;
+ *p1r = f5r;
+ *(p1r + 1) = f5i;
+ *p2r = f6r;
+ *(p2r + 1) = f6i;
+ *p3r = f7r;
+ *(p3r + 1) = f7i;
+
+ f0r = *(p0r + pos);
+ f1i = *(p1r + posi);
+ f0i = *(p0r + posi);
+ f1r = *(p1r + pos);
+ f2r = *(p2r + pos);
+ f3i = *(p3r + posi);
+ f2i = *(p2r + posi);
+ f3r = *(p3r + pos);
+
+ f4r = f0r - f1i;
+ f4i = f0i + f1r;
+ f5r = f0r + f1i;
+ f5i = f0i - f1r;
+
+ f6r = f2r - f3i;
+ f6i = f2i + f3r;
+ f7r = f2r + f3i;
+ f7i = f2i - f3r;
+
+ *(p0r + pos) = f4r;
+ *(p0r + posi) = f4i;
+ *(p1r + pos) = f5r;
+ *(p1r + posi) = f5i;
+ *(p2r + pos) = f6r;
+ *(p2r + posi) = f6i;
+ *(p3r + pos) = f7r;
+ *(p3r + posi) = f7i;
+
+ p0r += pnext;
+ p1r += pnext;
+ p2r += pnext;
+ p3r += pnext;
+ }
+}
+
+//------------------------------------------------------------------------------
+// 1 radix 4 stage
+//------------------------------------------------------------------------------
+template
+void g_fft::ibfR4(FFT_TYPE *ioptr, int M, int NDiffU)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int pinc;
+ unsigned int pnext;
+ unsigned int pnexti;
+ unsigned int NSameU;
+ unsigned int SameUCnt;
+
+ FFT_TYPE *pstrt;
+ FFT_TYPE *p0r, *p1r, *p2r, *p3r;
+
+ FFT_TYPE w1r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+
+ pinc = NDiffU * 2; // 2 floats per complex
+ pnext = pinc * 4;
+ pnexti = pnext + 1;
+ pos = 2;
+ posi = pos + 1;
+ NSameU = POW2(M) / 4 / NDiffU; // 4 pts per butterfly
+ pstrt = ioptr;
+ p0r = pstrt;
+ p1r = pstrt + pinc;
+ p2r = p1r + pinc;
+ p3r = p2r + pinc;
+
+// Butterflys
+// f0 - - f0 - - f4
+// f1 - 1 - f5 - - f5
+// f2 - - f6 - 1 - f6
+// f3 - 1 - f3 - -i - f7
+// Butterflys
+// f0 - - f4 - - f4
+// f1 - -i - t1 - - f5
+// f2 - - f2 - w1 - f6
+// f3 - -i - f7 - iw1- f7
+
+ f0r = *p0r;
+ f1r = *p1r;
+ f2r = *p2r;
+ f3r = *p3r;
+ f0i = *(p0r + 1);
+ f1i = *(p1r + 1);
+ f2i = *(p2r + 1);
+ f3i = *(p3r + 1);
+
+ f5r = f0r - f1r;
+ f5i = f0i - f1i;
+ f0r = f0r + f1r;
+ f0i = f0i + f1i;
+
+ f6r = f2r + f3r;
+ f6i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+
+ for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) {
+
+ f7r = f5r + f3i;
+ f7i = f5i - f3r;
+ f5r = f5r - f3i;
+ f5i = f5i + f3r;
+
+ f4r = f0r + f6r;
+ f4i = f0i + f6i;
+ f6r = f0r - f6r;
+ f6i = f0i - f6i;
+
+ f2r = *(p2r + pos);
+ f2i = *(p2r + posi);
+ f1r = *(p1r + pos);
+ f1i = *(p1r + posi);
+ f3i = *(p3r + posi);
+ f0r = *(p0r + pos);
+ f3r = *(p3r + pos);
+ f0i = *(p0r + posi);
+
+ *p3r = f7r;
+ *p0r = f4r;
+ *(p3r + 1) = f7i;
+ *(p0r + 1) = f4i;
+ *p1r = f5r;
+ *p2r = f6r;
+ *(p1r + 1) = f5i;
+ *(p2r + 1) = f6i;
+
+ f7r = f2r + f3i;
+ f7i = f2i - f3r;
+ f2r = f2r - f3i;
+ f2i = f2i + f3r;
+
+ f4r = f0r - f1i;
+ f4i = f0i + f1r;
+ t1r = f0r + f1i;
+ t1i = f0i - f1r;
+
+ f5r = t1r - f7r * w1r - f7i * w1r;
+ f5i = t1i + f7r * w1r - f7i * w1r;
+ f7r = t1r * Two - f5r;
+ f7i = t1i * Two - f5i;
+
+ f6r = f4r - f2r * w1r + f2i * w1r;
+ f6i = f4i - f2r * w1r - f2i * w1r;
+ f4r = f4r * Two - f6r;
+ f4i = f4i * Two - f6i;
+
+ f3r = *(p3r + pnext);
+ f0r = *(p0r + pnext);
+ f3i = *(p3r + pnexti);
+ f0i = *(p0r + pnexti);
+ f2r = *(p2r + pnext);
+ f2i = *(p2r + pnexti);
+ f1r = *(p1r + pnext);
+ f1i = *(p1r + pnexti);
+
+ *(p2r + pos) = f6r;
+ *(p1r + pos) = f5r;
+ *(p2r + posi) = f6i;
+ *(p1r + posi) = f5i;
+ *(p3r + pos) = f7r;
+ *(p0r + pos) = f4r;
+ *(p3r + posi) = f7i;
+ *(p0r + posi) = f4i;
+
+ f6r = f2r + f3r;
+ f6i = f2i + f3i;
+ f3r = f2r - f3r;
+ f3i = f2i - f3i;
+
+ f5r = f0r - f1r;
+ f5i = f0i - f1i;
+ f0r = f0r + f1r;
+ f0i = f0i + f1i;
+
+ p3r += pnext;
+ p0r += pnext;
+ p1r += pnext;
+ p2r += pnext;
+ }
+ f7r = f5r + f3i;
+ f7i = f5i - f3r;
+ f5r = f5r - f3i;
+ f5i = f5i + f3r;
+
+ f4r = f0r + f6r;
+ f4i = f0i + f6i;
+ f6r = f0r - f6r;
+ f6i = f0i - f6i;
+
+ f2r = *(p2r + pos);
+ f2i = *(p2r + posi);
+ f1r = *(p1r + pos);
+ f1i = *(p1r + posi);
+ f3i = *(p3r + posi);
+ f0r = *(p0r + pos);
+ f3r = *(p3r + pos);
+ f0i = *(p0r + posi);
+
+ *p3r = f7r;
+ *p0r = f4r;
+ *(p3r + 1) = f7i;
+ *(p0r + 1) = f4i;
+ *p1r = f5r;
+ *p2r = f6r;
+ *(p1r + 1) = f5i;
+ *(p2r + 1) = f6i;
+
+ f7r = f2r + f3i;
+ f7i = f2i - f3r;
+ f2r = f2r - f3i;
+ f2i = f2i + f3r;
+
+ f4r = f0r - f1i;
+ f4i = f0i + f1r;
+ t1r = f0r + f1i;
+ t1i = f0i - f1r;
+
+ f5r = t1r - f7r * w1r - f7i * w1r;
+ f5i = t1i + f7r * w1r - f7i * w1r;
+ f7r = t1r * Two - f5r;
+ f7i = t1i * Two - f5i;
+
+ f6r = f4r - f2r * w1r + f2i * w1r;
+ f6i = f4i - f2r * w1r - f2i * w1r;
+ f4r = f4r * Two - f6r;
+ f4i = f4i * Two - f6i;
+
+ *(p2r + pos) = f6r;
+ *(p1r + pos) = f5r;
+ *(p2r + posi) = f6i;
+ *(p1r + posi) = f5i;
+ *(p3r + pos) = f7r;
+ *(p0r + pos) = f4r;
+ *(p3r + posi) = f7i;
+ *(p0r + posi) = f4i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 8 Stages
+//------------------------------------------------------------------------------
+template
+void g_fft::ibfstages(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride,
+ int NDiffU, int StageCnt)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int pinc;
+ unsigned int pnext;
+ unsigned int NSameU;
+ int Uinc;
+ int Uinc2;
+ int Uinc4;
+ unsigned int DiffUCnt;
+ unsigned int SameUCnt;
+ unsigned int U2toU3;
+
+ FFT_TYPE *pstrt;
+ FFT_TYPE *p0r, *p1r, *p2r, *p3r;
+ FFT_TYPE *u0r, *u0i, *u1r, *u1i, *u2r, *u2i;
+
+ FFT_TYPE w0r, w0i, w1r, w1i, w2r, w2i, w3r, w3i;
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+
+ pinc = NDiffU * 2; // 2 floats per complex
+ pnext = pinc * 8;
+ pos = pinc * 4;
+ posi = pos + 1;
+ NSameU = POW2(M) / 8 / NDiffU; // 8 pts per butterfly
+ Uinc = (int) NSameU * Ustride;
+ Uinc2 = Uinc * 2;
+ Uinc4 = Uinc * 4;
+ U2toU3 = (POW2(M) / 8) * Ustride;
+ for (; StageCnt > 0; StageCnt--) {
+
+ u0r = &Utbl[0];
+ u0i = &Utbl[POW2(M - 2) * Ustride];
+ u1r = u0r;
+ u1i = u0i;
+ u2r = u0r;
+ u2i = u0i;
+
+ w0r = *u0r;
+ w0i = *u0i;
+ w1r = *u1r;
+ w1i = *u1i;
+ w2r = *u2r;
+ w2i = *u2i;
+ w3r = *(u2r + U2toU3);
+ w3i = *(u2i - U2toU3);
+
+ pstrt = ioptr;
+
+ p0r = pstrt;
+ p1r = pstrt + pinc;
+ p2r = p1r + pinc;
+ p3r = p2r + pinc;
+
+// Butterflys
+// f0 - - t0 - - f0 - - f0
+// f1 - w0- f1 - - f1 - - f1
+// f2 - - f2 - w1- f2 - - f4
+// f3 - w0- t1 - iw1- f3 - - f5
+// f4 - - t0 - - f4 - w2- t0
+// f5 - w0- f5 - - f5 - w3- t1
+// f6 - - f6 - w1- f6 - iw2- f6
+// f7 - w0- t1 - iw1- f7 - iw3- f7
+
+ for (DiffUCnt = NDiffU; DiffUCnt > 0; DiffUCnt--) {
+ f0r = *p0r;
+ f0i = *(p0r + 1);
+ f1r = *p1r;
+ f1i = *(p1r + 1);
+ for (SameUCnt = NSameU - 1; SameUCnt > 0; SameUCnt--) {
+ f2r = *p2r;
+ f2i = *(p2r + 1);
+ f3r = *p3r;
+ f3i = *(p3r + 1);
+
+ t0r = f0r + f1r * w0r - f1i * w0i;
+ t0i = f0i + f1r * w0i + f1i * w0r;
+ f1r = f0r * Two - t0r;
+ f1i = f0i * Two - t0i;
+
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+
+ f6r = *(p2r + pos);
+ f6i = *(p2r + posi);
+ f7r = *(p3r + pos);
+ f7i = *(p3r + posi);
+
+ t1r = f2r - f3r * w0r + f3i * w0i;
+ t1i = f2i - f3r * w0i - f3i * w0r;
+ f2r = f2r * Two - t1r;
+ f2i = f2i * Two - t1i;
+
+ f0r = t0r + f2r * w1r - f2i * w1i;
+ f0i = t0i + f2r * w1i + f2i * w1r;
+ f2r = t0r * Two - f0r;
+ f2i = t0i * Two - f0i;
+
+ f3r = f1r + t1r * w1i + t1i * w1r;
+ f3i = f1i - t1r * w1r + t1i * w1i;
+ f1r = f1r * Two - f3r;
+ f1i = f1i * Two - f3i;
+
+ t0r = f4r + f5r * w0r - f5i * w0i;
+ t0i = f4i + f5r * w0i + f5i * w0r;
+ f5r = f4r * Two - t0r;
+ f5i = f4i * Two - t0i;
+
+ t1r = f6r - f7r * w0r + f7i * w0i;
+ t1i = f6i - f7r * w0i - f7i * w0r;
+ f6r = f6r * Two - t1r;
+ f6i = f6i * Two - t1i;
+
+ f4r = t0r + f6r * w1r - f6i * w1i;
+ f4i = t0i + f6r * w1i + f6i * w1r;
+ f6r = t0r * Two - f4r;
+ f6i = t0i * Two - f4i;
+
+ f7r = f5r + t1r * w1i + t1i * w1r;
+ f7i = f5i - t1r * w1r + t1i * w1i;
+ f5r = f5r * Two - f7r;
+ f5i = f5i * Two - f7i;
+
+ t0r = f0r - f4r * w2r + f4i * w2i;
+ t0i = f0i - f4r * w2i - f4i * w2r;
+ f0r = f0r * Two - t0r;
+ f0i = f0i * Two - t0i;
+
+ t1r = f1r - f5r * w3r + f5i * w3i;
+ t1i = f1i - f5r * w3i - f5i * w3r;
+ f1r = f1r * Two - t1r;
+ f1i = f1i * Two - t1i;
+
+ *(p0r + pos) = t0r;
+ *(p0r + posi) = t0i;
+ *p0r = f0r;
+ *(p0r + 1) = f0i;
+
+ p0r += pnext;
+ f0r = *p0r;
+ f0i = *(p0r + 1);
+
+ *(p1r + pos) = t1r;
+ *(p1r + posi) = t1i;
+ *p1r = f1r;
+ *(p1r + 1) = f1i;
+
+ p1r += pnext;
+
+ f1r = *p1r;
+ f1i = *(p1r + 1);
+
+ f4r = f2r - f6r * w2i - f6i * w2r;
+ f4i = f2i + f6r * w2r - f6i * w2i;
+ f6r = f2r * Two - f4r;
+ f6i = f2i * Two - f4i;
+
+ f5r = f3r - f7r * w3i - f7i * w3r;
+ f5i = f3i + f7r * w3r - f7i * w3i;
+ f7r = f3r * Two - f5r;
+ f7i = f3i * Two - f5i;
+
+ *p2r = f4r;
+ *(p2r + 1) = f4i;
+ *(p2r + pos) = f6r;
+ *(p2r + posi) = f6i;
+
+ p2r += pnext;
+
+ *p3r = f5r;
+ *(p3r + 1) = f5i;
+ *(p3r + pos) = f7r;
+ *(p3r + posi) = f7i;
+
+ p3r += pnext;
+ }
+
+ f2r = *p2r;
+ f2i = *(p2r + 1);
+ f3r = *p3r;
+ f3i = *(p3r + 1);
+
+ t0r = f0r + f1r * w0r - f1i * w0i;
+ t0i = f0i + f1r * w0i + f1i * w0r;
+ f1r = f0r * Two - t0r;
+ f1i = f0i * Two - t0i;
+
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+
+ f6r = *(p2r + pos);
+ f6i = *(p2r + posi);
+ f7r = *(p3r + pos);
+ f7i = *(p3r + posi);
+
+ t1r = f2r - f3r * w0r + f3i * w0i;
+ t1i = f2i - f3r * w0i - f3i * w0r;
+ f2r = f2r * Two - t1r;
+ f2i = f2i * Two - t1i;
+
+ f0r = t0r + f2r * w1r - f2i * w1i;
+ f0i = t0i + f2r * w1i + f2i * w1r;
+ f2r = t0r * Two - f0r;
+ f2i = t0i * Two - f0i;
+
+ f3r = f1r + t1r * w1i + t1i * w1r;
+ f3i = f1i - t1r * w1r + t1i * w1i;
+ f1r = f1r * Two - f3r;
+ f1i = f1i * Two - f3i;
+
+ if ((int) DiffUCnt == NDiffU / 2)
+ Uinc4 = -Uinc4;
+
+ u0r += Uinc4;
+ u0i -= Uinc4;
+ u1r += Uinc2;
+ u1i -= Uinc2;
+ u2r += Uinc;
+ u2i -= Uinc;
+
+ pstrt += 2;
+
+ t0r = f4r + f5r * w0r - f5i * w0i;
+ t0i = f4i + f5r * w0i + f5i * w0r;
+ f5r = f4r * Two - t0r;
+ f5i = f4i * Two - t0i;
+
+ t1r = f6r - f7r * w0r + f7i * w0i;
+ t1i = f6i - f7r * w0i - f7i * w0r;
+ f6r = f6r * Two - t1r;
+ f6i = f6i * Two - t1i;
+
+ f4r = t0r + f6r * w1r - f6i * w1i;
+ f4i = t0i + f6r * w1i + f6i * w1r;
+ f6r = t0r * Two - f4r;
+ f6i = t0i * Two - f4i;
+
+ f7r = f5r + t1r * w1i + t1i * w1r;
+ f7i = f5i - t1r * w1r + t1i * w1i;
+ f5r = f5r * Two - f7r;
+ f5i = f5i * Two - f7i;
+
+ w0r = *u0r;
+ w0i = *u0i;
+ w1r = *u1r;
+ w1i = *u1i;
+
+ if ((int) DiffUCnt <= NDiffU / 2)
+ w0r = -w0r;
+
+ t0r = f0r - f4r * w2r + f4i * w2i;
+ t0i = f0i - f4r * w2i - f4i * w2r;
+ f0r = f0r * Two - t0r;
+ f0i = f0i * Two - t0i;
+
+ f4r = f2r - f6r * w2i - f6i * w2r;
+ f4i = f2i + f6r * w2r - f6i * w2i;
+ f6r = f2r * Two - f4r;
+ f6i = f2i * Two - f4i;
+
+ *(p0r + pos) = t0r;
+ *p2r = f4r;
+ *(p0r + posi) = t0i;
+ *(p2r + 1) = f4i;
+ w2r = *u2r;
+ w2i = *u2i;
+ *p0r = f0r;
+ *(p2r + pos) = f6r;
+ *(p0r + 1) = f0i;
+ *(p2r + posi) = f6i;
+
+ p0r = pstrt;
+ p2r = pstrt + pinc + pinc;
+
+ t1r = f1r - f5r * w3r + f5i * w3i;
+ t1i = f1i - f5r * w3i - f5i * w3r;
+ f1r = f1r * Two - t1r;
+ f1i = f1i * Two - t1i;
+
+ f5r = f3r - f7r * w3i - f7i * w3r;
+ f5i = f3i + f7r * w3r - f7i * w3i;
+ f7r = f3r * Two - f5r;
+ f7i = f3i * Two - f5i;
+
+ *(p1r + pos) = t1r;
+ *p3r = f5r;
+ *(p1r + posi) = t1i;
+ *(p3r + 1) = f5i;
+ w3r = *(u2r + U2toU3);
+ w3i = *(u2i - U2toU3);
+ *p1r = f1r;
+ *(p3r + pos) = f7r;
+ *(p1r + 1) = f1i;
+ *(p3r + posi) = f7i;
+
+ p1r = pstrt + pinc;
+ p3r = p2r + pinc;
+ }
+ NSameU /= 8;
+ Uinc /= 8;
+ Uinc2 /= 8;
+ Uinc4 = Uinc * 4;
+ NDiffU *= 8;
+ pinc *= 8;
+ pnext *= 8;
+ pos *= 8;
+ posi = pos + 1;
+ }
+}
+
+//------------------------------------------------------------------------------
+// recursive bfstages calls to maximize on chip cache efficiency
+//------------------------------------------------------------------------------
+template
+void g_fft::ifftrecurs(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, int Ustride,
+ int NDiffU, int StageCnt)
+{
+ int i1;
+
+ if (M <= (int) MCACHE)
+ ibfstages(ioptr, M, Utbl, Ustride, NDiffU, StageCnt); // RADIX 8 Stages
+ else {
+ for (i1 = 0; i1 < 8; i1++) {
+ ifftrecurs(&ioptr[i1 * POW2(M - 3) * 2], M - 3, Utbl, 8 * Ustride,
+ NDiffU, StageCnt - 1); // RADIX 8 Stages
+ }
+ ibfstages(ioptr, M, Utbl, Ustride, POW2(M - 3), 1); // RADIX 8 Stage
+ }
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place inverse complex fft on the rows of the input array
+// INPUTS
+// *ioptr = input data array
+// M = log2 of fft size
+// *Utbl = cosine table
+// *BRLow = bit reversed counter table
+// OUTPUTS
+// *ioptr = output data array
+//------------------------------------------------------------------------------
+template
+void g_fft::iffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow)
+{
+ int StageCnt;
+ int NDiffU;
+ const FFT_TYPE scale = 1.0 / POW2(M);
+
+ switch (M) {
+ case 0:
+ break;
+ case 1:
+ ifft2pt(ioptr, scale); // a 2 pt fft
+ break;
+ case 2:
+ ifft4pt(ioptr, scale); // a 4 pt fft
+ break;
+ case 3:
+ ifft8pt(ioptr, scale); // an 8 pt fft
+ break;
+ default:
+// bit reverse and first radix 2 stage
+ scbitrevR2(ioptr, M, BRLow, scale);
+ StageCnt = (M - 1) / 3; // number of radix 8 stages
+ NDiffU = 2; // one radix 2 stage already complete
+ if ((M - 1 - (StageCnt * 3)) == 1) {
+ ibfR2(ioptr, M, NDiffU); // 1 radix 2 stage
+ NDiffU *= 2;
+ }
+ if ((M - 1 - (StageCnt * 3)) == 2) {
+ ibfR4(ioptr, M, NDiffU); // 1 radix 4 stage
+ NDiffU *= 4;
+ }
+ if (M <= (int) MCACHE)
+ ibfstages(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages
+ else
+ ifftrecurs(ioptr, M, Utbl, 1, NDiffU, StageCnt); // RADIX 8 Stages
+ }
+}
+
+//------------------------------------------------------------------------------
+// parts of rffts1
+// RADIX 2 rfft
+//------------------------------------------------------------------------------
+template
+void g_fft::rfft1pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE f0r, f0i;
+ FFT_TYPE t0r, t0i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+
+// finish rfft
+ t0r = f0r + f0i;
+ t0i = f0r - f0i;
+
+// store result
+ ioptr[0] = t0r;
+ ioptr[1] = t0i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 4 rfft
+//------------------------------------------------------------------------------
+template
+void g_fft::rfft2pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i;
+ FFT_TYPE t0r, t0i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[2];
+ f1i = ioptr[3];
+
+// Butterflys
+// f0 - - t0
+// f1 - 1 - f1
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f1i - f0i;
+// finish rfft
+ f0r = t0r + t0i;
+ f0i = t0r - t0i;
+
+// store result
+ ioptr[0] = f0r;
+ ioptr[1] = f0i;
+ ioptr[2] = f1r;
+ ioptr[3] = f1i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 8 rfft
+//------------------------------------------------------------------------------
+template
+void g_fft::rfft4pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ const FFT_TYPE Two = 2.0;
+ const FFT_TYPE scale = 0.5;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[4];
+ f1i = ioptr[5];
+ f2r = ioptr[2];
+ f2i = ioptr[3];
+ f3r = ioptr[6];
+ f3i = ioptr[7];
+
+// Butterflys
+// f0 - - t0 - - f0
+// f1 - 1 - f1 - - f1
+// f2 - - f2 - 1 - f2
+// f3 - 1 - t1 - -i - f3
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = f2i - t0i; // neg for rfft
+
+ f3r = f1r - t1i;
+ f3i = f1i + t1r;
+ f1r = f1r + t1i;
+ f1i = f1i - t1r;
+
+// finish rfft
+ t0r = f0r + f0i; // compute Re(x[0])
+ t0i = f0r - f0i; // compute Re(x[N/2])
+
+ t1r = f1r + f3r;
+ t1i = f1i - f3i;
+ f0r = f1i + f3i;
+ f0i = f3r - f1r;
+
+ f1r = t1r + w0r * f0r + w0r * f0i;
+ f1i = t1i - w0r * f0r + w0r * f0i;
+ f3r = Two * t1r - f1r;
+ f3i = f1i - Two * t1i;
+
+// store result
+ ioptr[4] = f2r;
+ ioptr[5] = f2i;
+ ioptr[0] = t0r;
+ ioptr[1] = t0i;
+
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+ ioptr[6] = scale * f3r;
+ ioptr[7] = scale * f3i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 16 rfft
+//------------------------------------------------------------------------------
+template
+void g_fft::rfft8pt(FFT_TYPE *ioptr)
+{
+ FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ FFT_TYPE w1r = FFT_COSPID8; // cos(pi/8)
+ FFT_TYPE w1i = FFT_SINPID8; // sin(pi/8)
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+ const FFT_TYPE scale = 0.5;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+ f1r = ioptr[8];
+ f1i = ioptr[9];
+ f2r = ioptr[4];
+ f2i = ioptr[5];
+ f3r = ioptr[12];
+ f3i = ioptr[13];
+ f4r = ioptr[2];
+ f4i = ioptr[3];
+ f5r = ioptr[10];
+ f5i = ioptr[11];
+ f6r = ioptr[6];
+ f6i = ioptr[7];
+ f7r = ioptr[14];
+ f7i = ioptr[15];
+
+// Butterflys
+// f0 - - t0 - - f0 - - f0
+// f1 - 1 - f1 - - f1 - - f1
+// f2 - - f2 - 1 - f2 - - f2
+// f3 - 1 - t1 - -i - f3 - - f3
+// f4 - - t0 - - f4 - 1 - t0
+// f5 - 1 - f5 - - f5 - w3 - f4
+// f6 - - f6 - 1 - f6 - -i - t1
+// f7 - 1 - t1 - -i - f7 - iw3- f6
+
+ t0r = f0r + f1r;
+ t0i = f0i + f1i;
+ f1r = f0r - f1r;
+ f1i = f0i - f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r - t1i;
+ f3i = f1i + t1r;
+ f1r = f1r + t1i;
+ f1i = f1i - t1r;
+
+ t0r = f4r + f5r;
+ t0i = f4i + f5i;
+ f5r = f4r - f5r;
+ f5i = f4i - f5i;
+
+ t1r = f6r - f7r;
+ t1i = f6i - f7i;
+ f6r = f6r + f7r;
+ f6i = f6i + f7i;
+
+ f4r = t0r + f6r;
+ f4i = t0i + f6i;
+ f6r = t0r - f6r;
+ f6i = t0i - f6i;
+
+ f7r = f5r - t1i;
+ f7i = f5i + t1r;
+ f5r = f5r + t1i;
+ f5i = f5i - t1r;
+
+ t0r = f0r - f4r;
+ t0i = f4i - f0i; // neg for rfft
+ f0r = f0r + f4r;
+ f0i = f0i + f4i;
+
+ t1r = f2r - f6i;
+ t1i = f2i + f6r;
+ f2r = f2r + f6i;
+ f2i = f2i - f6r;
+
+ f4r = f1r - f5r * w0r - f5i * w0r;
+ f4i = f1i + f5r * w0r - f5i * w0r;
+ f1r = f1r * Two - f4r;
+ f1i = f1i * Two - f4i;
+
+ f6r = f3r + f7r * w0r - f7i * w0r;
+ f6i = f3i + f7r * w0r + f7i * w0r;
+ f3r = f3r * Two - f6r;
+ f3i = f3i * Two - f6i;
+
+// finish rfft
+ f5r = f0r + f0i; // compute Re(x[0])
+ f5i = f0r - f0i; // compute Re(x[N/2])
+
+ f0r = f2r + t1r;
+ f0i = f2i - t1i;
+ f7r = f2i + t1i;
+ f7i = t1r - f2r;
+
+ f2r = f0r + w0r * f7r + w0r * f7i;
+ f2i = f0i - w0r * f7r + w0r * f7i;
+ t1r = Two * f0r - f2r;
+ t1i = f2i - Two * f0i;
+
+ f0r = f1r + f6r;
+ f0i = f1i - f6i;
+ f7r = f1i + f6i;
+ f7i = f6r - f1r;
+
+ f1r = f0r + w1r * f7r + w1i * f7i;
+ f1i = f0i - w1i * f7r + w1r * f7i;
+ f6r = Two * f0r - f1r;
+ f6i = f1i - Two * f0i;
+
+ f0r = f3r + f4r;
+ f0i = f3i - f4i;
+ f7r = f3i + f4i;
+ f7i = f4r - f3r;
+
+ f3r = f0r + w1i * f7r + w1r * f7i;
+ f3i = f0i - w1r * f7r + w1i * f7i;
+ f4r = Two * f0r - f3r;
+ f4i = f3i - Two * f0i;
+
+// store result
+ ioptr[8] = t0r;
+ ioptr[9] = t0i;
+ ioptr[0] = f5r;
+ ioptr[1] = f5i;
+
+ ioptr[4] = scale * f2r;
+ ioptr[5] = scale * f2i;
+ ioptr[12] = scale * t1r;
+ ioptr[13] = scale * t1i;
+
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+ ioptr[6] = scale * f3r;
+ ioptr[7] = scale * f3i;
+ ioptr[10] = scale * f4r;
+ ioptr[11] = scale * f4i;
+ ioptr[14] = scale * f6r;
+ ioptr[15] = scale * f6i;
+}
+
+//------------------------------------------------------------------------------
+// Finish RFFT
+//------------------------------------------------------------------------------
+template
+void g_fft::frstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int diffUcnt;
+
+ FFT_TYPE *p0r, *p1r;
+ FFT_TYPE *u0r, *u0i;
+
+ FFT_TYPE w0r, w0i;
+ FFT_TYPE f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = 2.0;
+
+ pos = POW2(M - 1);
+ posi = pos + 1;
+
+ p0r = ioptr;
+ p1r = ioptr + pos / 2;
+
+ u0r = Utbl + POW2(M - 3);
+
+ w0r = *u0r, f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+ f1r = *(p1r);
+ f1i = *(p1r + 1);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+
+ t0r = Two * f0r + Two * f0i; // compute Re(x[0])
+ t0i = Two * f0r - Two * f0i; // compute Re(x[N/2])
+ t1r = f4r + f4r;
+ t1i = -f4i - f4i;
+
+ f0r = f1r + f5r;
+ f0i = f1i - f5i;
+ f4r = f1i + f5i;
+ f4i = f5r - f1r;
+
+ f1r = f0r + w0r * f4r + w0r * f4i;
+ f1i = f0i - w0r * f4r + w0r * f4i;
+ f5r = Two * f0r - f1r;
+ f5i = f1i - Two * f0i;
+
+ *(p0r) = t0r;
+ *(p0r + 1) = t0i;
+ *(p0r + pos) = t1r;
+ *(p0r + posi) = t1i;
+ *(p1r) = f1r;
+ *(p1r + 1) = f1i;
+ *(p1r + pos) = f5r;
+ *(p1r + posi) = f5i;
+
+ u0r = Utbl + 1;
+ u0i = Utbl + (POW2(M - 2) - 1);
+
+ w0r = *u0r, w0i = *u0i;
+
+ p0r = (ioptr + 2);
+ p1r = (ioptr + (POW2(M - 2) - 1) * 2);
+
+// Butterflys
+// f0 - t0 - - f0
+// f5 - t1 - w0 - f5
+// f1 - t0 - - f1
+// f4 - t1 -iw0 - f4
+
+ for (diffUcnt = POW2(M - 3) - 1; diffUcnt > 0; diffUcnt--) {
+
+ f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+ f1r = *(p1r);
+ f1i = *(p1r + 1);
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+
+ t0r = f0r + f5r;
+ t0i = f0i - f5i;
+ t1r = f0i + f5i;
+ t1i = f5r - f0r;
+
+ f0r = t0r + w0r * t1r + w0i * t1i;
+ f0i = t0i - w0i * t1r + w0r * t1i;
+ f5r = Two * t0r - f0r;
+ f5i = f0i - Two * t0i;
+
+ t0r = f1r + f4r;
+ t0i = f1i - f4i;
+ t1r = f1i + f4i;
+ t1i = f4r - f1r;
+
+ f1r = t0r + w0i * t1r + w0r * t1i;
+ f1i = t0i - w0r * t1r + w0i * t1i;
+ f4r = Two * t0r - f1r;
+ f4i = f1i - Two * t0i;
+
+ *(p0r) = f0r;
+ *(p0r + 1) = f0i;
+ *(p1r + pos) = f5r;
+ *(p1r + posi) = f5i;
+
+ w0r = *++u0r;
+ w0i = *--u0i;
+
+ *(p1r) = f1r;
+ *(p1r + 1) = f1i;
+ *(p0r + pos) = f4r;
+ *(p0r + posi) = f4i;
+
+ p0r += 2;
+ p1r -= 2;
+ }
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place real fft on the rows of the input array
+// The result is the complex spectra of the positive frequencies
+// except the location for the first complex number contains the real
+// values for DC and Nyquest
+// INPUTS
+// *ioptr = real input data array
+// M = log2 of fft size
+// *Utbl = cosine table
+// *BRLow = bit reversed counter table
+// OUTPUTS
+// *ioptr = output data array in the following order
+// Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]),
+// ... Re(x[N/2-1]), Im(x[N/2-1]).
+//------------------------------------------------------------------------------
+template
+void g_fft::rffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow)
+{
+ FFT_TYPE scale;
+ int StageCnt;
+ int NDiffU;
+
+ M = M - 1;
+ switch (M) {
+ case -1:
+ break;
+ case 0:
+ rfft1pt(ioptr); // a 2 pt fft
+ break;
+ case 1:
+ rfft2pt(ioptr); // a 4 pt fft
+ break;
+ case 2:
+ rfft4pt(ioptr); // an 8 pt fft
+ break;
+ case 3:
+ rfft8pt(ioptr); // a 16 pt fft
+ break;
+ default:
+ scale = 0.5;
+// bit reverse and first radix 2 stage
+ scbitrevR2(ioptr, M, BRLow, scale);
+ StageCnt = (M - 1) / 3; // number of radix 8 stages
+ NDiffU = 2; // one radix 2 stage already complete
+ if ((M - 1 - (StageCnt * 3)) == 1) {
+ bfR2(ioptr, M, NDiffU); // 1 radix 2 stage
+ NDiffU *= 2;
+ }
+ if ((M - 1 - (StageCnt * 3)) == 2) {
+ bfR4(ioptr, M, NDiffU); // 1 radix 4 stage
+ NDiffU *= 4;
+ }
+ if (M <= (int) MCACHE)
+ bfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages
+ else
+ fftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages
+ frstage(ioptr, M + 1, Utbl);
+ }
+}
+
+//------------------------------------------------------------------------------
+// parts of riffts1
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// RADIX 2 rifft
+//------------------------------------------------------------------------------
+template
+void g_fft::rifft1pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE f0r, f0i;
+ FFT_TYPE t0r, t0i;
+
+// bit reversed load
+ f0r = ioptr[0];
+ f0i = ioptr[1];
+
+// finish rfft
+ t0r = f0r + f0i;
+ t0i = f0r - f0i;
+
+// store result
+ ioptr[0] = scale * t0r;
+ ioptr[1] = scale * t0i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 4 rifft
+//------------------------------------------------------------------------------
+template
+void g_fft::rifft2pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i;
+ FFT_TYPE t0r, t0i;
+ const FFT_TYPE Two = FFT_TYPE(2.0);
+
+// bit reversed load
+ t0r = ioptr[0];
+ t0i = ioptr[1];
+ f1r = Two * ioptr[2];
+ f1i = Two * ioptr[3];
+
+// start rifft
+ f0r = t0r + t0i;
+ f0i = t0r - t0i;
+
+// Butterflys
+// f0 - - t0
+// f1 - 1 - f1
+
+ t0r = f0r + f1r;
+ t0i = f0i - f1i;
+ f1r = f0r - f1r;
+ f1i = f0i + f1i;
+
+// store result
+ ioptr[0] = scale * t0r;
+ ioptr[1] = scale * t0i;
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 8 rifft
+//------------------------------------------------------------------------------
+template
+void g_fft::rifft4pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ FFT_TYPE w0r = 1.0 / FFT_ROOT2; // cos(pi/4)
+ const FFT_TYPE Two = FFT_TYPE(2.0);
+
+// bit reversed load
+ t0r = ioptr[0];
+ t0i = ioptr[1];
+ f2r = ioptr[2];
+ f2i = ioptr[3];
+ f1r = Two * ioptr[4];
+ f1i = Two * ioptr[5];
+ f3r = ioptr[6];
+ f3i = ioptr[7];
+
+// start rfft
+ f0r = t0r + t0i; // compute Re(x[0])
+ f0i = t0r - t0i; // compute Re(x[N/2])
+
+ t1r = f2r + f3r;
+ t1i = f2i - f3i;
+ t0r = f2r - f3r;
+ t0i = f2i + f3i;
+
+ f2r = t1r - w0r * t0r - w0r * t0i;
+ f2i = t1i + w0r * t0r - w0r * t0i;
+ f3r = Two * t1r - f2r;
+ f3i = f2i - Two * t1i;
+
+// Butterflys
+// f0 - - t0 - - f0
+// f1 - 1 - f1 - - f1
+// f2 - - f2 - 1 - f2
+// f3 - 1 - t1 - i - f3
+
+ t0r = f0r + f1r;
+ t0i = f0i - f1i;
+ f1r = f0r - f1r;
+ f1i = f0i + f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r + t1i;
+ f3i = f1i - t1r;
+ f1r = f1r - t1i;
+ f1i = f1i + t1r;
+
+// store result
+ ioptr[0] = scale * f0r;
+ ioptr[1] = scale * f0i;
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+ ioptr[4] = scale * f2r;
+ ioptr[5] = scale * f2i;
+ ioptr[6] = scale * f3r;
+ ioptr[7] = scale * f3i;
+}
+
+//------------------------------------------------------------------------------
+// RADIX 16 rifft
+//------------------------------------------------------------------------------
+template
+void g_fft::rifft8pt(FFT_TYPE *ioptr, FFT_TYPE scale)
+{
+ FFT_TYPE w0r = (FFT_TYPE) (1.0 / FFT_ROOT2); // cos(pi/4)
+ FFT_TYPE w1r = FFT_COSPID8; // cos(pi/8)
+ FFT_TYPE w1i = FFT_SINPID8; // sin(pi/8)
+ FFT_TYPE f0r, f0i, f1r, f1i, f2r, f2i, f3r, f3i;
+ FFT_TYPE f4r, f4i, f5r, f5i, f6r, f6i, f7r, f7i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = FFT_TYPE(2.0);
+
+// bit reversed load
+ t0r = ioptr[0];
+ t0i = ioptr[1];
+ f4r = ioptr[2];
+ f4i = ioptr[3];
+ f2r = ioptr[4];
+ f2i = ioptr[5];
+ f6r = ioptr[6];
+ f6i = ioptr[7];
+ f1r = Two * ioptr[8];
+ f1i = Two * ioptr[9];
+ f5r = ioptr[10];
+ f5i = ioptr[11];
+ f3r = ioptr[12];
+ f3i = ioptr[13];
+ f7r = ioptr[14];
+ f7i = ioptr[15];
+
+// start rfft
+ f0r = t0r + t0i; // compute Re(x[0])
+ f0i = t0r - t0i; // compute Re(x[N/2])
+
+ t0r = f2r + f3r;
+ t0i = f2i - f3i;
+ t1r = f2r - f3r;
+ t1i = f2i + f3i;
+
+ f2r = t0r - w0r * t1r - w0r * t1i;
+ f2i = t0i + w0r * t1r - w0r * t1i;
+ f3r = Two * t0r - f2r;
+ f3i = f2i - Two * t0i;
+
+ t0r = f4r + f7r;
+ t0i = f4i - f7i;
+ t1r = f4r - f7r;
+ t1i = f4i + f7i;
+
+ f4r = t0r - w1i * t1r - w1r * t1i;
+ f4i = t0i + w1r * t1r - w1i * t1i;
+ f7r = Two * t0r - f4r;
+ f7i = f4i - Two * t0i;
+
+ t0r = f6r + f5r;
+ t0i = f6i - f5i;
+ t1r = f6r - f5r;
+ t1i = f6i + f5i;
+
+ f6r = t0r - w1r * t1r - w1i * t1i;
+ f6i = t0i + w1i * t1r - w1r * t1i;
+ f5r = Two * t0r - f6r;
+ f5i = f6i - Two * t0i;
+
+// Butterflys
+// f0 - - t0 - - f0 - - f0
+// f1* - 1 - f1 - - f1 - - f1
+// f2 - - f2 - 1 - f2 - - f2
+// f3 - 1 - t1 - i - f3 - - f3
+// f4 - - t0 - - f4 - 1 - t0
+// f5 - 1 - f5 - - f5 - w3 - f4
+// f6 - - f6 - 1 - f6 - i - t1
+// f7 - 1 - t1 - i - f7 - iw3- f6
+
+ t0r = f0r + f1r;
+ t0i = f0i - f1i;
+ f1r = f0r - f1r;
+ f1i = f0i + f1i;
+
+ t1r = f2r - f3r;
+ t1i = f2i - f3i;
+ f2r = f2r + f3r;
+ f2i = f2i + f3i;
+
+ f0r = t0r + f2r;
+ f0i = t0i + f2i;
+ f2r = t0r - f2r;
+ f2i = t0i - f2i;
+
+ f3r = f1r + t1i;
+ f3i = f1i - t1r;
+ f1r = f1r - t1i;
+ f1i = f1i + t1r;
+
+ t0r = f4r + f5r;
+ t0i = f4i + f5i;
+ f5r = f4r - f5r;
+ f5i = f4i - f5i;
+
+ t1r = f6r - f7r;
+ t1i = f6i - f7i;
+ f6r = f6r + f7r;
+ f6i = f6i + f7i;
+
+ f4r = t0r + f6r;
+ f4i = t0i + f6i;
+ f6r = t0r - f6r;
+ f6i = t0i - f6i;
+
+ f7r = f5r + t1i;
+ f7i = f5i - t1r;
+ f5r = f5r - t1i;
+ f5i = f5i + t1r;
+
+ t0r = f0r - f4r;
+ t0i = f0i - f4i;
+ f0r = f0r + f4r;
+ f0i = f0i + f4i;
+
+ t1r = f2r + f6i;
+ t1i = f2i - f6r;
+ f2r = f2r - f6i;
+ f2i = f2i + f6r;
+
+ f4r = f1r - f5r * w0r + f5i * w0r;
+ f4i = f1i - f5r * w0r - f5i * w0r;
+ f1r = f1r * Two - f4r;
+ f1i = f1i * Two - f4i;
+
+ f6r = f3r + f7r * w0r + f7i * w0r;
+ f6i = f3i - f7r * w0r + f7i * w0r;
+ f3r = f3r * Two - f6r;
+ f3i = f3i * Two - f6i;
+
+// store result
+ ioptr[0] = scale * f0r;
+ ioptr[1] = scale * f0i;
+ ioptr[2] = scale * f1r;
+ ioptr[3] = scale * f1i;
+ ioptr[4] = scale * f2r;
+ ioptr[5] = scale * f2i;
+ ioptr[6] = scale * f3r;
+ ioptr[7] = scale * f3i;
+ ioptr[8] = scale * t0r;
+ ioptr[9] = scale * t0i;
+ ioptr[10] = scale * f4r;
+ ioptr[11] = scale * f4i;
+ ioptr[12] = scale * t1r;
+ ioptr[13] = scale * t1i;
+ ioptr[14] = scale * f6r;
+ ioptr[15] = scale * f6i;
+}
+
+//------------------------------------------------------------------------------
+// Start RIFFT
+//------------------------------------------------------------------------------
+template
+void g_fft::ifrstage(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl)
+{
+ unsigned int pos;
+ unsigned int posi;
+ unsigned int diffUcnt;
+
+ FFT_TYPE *p0r, *p1r;
+ FFT_TYPE *u0r, *u0i;
+
+ FFT_TYPE w0r, w0i;
+ FFT_TYPE f0r, f0i, f1r, f1i, f4r, f4i, f5r, f5i;
+ FFT_TYPE t0r, t0i, t1r, t1i;
+ const FFT_TYPE Two = FFT_TYPE(2.0);
+
+ pos = POW2(M - 1);
+ posi = pos + 1;
+
+ p0r = ioptr;
+ p1r = ioptr + pos / 2;
+
+ u0r = Utbl + POW2(M - 3);
+
+ w0r = *u0r, f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+ f1r = *(p1r);
+ f1i = *(p1r + 1);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+
+ t0r = f0r + f0i;
+ t0i = f0r - f0i;
+ t1r = f4r + f4r;
+ t1i = -f4i - f4i;
+
+ f0r = f1r + f5r;
+ f0i = f1i - f5i;
+ f4r = f1r - f5r;
+ f4i = f1i + f5i;
+
+ f1r = f0r - w0r * f4r - w0r * f4i;
+ f1i = f0i + w0r * f4r - w0r * f4i;
+ f5r = Two * f0r - f1r;
+ f5i = f1i - Two * f0i;
+
+ *(p0r) = t0r;
+ *(p0r + 1) = t0i;
+ *(p0r + pos) = t1r;
+ *(p0r + posi) = t1i;
+ *(p1r) = f1r;
+ *(p1r + 1) = f1i;
+ *(p1r + pos) = f5r;
+ *(p1r + posi) = f5i;
+
+ u0r = Utbl + 1;
+ u0i = Utbl + (POW2(M - 2) - 1);
+
+ w0r = *u0r, w0i = *u0i;
+
+ p0r = (ioptr + 2);
+ p1r = (ioptr + (POW2(M - 2) - 1) * 2);
+
+// Butterflys
+// f0 - t0 - f0
+// f1 - t1 -w0- f1
+// f2 - t0 - f2
+// f3 - t1 -iw0- f3
+
+ for (diffUcnt = POW2(M - 3) - 1; diffUcnt > 0; diffUcnt--) {
+
+ f0r = *(p0r);
+ f0i = *(p0r + 1);
+ f5r = *(p1r + pos);
+ f5i = *(p1r + posi);
+ f1r = *(p1r);
+ f1i = *(p1r + 1);
+ f4r = *(p0r + pos);
+ f4i = *(p0r + posi);
+
+ t0r = f0r + f5r;
+ t0i = f0i - f5i;
+ t1r = f0r - f5r;
+ t1i = f0i + f5i;
+
+ f0r = t0r - w0i * t1r - w0r * t1i;
+ f0i = t0i + w0r * t1r - w0i * t1i;
+ f5r = Two * t0r - f0r;
+ f5i = f0i - Two * t0i;
+
+ t0r = f1r + f4r;
+ t0i = f1i - f4i;
+ t1r = f1r - f4r;
+ t1i = f1i + f4i;
+
+ f1r = t0r - w0r * t1r - w0i * t1i;
+ f1i = t0i + w0i * t1r - w0r * t1i;
+ f4r = Two * t0r - f1r;
+ f4i = f1i - Two * t0i;
+
+ *(p0r) = f0r;
+ *(p0r + 1) = f0i;
+ *(p1r + pos) = f5r;
+ *(p1r + posi) = f5i;
+
+ w0r = *++u0r;
+ w0i = *--u0i;
+
+ *(p1r) = f1r;
+ *(p1r + 1) = f1i;
+ *(p0r + pos) = f4r;
+ *(p0r + posi) = f4i;
+
+ p0r += 2;
+ p1r -= 2;
+ }
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place real ifft on the rows of the input array
+// data order as from rffts1
+// INPUTS
+// *ioptr = input data array in the following order
+// M = log2 of fft size
+// Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]),
+// Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]).
+// *Utbl = cosine table
+// *BRLow = bit reversed counter table
+// OUTPUTS
+// *ioptr = real output data array
+//------------------------------------------------------------------------------
+template
+void g_fft::riffts1(FFT_TYPE *ioptr, int M, FFT_TYPE *Utbl, short *BRLow)
+{
+ FFT_TYPE scale;
+ int StageCnt;
+ int NDiffU;
+
+ scale = (FFT_TYPE)(1.0 / (float)((int)POW2(M)));
+ M = M - 1;
+ switch (M) {
+ case -1:
+ break;
+ case 0:
+ rifft1pt(ioptr, scale); // a 2 pt fft
+ break;
+ case 1:
+ rifft2pt(ioptr, scale); // a 4 pt fft
+ break;
+ case 2:
+ rifft4pt(ioptr, scale); // an 8 pt fft
+ break;
+ case 3:
+ rifft8pt(ioptr, scale); // a 16 pt fft
+ break;
+ default:
+ ifrstage(ioptr, M + 1, Utbl);
+// bit reverse and first radix 2 stage
+ scbitrevR2(ioptr, M, BRLow, scale);
+ StageCnt = (M - 1) / 3; // number of radix 8 stages
+ NDiffU = 2; // one radix 2 stage already complete
+ if ((M - 1 - (StageCnt * 3)) == 1) {
+ ibfR2(ioptr, M, NDiffU); // 1 radix 2 stage
+ NDiffU *= 2;
+ }
+ if ((M - 1 - (StageCnt * 3)) == 2) {
+ ibfR4(ioptr, M, NDiffU); // 1 radix 4 stage
+ NDiffU *= 4;
+ }
+ if (M <= (int) MCACHE)
+ ibfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages
+ else
+ ifftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt); // RADIX 8 Stages
+ }
+}
+
+//==============================================================================
+// End of original C functions
+//
+// Wrapper methods for simple class access
+//==============================================================================
+
+//------------------------------------------------------------------------------
+// malloc and init cosine and bit reversed tables for a given size
+// fft, ifft, rfft, rifft
+// INPUTS
+// M = log2 of fft size (ex M=10 for 1024 point fft)
+// OUTPUTS
+// private cosine and bit reversed tables
+//------------------------------------------------------------------------------
+template
+void g_fft::fftInit()
+{
+ for (int i = 0; i < 32; i++) {
+ FFT_table_1[i] = (FFT_TYPE*)0;
+ FFT_table_2[i] = (short int*)0;
+ }
+
+ FFT_N = ConvertFFTSize(FFT_size);
+
+// create and initialize cos table
+ FFT_table_1[FFT_N] = new FFT_TYPE[(POW2(FFT_N) / 4 + 1)];
+ fftCosInit(FFT_N, FFT_table_1[FFT_N]);
+
+// create and initialize bit reverse tables
+ FFT_table_2[FFT_N/2] = new short[POW2(FFT_N/2 - 1)];
+ fftBRInit(FFT_N, FFT_table_2[FFT_N/2]);
+
+ FFT_table_2[(FFT_N - 1) / 2] = new short[POW2((FFT_N - 1) / 2 - 1)];
+ fftBRInit(FFT_N - 1, FFT_table_2[(FFT_N - 1) / 2]);
+
+ Utbl = ((FFT_TYPE**) FFT_table_1)[FFT_N];
+ BRLow = ((short**) FFT_table_2)[FFT_N / 2];
+
+}
+
+//------------------------------------------------------------------------------
+// convert from N to LOG2(N)
+//------------------------------------------------------------------------------
+template
+int g_fft::ConvertFFTSize(int N)
+{
+ if (N <= 0) N = -N;
+
+ switch (N) {
+ case 0x00000001: return 0; // 1
+ case 0x00000002: return 1; // 2
+ case 0x00000004: return 2; // 4
+ case 0x00000008: return 3; // 8
+ case 0x00000010: return 4; // 16
+ case 0x00000020: return 5; // 32
+ case 0x00000040: return 6; // 64
+ case 0x00000080: return 7; // 128
+ case 0x00000100: return 8; // 256
+ case 0x00000200: return 9; // 512
+ case 0x00000400: return 10; // 1024
+ case 0x00000800: return 11; // 2048
+ case 0x00001000: return 12; // 4096
+ case 0x00002000: return 13; // 8192
+ case 0x00004000: return 14; // 16384
+ case 0x00008000: return 15; // 32768
+ case 0x00010000: return 16; // 65536
+ case 0x00020000: return 17; // 131072
+ case 0x00040000: return 18; // 262144
+ case 0x00080000: return 19; // 525288
+ case 0x00100000: return 20; // 1048576
+ case 0x00200000: return 21; // 2097152
+ case 0x00400000: return 22; // 4194304
+ case 0x00800000: return 23; // 8388608
+ case 0x01000000: return 24; // 16777216
+ case 0x02000000: return 25; // 33554432
+ case 0x04000000: return 26; // 67108864
+ case 0x08000000: return 27; // 134217728
+ case 0x10000000: return 28; // 268435456
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place complex FFT
+// FFTsize: FFT length in samples
+// buf: array of FFTsize*2 FFT_TYPE values,
+// in interleaved real/imaginary format
+//------------------------------------------------------------------------------
+template
+void g_fft::ComplexFFT(std::complex *buf)
+{
+ void *ptr = buf;
+ FFT_TYPE *nbuf = static_cast(ptr);
+ ffts1(nbuf, FFT_N, Utbl, BRLow);
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place inverse complex FFT
+// FFTsize: FFT length in samples
+// buf: array of FFTsize*2 FFT_TYPE values,
+// in interleaved real/imaginary format
+// Output should be scaled by the return value of
+// GetInverseComplexFFTScale(fft_struct, FFTsize).
+//------------------------------------------------------------------------------
+template
+void g_fft::InverseComplexFFT(std::complex *buf)
+{
+ void *ptr = buf;
+ FFT_TYPE *nbuf = static_cast(ptr);
+ iffts1(nbuf, FFT_N, Utbl, BRLow);
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place real FFT
+// FFTsize: FFT length in samples
+// buf: array of FFTsize FFT_TYPE values; output is in interleaved
+// real/imaginary format, except for buf[1] which is the real
+// part for the Nyquist frequency
+//------------------------------------------------------------------------------
+template
+void g_fft::RealFFT(std::complex *buf)
+{
+ void *ptr = buf;
+ FFT_TYPE *nbuf = static_cast(ptr);
+ rffts1(nbuf, FFT_N, Utbl, BRLow);
+}
+
+//------------------------------------------------------------------------------
+// Compute in-place inverse real FFT
+// FFTsize: FFT length in samples
+// buf: array of FFTsize FFT_TYPE values; input is expected to be in
+// interleaved real/imaginary format, except for buf[1] which
+// is the real part for the Nyquist frequency
+// Output should be scaled by the return value of
+// GetInverseRealFFTScale(fft_struct, FFTsize).
+//------------------------------------------------------------------------------
+template
+void g_fft::InverseRealFFT(std::complex *buf)
+{
+ void *ptr = buf;
+ FFT_TYPE *nbuf = static_cast(ptr);
+ riffts1(nbuf, FFT_N, Utbl, BRLow);
+}
+
+//------------------------------------------------------------------------------
+// Returns the amplitude scale that should be applied to the result of
+// an inverse complex FFT with a length of 'FFTsize' samples.
+//------------------------------------------------------------------------------
+template
+FFT_TYPE g_fft::GetInverseComplexFFTScale()
+{
+ return FFT_TYPE(1.0);
+}
+
+//------------------------------------------------------------------------------
+// Returns the amplitude scale that should be applied to the result of
+// an inverse real FFT with a length of 'FFTsize' samples.
+//------------------------------------------------------------------------------
+template
+FFT_TYPE g_fft::GetInverseRealFFTScale()
+{
+ return FFT_TYPE(1.0);
+}
+
+#endif
+
diff --git a/include-gpl/dsp/misc.h b/include-gpl/dsp/misc.h
new file mode 100644
index 000000000..f0c73f1f0
--- /dev/null
+++ b/include-gpl/dsp/misc.h
@@ -0,0 +1,75 @@
+// ----------------------------------------------------------------------------
+// misc.h -- Miscellaneous helper functions
+//
+// Copyright (C) 2006-2008
+// Dave Freese, W1HKJ
+//
+// This file is part of fldigi. These filters were adapted from code contained
+// in the gmfsk source code distribution.
+//
+// Fldigi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Fldigi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with fldigi. If not, see .
+// ----------------------------------------------------------------------------
+
+#ifndef _MISC_H
+#define _MISC_H
+
+#include
+
+inline float sinc(float x)
+{
+ return (fabs(x) < 1e-10) ? 1.0 : (sin(M_PI * x) / (M_PI * x));
+}
+
+inline float cosc(float x)
+{
+ return (fabs(x) < 1e-10) ? 0.0 : ((1.0 - cos(M_PI * x)) / (M_PI * x));
+}
+
+inline float clamp(float x, float min, float max)
+{
+ return (x < min) ? min : ((x > max) ? max : x);
+}
+
+/// This is always called with an int weight
+inline float decayavg(float average, float input, int weight)
+{
+ if (weight <= 1) return input;
+ return ( ( input - average ) / (float)weight ) + average ;
+}
+
+// following are defined inline to provide best performance
+inline float blackman(float x)
+{
+ return (0.42 - 0.50 * cos(2 * M_PI * x) + 0.08 * cos(4 * M_PI * x));
+}
+
+inline float hamming(float x)
+{
+ return 0.54 - 0.46 * cos(2 * M_PI * x);
+}
+
+inline float hanning(float x)
+{
+ return 0.5 - 0.5 * cos(2 * M_PI * x);
+}
+
+inline float rcos( float t, float T, float alpha=1.0 )
+{
+ if( t == 0 ) return 1.0;
+ float taT = T / (2.0 * alpha);
+ if( fabs(t) == taT ) return ((alpha/2.0) * sin(M_PI/(2.0*alpha)));
+ return (sin(M_PI*t/T)/(M_PI*t/T))*cos(alpha*M_PI*t/T)/(1.0-(t/taT)*(t/taT));
+}
+
+#endif
diff --git a/sdrbase/dsp/fftfilt.cxx b/sdrbase/dsp/fftfilt.cxx
new file mode 100644
index 000000000..2f7437548
--- /dev/null
+++ b/sdrbase/dsp/fftfilt.cxx
@@ -0,0 +1,172 @@
+// ----------------------------------------------------------------------------
+// fftfilt.cxx -- Fast convolution Overlap-Add filter
+//
+// Filter implemented using overlap-add FFT convolution method
+// h(t) characterized by Windowed-Sinc impulse response
+//
+// Reference:
+// "The Scientist and Engineer's Guide to Digital Signal Processing"
+// by Dr. Steven W. Smith, http://www.dspguide.com
+// Chapters 16, 18 and 21
+//
+// Copyright (C) 2006-2008 Dave Freese, W1HKJ
+//
+// This file is part of fldigi.
+//
+// Fldigi is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Fldigi is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with fldigi. If not, see .
+// ----------------------------------------------------------------------------
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+//------------------------------------------------------------------------------
+// initialize the filter
+// create forward and reverse FFTs
+//------------------------------------------------------------------------------
+
+// Only need a single instance of g_fft, used for both forward and reverse
+void fftfilt::init_filter()
+{
+ flen2 = flen >> 1;
+ fft = new g_fft(flen);
+
+ filter = new cmplx[flen];
+ timedata = new cmplx[flen];
+ freqdata = new cmplx[flen];
+ output = new cmplx[flen];
+ ovlbuf = new cmplx[flen2];
+ ht = new cmplx[flen];
+
+ memset(filter, 0, flen * sizeof(cmplx));
+ memset(timedata, 0, flen * sizeof(cmplx));
+ memset(freqdata, 0, flen * sizeof(cmplx));
+ memset(output, 0, flen * sizeof(cmplx));
+ memset(ovlbuf, 0, flen2 * sizeof(cmplx));
+ memset(ht, 0, flen * sizeof(cmplx));
+
+ inptr = 0;
+}
+
+//------------------------------------------------------------------------------
+// fft filter
+// f1 < f2 ==> band pass filter
+// f1 > f2 ==> band reject filter
+// f1 == 0 ==> low pass filter
+// f2 == 0 ==> high pass filter
+//------------------------------------------------------------------------------
+fftfilt::fftfilt(float f1, float f2, int len)
+{
+ flen = len;
+ init_filter();
+ create_filter(f1, f2);
+}
+
+fftfilt::~fftfilt()
+{
+ if (fft) delete fft;
+
+ if (filter) delete [] filter;
+ if (timedata) delete [] timedata;
+ if (freqdata) delete [] freqdata;
+ if (output) delete [] output;
+ if (ovlbuf) delete [] ovlbuf;
+ if (ht) delete [] ht;
+}
+
+void fftfilt::create_filter(float f1, float f2)
+{
+// initialize the filter to zero
+ memset(ht, 0, flen * sizeof(cmplx));
+
+// create the filter shape coefficients by fft
+// filter values initialized to the ht response h(t)
+ bool b_lowpass, b_highpass;//, window;
+ b_lowpass = (f2 != 0);
+ b_highpass = (f1 != 0);
+
+ for (int i = 0; i < flen2; i++) {
+ ht[i] = 0;
+//combine lowpass / highpass
+// lowpass @ f2
+ if (b_lowpass) ht[i] += fsinc(f2, i, flen2);
+// highighpass @ f1
+ if (b_highpass) ht[i] -= fsinc(f1, i, flen2);
+ }
+// highpass is delta[flen2/2] - h(t)
+ if (b_highpass && f2 < f1) ht[flen2 / 2] += 1;
+
+ for (int i = 0; i < flen2; i++)
+ ht[i] *= _blackman(i, flen2);
+
+// this may change since green fft is in place fft
+ memcpy(filter, ht, flen * sizeof(cmplx));
+
+// ht is flen complex points with imaginary all zero
+// first half describes h(t), second half all zeros
+// perform the cmplx forward fft to obtain H(w)
+// filter is flen/2 complex values
+
+ fft->ComplexFFT(filter);
+
+// normalize the output filter for unity gain
+ float scale = 0, mag;
+ for (int i = 0; i < flen2; i++) {
+ mag = abs(filter[i]);
+ if (mag > scale) scale = mag;
+ }
+ if (scale != 0) {
+ for (int i = 0; i < flen; i++)
+ filter[i] /= scale;
+ }
+}
+
+// Filter with fast convolution (overlap-add algorithm).
+int fftfilt::run(const cmplx & in, cmplx **out)
+{
+ timedata[inptr++] = in;
+
+ if (inptr < flen2)
+ return 0;
+ inptr = 0;
+
+ memcpy(freqdata, timedata, flen * sizeof(cmplx));
+ fft->ComplexFFT(freqdata);
+
+ for (int i = 0; i < flen; i++)
+ freqdata[i] *= filter[i];
+
+ fft->InverseComplexFFT(freqdata);
+
+ // overlap and add
+ for (int i = 0; i < flen2; i++) {
+ output[i] = ovlbuf[i] + freqdata[i];
+ ovlbuf[i] = freqdata[i+flen2];
+ }
+
+ *out = output;
+ return flen2;
+}
+