341 lines
10 KiB
C
341 lines
10 KiB
C
/* (c) Copyright 2008/2009 Xiph.Org Foundation */
|
|
/*
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
- Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
- Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "c64_fft.h"
|
|
|
|
#include "dsp_fft16x16t.h"
|
|
#include "dsp_fft32x32s.h"
|
|
#include "dsp_ifft32x32.h"
|
|
|
|
#ifndef PI
|
|
# ifdef M_PI
|
|
# define PI M_PI
|
|
# else
|
|
# define PI 3.14159265358979323846
|
|
# endif
|
|
#endif
|
|
|
|
|
|
/* ======================================================================== */
|
|
/* D2S -- Truncate a 'double' to a 'short', with clamping. */
|
|
/* ======================================================================== */
|
|
static short d2s(double d)
|
|
{
|
|
if (d >= 32767.0) return 32767;
|
|
if (d <= -32768.0) return -32768;
|
|
return (short)d;
|
|
}
|
|
|
|
|
|
/* ======================================================================== */
|
|
/* D2S -- Truncate a 'double' to a 'int', with clamping. */
|
|
/* ======================================================================== */
|
|
static int d2i(double d)
|
|
{
|
|
if (d >= 2147483647.0) return (int)0x7FFFFFFF;
|
|
if (d <= -2147483648.0) return (int)0x80000000;
|
|
return (int)d;
|
|
}
|
|
|
|
|
|
/* ======================================================================== */
|
|
/* GEN_TWIDDLE -- Generate twiddle factors for TI's custom FFTs. */
|
|
/* */
|
|
/* USAGE */
|
|
/* This routine is called as follows: */
|
|
/* */
|
|
/* int gen_twiddle(short *w, int n, double scale) */
|
|
/* */
|
|
/* short *w Pointer to twiddle-factor array */
|
|
/* int n Size of FFT */
|
|
/* double scale Scale factor to apply to values. */
|
|
/* */
|
|
/* The routine will generate the twiddle-factors directly into the */
|
|
/* array you specify. The array needs to be approximately 2*N */
|
|
/* elements long. (The actual size, which is slightly smaller, is */
|
|
/* returned by the function.) */
|
|
/* ======================================================================== */
|
|
int gen_twiddle16(short *w, int n, double scale)
|
|
{
|
|
int i, j, k;
|
|
|
|
for (j = 1, k = 0; j < n >> 2; j = j << 2)
|
|
{
|
|
for (i = 0; i < n >> 2; i += j << 1)
|
|
{
|
|
w[k + 11] = d2s(scale * cos(6.0 * PI * (i + j) / n));
|
|
w[k + 10] = d2s(scale * sin(6.0 * PI * (i + j) / n));
|
|
w[k + 9] = d2s(scale * cos(6.0 * PI * (i ) / n));
|
|
w[k + 8] = d2s(scale * sin(6.0 * PI * (i ) / n));
|
|
|
|
w[k + 7] = d2s(scale * cos(4.0 * PI * (i + j) / n));
|
|
w[k + 6] = d2s(scale * sin(4.0 * PI * (i + j) / n));
|
|
w[k + 5] = d2s(scale * cos(4.0 * PI * (i ) / n));
|
|
w[k + 4] = d2s(scale * sin(4.0 * PI * (i ) / n));
|
|
|
|
w[k + 3] = d2s(scale * cos(2.0 * PI * (i + j) / n));
|
|
w[k + 2] = d2s(scale * sin(2.0 * PI * (i + j) / n));
|
|
w[k + 1] = d2s(scale * cos(2.0 * PI * (i ) / n));
|
|
w[k + 0] = d2s(scale * sin(2.0 * PI * (i ) / n));
|
|
|
|
k += 12;
|
|
}
|
|
}
|
|
|
|
return k;
|
|
}
|
|
|
|
|
|
/* ======================================================================== */
|
|
/* GEN_TWIDDLE -- Generate twiddle factors for TI's custom FFTs. */
|
|
/* */
|
|
/* USAGE */
|
|
/* This routine is called as follows: */
|
|
/* */
|
|
/* int gen_twiddle(int *w, int n, double scale) */
|
|
/* */
|
|
/* int *w Pointer to twiddle-factor array */
|
|
/* int n Size of FFT */
|
|
/* double scale Scale factor to apply to values. */
|
|
/* */
|
|
/* The routine will generate the twiddle-factors directly into the */
|
|
/* array you specify. The array needs to be approximately 2*N */
|
|
/* elements long. (The actual size, which is slightly smaller, is */
|
|
/* returned by the function.) */
|
|
/* ======================================================================== */
|
|
int gen_twiddle32(int *w, int n, double scale)
|
|
{
|
|
int i, j, k, s=0, t;
|
|
|
|
for (j = 1, k = 0; j < n >> 2; j = j << 2, s++)
|
|
{
|
|
for (i = t=0; i < n >> 2; i += j, t++)
|
|
{
|
|
w[k + 5] = d2i(scale * cos(6.0 * PI * i / n));
|
|
w[k + 4] = d2i(scale * sin(6.0 * PI * i / n));
|
|
|
|
w[k + 3] = d2i(scale * cos(4.0 * PI * i / n));
|
|
w[k + 2] = d2i(scale * sin(4.0 * PI * i / n));
|
|
|
|
w[k + 1] = d2i(scale * cos(2.0 * PI * i / n));
|
|
w[k + 0] = d2i(scale * sin(2.0 * PI * i / n));
|
|
|
|
k += 6;
|
|
}
|
|
}
|
|
|
|
return k;
|
|
}
|
|
|
|
#define NBCACHE 3
|
|
static c64_fft_t *cache16[NBCACHE] = {NULL,};
|
|
static c64_fft_t *cache32[NBCACHE] = {NULL,};
|
|
|
|
c64_fft_t *c64_fft16_alloc(int length, int x, int y)
|
|
{
|
|
c64_fft_t *state;
|
|
celt_int16 *w, *iw;
|
|
|
|
int i, c;
|
|
|
|
for (c = 0; c < NBCACHE; c++) {
|
|
if (cache16[c] == NULL)
|
|
break;
|
|
if (cache16[c]->nfft == length)
|
|
return cache16[c];
|
|
}
|
|
|
|
state = (c64_fft_t *)celt_alloc(sizeof(c64_fft_t));
|
|
state->shift = log(length)/log(2) - ceil(log(length)/log(4)-1);
|
|
state->nfft = length;
|
|
state->twiddle = celt_alloc(length*2*sizeof(celt_int16));
|
|
state->itwiddle = celt_alloc(length*2*sizeof(celt_int16));
|
|
|
|
gen_twiddle16((celt_int16 *)state->twiddle, length, 32767.0);
|
|
|
|
w = (celt_int16 *)state->twiddle;
|
|
iw = (celt_int16 *)state->itwiddle;
|
|
|
|
for (i = 0; i < length; i++) {
|
|
iw[2*i+0] = w[2*i+0];
|
|
iw[2*i+1] = - w[2*i+1];
|
|
}
|
|
|
|
if (c < NBCACHE)
|
|
cache16[c++] = state;
|
|
if (c < NBCACHE)
|
|
cache16[c] = NULL;
|
|
|
|
return state;
|
|
}
|
|
|
|
|
|
c64_fft_t *c64_fft32_alloc(int length, int x, int y)
|
|
{
|
|
c64_fft_t *state;
|
|
int i, c;
|
|
|
|
for (c = 0; c < NBCACHE; c++) {
|
|
if (cache32[c] == NULL)
|
|
break;
|
|
if (cache32[c]->nfft == length)
|
|
return cache32[c];
|
|
}
|
|
|
|
state = (c64_fft_t *)celt_alloc(sizeof(c64_fft_t));
|
|
state->shift = log(length)/log(2) - ceil(log(length)/log(4)-1);
|
|
state->nfft = length;
|
|
state->twiddle = celt_alloc(length*2*sizeof(celt_int32));
|
|
state->itwiddle = celt_alloc(length*2*sizeof(celt_int32));
|
|
|
|
// Generate the inverse twiddle first because it does not need scaling
|
|
gen_twiddle32(state->itwiddle, length, 2147483647.000000000);
|
|
|
|
for (i = 0; i < length; i++) {
|
|
state->twiddle[2*i+0] = state->itwiddle[2*i+0] >> 1;
|
|
state->twiddle[2*i+1] = state->itwiddle[2*i+1] >> 1;
|
|
}
|
|
|
|
if (c < NBCACHE)
|
|
cache32[c++] = state;
|
|
if (c < NBCACHE)
|
|
cache32[c] = NULL;
|
|
|
|
return state;
|
|
}
|
|
|
|
|
|
void c64_fft16_free(c64_fft_t *state)
|
|
{
|
|
c64_fft32_free(state);
|
|
}
|
|
|
|
|
|
void c64_fft32_free(c64_fft_t *state)
|
|
{
|
|
}
|
|
|
|
|
|
void c64_fft16_inplace(c64_fft_t * restrict state, celt_int16 *X)
|
|
{
|
|
int i;
|
|
VARDECL(celt_int16, cin);
|
|
VARDECL(celt_int16, cout);
|
|
SAVE_STACK;
|
|
|
|
ALLOC(cin, state->nfft*2, celt_int16);
|
|
ALLOC(cout, state->nfft*2, celt_int16);
|
|
|
|
for (i = 0; i < state->nfft; i++) {
|
|
cin[2*i+0] = X[2*i+0];
|
|
cin[2*i+1] = X[2*i+1];
|
|
}
|
|
|
|
DSP_fft16x16t((celt_int16 *)state->twiddle, state->nfft, cin, cout);
|
|
|
|
for (i = 0; i < state->nfft; i++) {
|
|
X[2*i+0] = cout[2*i+0];
|
|
X[2*i+1] = cout[2*i+1];
|
|
}
|
|
|
|
RESTORE_STACK;
|
|
}
|
|
|
|
|
|
|
|
void c64_fft32(c64_fft_t * restrict state, const celt_int32 *X, celt_int32 *Y)
|
|
{
|
|
int i;
|
|
VARDECL(celt_int32, cin);
|
|
SAVE_STACK;
|
|
ALLOC(cin, state->nfft*2, celt_int32);
|
|
|
|
for (i = 0; i < state->nfft; i++) {
|
|
cin[2*i+0] = X[2*i+0] >> state->shift;
|
|
cin[2*i+1] = X[2*i+1] >> state->shift;
|
|
}
|
|
|
|
DSP_fft32x32s(state->twiddle, state->nfft, cin, Y);
|
|
|
|
RESTORE_STACK;
|
|
}
|
|
|
|
|
|
void c64_ifft16(c64_fft_t * restrict state, const celt_int16 *X, celt_int16 *Y)
|
|
{
|
|
int i;
|
|
VARDECL(celt_int16, cin);
|
|
VARDECL(celt_int16, cout);
|
|
SAVE_STACK;
|
|
|
|
ALLOC(cin, state->nfft*2, celt_int16);
|
|
if ((celt_int32)Y & 7)
|
|
ALLOC(cout, state->nfft*2, celt_int16);
|
|
else
|
|
cout = Y;
|
|
|
|
for (i = 0; i < state->nfft; i++) {
|
|
// No need to scale for this one but still need to save the input
|
|
// because the fft is going to change it!
|
|
cin[2*i+0] = X[2*i+0];
|
|
cin[2*i+1] = X[2*i+1];
|
|
}
|
|
|
|
DSP_fft16x16t((celt_int16 *)state->itwiddle, state->nfft, cin, cout);
|
|
|
|
if ((celt_int32)Y & 7)
|
|
for (i = 0; i < state->nfft; i++) {
|
|
Y[2*i+0] = cout[2*i+0];
|
|
Y[2*i+1] = cout[2*i+1];
|
|
}
|
|
|
|
RESTORE_STACK;
|
|
}
|
|
|
|
|
|
void c64_ifft32(c64_fft_t * restrict state, const celt_int32 *X, celt_int32 *Y)
|
|
{
|
|
int i;
|
|
VARDECL(celt_int32, cin);
|
|
SAVE_STACK;
|
|
ALLOC(cin, state->nfft*2, celt_int32);
|
|
|
|
celt_assert(Y & 7 == 0);
|
|
|
|
for (i = 0; i < state->nfft; i++) {
|
|
// No need to scale for this one but still need to save the input
|
|
// because the fft is going to change it!
|
|
cin[2*i+0] = X[2*i+0];
|
|
cin[2*i+1] = X[2*i+1];
|
|
}
|
|
|
|
DSP_ifft32x32(state->itwiddle, state->nfft, cin, Y);
|
|
|
|
RESTORE_STACK;
|
|
}
|
|
|
|
|