174 lines
5.6 KiB
C
174 lines
5.6 KiB
C
/* Copyright (c) 2015 Xiph.Org Foundation
   Written by Viswanath Puttagunta */
/**
   @file celt_fft_ne10.c
   @brief ARM Neon optimizations for fft using NE10 library
 */

/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
|
|
|
|
#ifndef SKIP_CONFIG_H
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
#endif
|
|
|
|
#include <NE10_dsp.h>
|
|
#include "os_support.h"
|
|
#include "kiss_fft.h"
|
|
#include "stack_alloc.h"
|
|
|
|
/* Map the generic NE10_FFT_* names used in this file onto the NE10 float32
   API (floating-point build) or the NE10 int32 API (FIXED_POINT build).
   Every name is an object-like alias, so NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft)
   expands to a plain call of the selected NE10 allocator in both builds. */
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_int32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
|
|
|
|
#if defined(CUSTOM_MODES)
|
|
|
|
/* FFT lengths for which NE10 offers a scaled transform.  The allocator in
   this file looks st->nfft up in this table; a length that is not listed is
   flagged as unsupported. */
# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { 480, 240, 120, 60 };
|
|
|
|
/**
   Allocates the architecture-specific FFT state and attaches it to st.

   st->arch_fft is always (re)assigned.  When st->nfft is one of the lengths
   in ne10_fft_scaled_support, an NE10 configuration is allocated and stored
   in st->arch_fft->priv; otherwise the state is marked unsupported and priv
   is NULL.

   @param st  FFT state; st->nfft is read, st->arch_fft is written.
   @return 0 on success (including the "length not supported" case),
           -1 if either allocation fails.  After a failed NE10 allocation
           st->arch_fft stays allocated with is_supported == 0 and
           priv == NULL, so it can still be released through
           opus_fft_free_arm_neon().
 */
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
   int i;
   size_t memneeded = sizeof(struct arch_fft_state);

   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
   if (!st->arch_fft)
      return -1;

   /* Start from the "unsupported" state so that is_supported is never 1
      while priv is NULL, whichever way this function exits. */
   st->arch_fft->is_supported = 0;
   st->arch_fft->priv = NULL;

   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
      if (st->nfft == ne10_fft_scaled_support[i])
         break;
   }
   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
      /* This nfft length (scaled fft) is not supported in NE10 */
      return 0;
   }

   st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
   if (st->arch_fft->priv == NULL) {
      /* NE10 could not build its configuration: report the failure and
         leave the state flagged as unsupported. */
      return -1;
   }
   st->arch_fft->is_supported = 1;
   return 0;
}
|
|
|
|
/**
   Releases the architecture-specific FFT state attached to st.

   Destroys the NE10 configuration held in st->arch_fft->priv (when there is
   one), frees st->arch_fft itself and resets the pointer to NULL, so that a
   repeated call is a no-op instead of a double free.

   @param st  FFT state; does nothing when st->arch_fft is already NULL.
 */
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
   NE10_FFT_CFG_TYPE_T cfg;

   if (!st->arch_fft)
      return;

   cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
   if (cfg)
      NE10_FFT_DESTROY_C2C_TYPE(cfg);
   opus_free(st->arch_fft);
   /* Do not leave a dangling pointer behind in the shared state. */
   st->arch_fft = NULL;
}
|
|
#endif
|
|
|
|
void opus_fft_neon(const kiss_fft_state *st,
|
|
const kiss_fft_cpx *fin,
|
|
kiss_fft_cpx *fout)
|
|
{
|
|
NE10_FFT_STATE_TYPE_T state;
|
|
NE10_FFT_CFG_TYPE_T cfg = &state;
|
|
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
|
|
SAVE_STACK;
|
|
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
|
|
|
|
if (!st->arch_fft->is_supported) {
|
|
/* This nfft length (scaled fft) not supported in NE10 */
|
|
opus_fft_c(st, fin, fout);
|
|
}
|
|
else {
|
|
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
|
|
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
|
|
#if !defined(FIXED_POINT)
|
|
state.is_forward_scaled = 1;
|
|
|
|
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
|
|
(NE10_FFT_CPX_TYPE_T *)fin,
|
|
cfg, 0);
|
|
#else
|
|
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
|
|
(NE10_FFT_CPX_TYPE_T *)fin,
|
|
cfg, 0, 1);
|
|
#endif
|
|
}
|
|
RESTORE_STACK;
|
|
}
|
|
|
|
void opus_ifft_neon(const kiss_fft_state *st,
|
|
const kiss_fft_cpx *fin,
|
|
kiss_fft_cpx *fout)
|
|
{
|
|
NE10_FFT_STATE_TYPE_T state;
|
|
NE10_FFT_CFG_TYPE_T cfg = &state;
|
|
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
|
|
SAVE_STACK;
|
|
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
|
|
|
|
if (!st->arch_fft->is_supported) {
|
|
/* This nfft length (scaled fft) not supported in NE10 */
|
|
opus_ifft_c(st, fin, fout);
|
|
}
|
|
else {
|
|
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
|
|
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
|
|
#if !defined(FIXED_POINT)
|
|
state.is_backward_scaled = 0;
|
|
|
|
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
|
|
(NE10_FFT_CPX_TYPE_T *)fin,
|
|
cfg, 1);
|
|
#else
|
|
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
|
|
(NE10_FFT_CPX_TYPE_T *)fin,
|
|
cfg, 1, 0);
|
|
#endif
|
|
}
|
|
RESTORE_STACK;
|
|
}
|