replace fftw with pffft

This commit is contained in:
Pavel Demin 2020-06-27 18:39:46 +00:00
parent 071d85029e
commit 9bf5d0b233
8 changed files with 2099 additions and 1430 deletions

View File

@ -1,7 +1,7 @@
TARGET = ft8d TARGET = ft8d
OBJECTS = \ OBJECTS = \
crc14.o crc.o ft8_downsample.o sync8d.o sync8.o grid2deg.o fftw3mod.o \ crc14.o crc.o ft8_downsample.o sync8d.o sync8.o grid2deg.o pffft.o \
four2a.o deg2grid.o determ.o baseline.o platanh.o bpdecode174_91.o \ four2a.o deg2grid.o determ.o baseline.o platanh.o bpdecode174_91.o \
fmtmsg.o packjt.o chkcrc14a.o indexx.o shell.o pctile.o polyfit.o \ fmtmsg.o packjt.o chkcrc14a.o indexx.o shell.o pctile.o polyfit.o \
twkfreq1.o osd174_91.o encode174_91.o chkcall.o packjt77.o genft8.o \ twkfreq1.o osd174_91.o encode174_91.o chkcall.o packjt77.o genft8.o \

1246
fftw3.f03

File diff suppressed because it is too large Load Diff

View File

@ -1,64 +0,0 @@
INTEGER FFTW_R2HC
PARAMETER (FFTW_R2HC=0)
INTEGER FFTW_HC2R
PARAMETER (FFTW_HC2R=1)
INTEGER FFTW_DHT
PARAMETER (FFTW_DHT=2)
INTEGER FFTW_REDFT00
PARAMETER (FFTW_REDFT00=3)
INTEGER FFTW_REDFT01
PARAMETER (FFTW_REDFT01=4)
INTEGER FFTW_REDFT10
PARAMETER (FFTW_REDFT10=5)
INTEGER FFTW_REDFT11
PARAMETER (FFTW_REDFT11=6)
INTEGER FFTW_RODFT00
PARAMETER (FFTW_RODFT00=7)
INTEGER FFTW_RODFT01
PARAMETER (FFTW_RODFT01=8)
INTEGER FFTW_RODFT10
PARAMETER (FFTW_RODFT10=9)
INTEGER FFTW_RODFT11
PARAMETER (FFTW_RODFT11=10)
INTEGER FFTW_FORWARD
PARAMETER (FFTW_FORWARD=-1)
INTEGER FFTW_BACKWARD
PARAMETER (FFTW_BACKWARD=+1)
INTEGER FFTW_MEASURE
PARAMETER (FFTW_MEASURE=0)
INTEGER FFTW_DESTROY_INPUT
PARAMETER (FFTW_DESTROY_INPUT=1)
INTEGER FFTW_UNALIGNED
PARAMETER (FFTW_UNALIGNED=2)
INTEGER FFTW_CONSERVE_MEMORY
PARAMETER (FFTW_CONSERVE_MEMORY=4)
INTEGER FFTW_EXHAUSTIVE
PARAMETER (FFTW_EXHAUSTIVE=8)
INTEGER FFTW_PRESERVE_INPUT
PARAMETER (FFTW_PRESERVE_INPUT=16)
INTEGER FFTW_PATIENT
PARAMETER (FFTW_PATIENT=32)
INTEGER FFTW_ESTIMATE
PARAMETER (FFTW_ESTIMATE=64)
INTEGER FFTW_ESTIMATE_PATIENT
PARAMETER (FFTW_ESTIMATE_PATIENT=128)
INTEGER FFTW_BELIEVE_PCOST
PARAMETER (FFTW_BELIEVE_PCOST=256)
INTEGER FFTW_DFT_R2HC_ICKY
PARAMETER (FFTW_DFT_R2HC_ICKY=512)
INTEGER FFTW_NONTHREADED_ICKY
PARAMETER (FFTW_NONTHREADED_ICKY=1024)
INTEGER FFTW_NO_BUFFERING
PARAMETER (FFTW_NO_BUFFERING=2048)
INTEGER FFTW_NO_INDIRECT_OP
PARAMETER (FFTW_NO_INDIRECT_OP=4096)
INTEGER FFTW_ALLOW_LARGE_GENERIC
PARAMETER (FFTW_ALLOW_LARGE_GENERIC=8192)
INTEGER FFTW_NO_RANK_SPLITS
PARAMETER (FFTW_NO_RANK_SPLITS=16384)
INTEGER FFTW_NO_VRANK_SPLITS
PARAMETER (FFTW_NO_VRANK_SPLITS=32768)
INTEGER FFTW_NO_VRECURSE
PARAMETER (FFTW_NO_VRECURSE=65536)
INTEGER FFTW_NO_SIMD
PARAMETER (FFTW_NO_SIMD=131072)

View File

@ -1,4 +0,0 @@
module FFTW3
use, intrinsic :: iso_c_binding
include 'fftw3.f03'
end module FFTW3

40
four2a.c Normal file
View File

@ -0,0 +1,40 @@
#include "pffft.h"
static struct
{
PFFFT_Setup *s;
int n;
} setups[10];
static int size;
void four2a_(float *a, int *nfft, int *ndim, int *sign, int *form)
{
int i;
PFFFT_Setup *s;
pffft_direction_t direction;
s = NULL;
for(i = 0; i < size; ++i)
{
if(setups[i].n == *nfft)
{
s = setups[i].s;
break;
}
}
if(s == NULL && size < 10)
{
s = pffft_new_setup(*nfft, PFFFT_COMPLEX);
setups[size].s = s;
setups[size].n = *nfft;
++size;
}
if(s != NULL)
{
direction = *sign == 1 ? PFFFT_BACKWARD : PFFFT_FORWARD;
pffft_transform_ordered(s, a, a, NULL, direction);
}
}

View File

@ -1,115 +0,0 @@
subroutine four2a(a,nfft,ndim,isign,iform)
! IFORM = 1, 0 or -1, as data is
! complex, real, or the first half of a complex array. Transform
! values are returned in array DATA. They are complex, real, or
! the first half of a complex array, as IFORM = 1, -1 or 0.
! The transform of a real array (IFORM = 0) dimensioned N(1) by N(2)
! by ... will be returned in the same array, now considered to
! be complex of dimensions N(1)/2+1 by N(2) by .... Note that if
! IFORM = 0 or -1, N(1) must be even, and enough room must be
! reserved. The missing values may be obtained by complex conjugation.
! The reverse transformation of a half complex array dimensioned
! N(1)/2+1 by N(2) by ..., is accomplished by setting IFORM
! to -1. In the N array, N(1) must be the true N(1), not N(1)/2+1.
! The transform will be real and returned to the input array.
! This version of four2a makes calls to the FFTW library to do the
! actual computations.
use fftw3
parameter (NPMAX=2100) !Max numberf of stored plans
parameter (NSMALL=16384) !Max size of "small" FFTs
complex a(nfft) !Array to be transformed
complex aa(NSMALL) !Local copy of "small" a()
integer nn(NPMAX),ns(NPMAX),nf(NPMAX) !Params of stored plans
integer*8 nl(NPMAX),nloc !More params of plans
integer*8 plan(NPMAX) !Pointers to stored plans
logical found_plan
data nplan/0/ !Number of stored plans
common/patience/npatience,nthreads !Patience and threads for FFTW plans
save plan,nplan,nn,ns,nf,nl
if(nfft.lt.0) go to 999
nloc=loc(a)
found_plan = .false.
!$omp critical(four2a_setup)
do i=1,nplan
if(nfft.eq.nn(i) .and. isign.eq.ns(i) .and. &
iform.eq.nf(i) .and. nloc.eq.nl(i)) then
found_plan = .true.
exit
end if
enddo
if(i.ge.NPMAX) stop 'Too many FFTW plans requested.'
if (.not. found_plan) then
nplan=nplan+1
i=nplan
nn(i)=nfft
ns(i)=isign
nf(i)=iform
nl(i)=nloc
! Planning: FFTW_ESTIMATE, FFTW_ESTIMATE_PATIENT, FFTW_MEASURE,
! FFTW_PATIENT, FFTW_EXHAUSTIVE
nflags=FFTW_ESTIMATE
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
if(npatience.eq.2) nflags=FFTW_MEASURE
if(npatience.eq.3) nflags=FFTW_PATIENT
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
if(nfft.le.NSMALL) then
jz=nfft
if(iform.eq.0) jz=nfft/2
aa(1:jz)=a(1:jz)
endif
!$omp critical(fftw) ! serialize non thread-safe FFTW3 calls
if(isign.eq.-1 .and. iform.eq.1) then
call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_FORWARD,nflags)
else if(isign.eq.1 .and. iform.eq.1) then
call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_BACKWARD,nflags)
else if(isign.eq.-1 .and. iform.eq.0) then
call sfftw_plan_dft_r2c_1d(plan(i),nfft,a,a,nflags)
else if(isign.eq.1 .and. iform.eq.-1) then
call sfftw_plan_dft_c2r_1d(plan(i),nfft,a,a,nflags)
else
stop 'Unsupported request in four2a'
endif
!$omp end critical(fftw)
if(nfft.le.NSMALL) then
jz=nfft
if(iform.eq.0) jz=nfft/2
a(1:jz)=aa(1:jz)
endif
end if
!$omp end critical(four2a_setup)
call sfftw_execute(plan(i))
return
999 continue
!$omp critical(four2a)
do i=1,nplan
! The test is only to silence a compiler warning:
if(ndim.ne.-999) then
!$omp critical(fftw) ! serialize non thread-safe FFTW3 calls
call sfftw_destroy_plan(plan(i))
!$omp end critical(fftw)
end if
enddo
call fftwf_cleanup()
nplan=0
!$omp end critical(four2a)
return
end subroutine four2a

1881
pffft.c Normal file

File diff suppressed because it is too large Load Diff

177
pffft.h Normal file
View File

@ -0,0 +1,177 @@
/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
Based on original fortran 77 code from FFTPACKv4 from NETLIB,
authored by Dr Paul Swarztrauber of NCAR, in 1985.
As confirmed by the NCAR fftpack software curators, the following
FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
released under the same terms.
FFTPACK license:
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
Copyright (c) 2004 the University Corporation for Atmospheric
Research ("UCAR"). All rights reserved. Developed by NCAR's
Computational and Information Systems Laboratory, UCAR,
www.cisl.ucar.edu.
Redistribution and use of the Software in source and binary forms,
with or without modification, is permitted provided that the
following conditions are met:
- Neither the names of NCAR's Computational and Information Systems
Laboratory, the University Corporation for Atmospheric Research,
nor the names of its sponsors or contributors may be used to
endorse or promote products derived from this Software without
specific prior written permission.
- Redistributions of source code must retain the above copyright
notices, this list of conditions, and the disclaimer below.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the disclaimer below in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
*/
/*
PFFFT : a Pretty Fast FFT.
This is basically an adaptation of the single precision fftpack
(v4) as found on netlib taking advantage of SIMD instruction found
on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
For architectures where no SIMD instruction is available, the code
falls back to a scalar version.
Restrictions:
- 1D transforms only, with 32-bit single precision.
- supports only transforms for inputs of length N of the form
N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
144, 160, etc are all acceptable lengths). Performance is best for
128<=N<=8192.
- all (float*) pointers in the functions below are expected to
have an "simd-compatible" alignment, that is 16 bytes on x86 and
powerpc CPUs.
You can allocate such buffers with the functions
pffft_aligned_malloc / pffft_aligned_free (or with stuff like
posix_memalign..)
*/
#ifndef PFFFT_H
#define PFFFT_H
#include <stddef.h> // for size_t
#ifdef __cplusplus
extern "C" {
#endif
/* opaque struct holding internal stuff (precomputed twiddle factors)
this struct can be shared by many threads as it contains only
read-only data.
*/
typedef struct PFFFT_Setup PFFFT_Setup;
/* direction of the transform */
typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
/* type of transform */
typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
/*
prepare for performing transforms of size N -- the returned
PFFFT_Setup structure is read-only so it can safely be shared by
multiple concurrent threads.
*/
PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
void pffft_destroy_setup(PFFFT_Setup *);
/*
Perform a Fourier transform , The z-domain data is stored in the
most efficient order for transforming it back, or using it for
convolution. If you need to have its content sorted in the
"usual" way, that is as an array of interleaved complex numbers,
either use pffft_transform_ordered , or call pffft_zreorder after
the forward fft, and before the backward fft.
Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
Typically you will want to scale the backward transform by 1/N.
The 'work' pointer should point to an area of N (2*N for complex
fft) floats, properly aligned. If 'work' is NULL, then stack will
be used instead (this is probably the best strategy for small
FFTs, say for N < 16384).
input and output may alias.
*/
void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
/*
Similar to pffft_transform, but makes sure that the output is
ordered as expected (interleaved complex numbers). This is
similar to calling pffft_transform and then pffft_zreorder.
input and output may alias.
*/
void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
/*
call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
PFFFT_FORWARD) if you want to have the frequency components in
the correct "canonical" order, as interleaved complex numbers.
(for real transforms, both 0-frequency and half frequency
components, which are real, are assembled in the first entry as
F(0)+i*F(n/2+1). Note that the original fftpack did place
F(n/2+1) at the end of the arrays).
input and output should not alias.
*/
void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
/*
Perform a multiplication of the frequency components of dft_a and
dft_b and accumulate them into dft_ab. The arrays should have
been obtained with pffft_transform(.., PFFFT_FORWARD) and should
*not* have been reordered with pffft_zreorder (otherwise just
perform the operation yourself as the dft coefs are stored as
interleaved complex numbers).
the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
/*
the float buffers must have the correct alignment (16-byte boundary
on intel and powerpc). This function may be used to obtain such
correctly aligned buffers.
*/
void *pffft_aligned_malloc(size_t nb_bytes);
void pffft_aligned_free(void *);
/* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */
int pffft_simd_size();
#ifdef __cplusplus
}
#endif
#endif // PFFFT_H