2023-08-13 18:52:57 -04:00
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
2023-11-19 00:43:20 -05:00
|
|
|
// Copyright (C) 2023 Jon Beniston, M7RCE <jon@beniston.com> //
|
2023-08-13 18:52:57 -04:00
|
|
|
// //
|
|
|
|
// This program is free software; you can redistribute it and/or modify //
|
|
|
|
// it under the terms of the GNU General Public License as published by //
|
|
|
|
// the Free Software Foundation as version 3 of the License, or //
|
|
|
|
// (at your option) any later version. //
|
|
|
|
// //
|
|
|
|
// This program is distributed in the hope that it will be useful, //
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
|
|
|
|
// GNU General Public License V3 for more details. //
|
|
|
|
// //
|
|
|
|
// You should have received a copy of the GNU General Public License //
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
#include <QDebug>
|
|
|
|
|
|
|
|
#include "dsp/cudavkfftengine.h"
|
2024-06-04 03:11:34 -04:00
|
|
|
#include "util/profiler.h"
|
2023-08-13 18:52:57 -04:00
|
|
|
|
|
|
|
CUDAvkFFTEngine::CUDAvkFFTEngine()
|
|
|
|
{
|
|
|
|
VkFFTResult resFFT;
|
|
|
|
resFFT = gpuInit();
|
|
|
|
if (resFFT != VKFFT_SUCCESS)
|
|
|
|
{
|
|
|
|
qDebug() << "CUDAvkFFTEngine::CUDAvkFFTEngine: Failed to initialise GPU" << getVkFFTErrorString(resFFT);
|
|
|
|
delete vkGPU;
|
|
|
|
vkGPU = nullptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
CUDAvkFFTEngine::~CUDAvkFFTEngine()
|
|
|
|
{
|
|
|
|
if (vkGPU)
|
|
|
|
{
|
|
|
|
freeAll();
|
|
|
|
cuCtxDestroy(vkGPU->context);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const QString CUDAvkFFTEngine::m_name = "vkFFT (CUDA)";
|
|
|
|
|
|
|
|
QString CUDAvkFFTEngine::getName() const
|
|
|
|
{
|
|
|
|
return m_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
VkFFTResult CUDAvkFFTEngine::gpuInit()
|
|
|
|
{
|
|
|
|
CUresult res = CUDA_SUCCESS;
|
|
|
|
cudaError_t res2 = cudaSuccess;
|
|
|
|
res = cuInit(0);
|
|
|
|
if (res != CUDA_SUCCESS) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_INITIALIZE;
|
|
|
|
}
|
|
|
|
res2 = cudaSetDevice((int)vkGPU->device_id);
|
|
|
|
if (res2 != cudaSuccess) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID;
|
|
|
|
}
|
|
|
|
res = cuDeviceGet(&vkGPU->device, (int)vkGPU->device_id);
|
|
|
|
if (res != CUDA_SUCCESS) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_GET_DEVICE;
|
|
|
|
}
|
|
|
|
res = cuDevicePrimaryCtxRetain(&vkGPU->context, (int)vkGPU->device);
|
|
|
|
if (res != CUDA_SUCCESS) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT;
|
|
|
|
}
|
|
|
|
return VKFFT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
VkFFTResult CUDAvkFFTEngine::gpuAllocateBuffers()
|
|
|
|
{
|
|
|
|
cudaError_t res;
|
|
|
|
CUDAPlan *plan = reinterpret_cast<CUDAPlan *>(m_currentPlan);
|
|
|
|
|
|
|
|
// Allocate DMA accessible pinned memory, which may be faster than malloc'ed memory
|
|
|
|
res = cudaHostAlloc(&plan->m_in, sizeof(Complex) * plan->n, cudaHostAllocMapped);
|
|
|
|
if (res != cudaSuccess) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_ALLOCATE;
|
|
|
|
}
|
|
|
|
res = cudaHostAlloc(&plan->m_out, sizeof(Complex) * plan->n, cudaHostAllocMapped);
|
|
|
|
if (res != cudaSuccess) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_ALLOCATE;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocate GPU memory
|
|
|
|
res = cudaMalloc((void**)&plan->m_buffer, sizeof(cuFloatComplex) * plan->n * 2);
|
|
|
|
if (res != cudaSuccess) {
|
|
|
|
return VKFFT_ERROR_FAILED_TO_ALLOCATE;
|
|
|
|
}
|
|
|
|
|
|
|
|
plan->m_configuration->buffer = (void**)&plan->m_buffer;
|
|
|
|
|
|
|
|
return VKFFT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
VkFFTResult CUDAvkFFTEngine::gpuConfigure()
|
|
|
|
{
|
|
|
|
return VKFFT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CUDAvkFFTEngine::transform()
|
|
|
|
{
|
|
|
|
if (m_currentPlan)
|
|
|
|
{
|
|
|
|
CUDAPlan *plan = reinterpret_cast<CUDAPlan *>(m_currentPlan);
|
|
|
|
cudaError_t res = cudaSuccess;
|
|
|
|
void* buffer = ((void**)&plan->m_buffer)[0];
|
|
|
|
|
|
|
|
// Transfer input from CPU to GPU memory
|
|
|
|
PROFILER_START()
|
|
|
|
res = cudaMemcpy(buffer, plan->m_in, plan->m_bufferSize, cudaMemcpyHostToDevice);
|
|
|
|
PROFILER_STOP(QString("%1 TX %2").arg(getName()).arg(m_currentPlan->n))
|
|
|
|
if (res != cudaSuccess) {
|
|
|
|
qDebug() << "CUDAvkFFTEngine::transform: cudaMemcpy host to device failed";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Perform FFT
|
|
|
|
PROFILER_RESTART()
|
|
|
|
VkFFTLaunchParams launchParams = {};
|
|
|
|
VkFFTResult resFFT = VkFFTAppend(plan->m_app, plan->m_inverse ? 1 : -1, &launchParams);
|
|
|
|
PROFILER_STOP(QString("%1 FFT %2").arg(getName()).arg(m_currentPlan->n))
|
|
|
|
if (resFFT != VKFFT_SUCCESS) {
|
|
|
|
qDebug() << "CUDAvkFFTEngine::transform: VkFFTAppend failed:" << getVkFFTErrorString(resFFT);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Transfer result from GPU to CPU memory
|
|
|
|
PROFILER_RESTART()
|
|
|
|
res = cudaMemcpy(plan->m_out, buffer, plan->m_bufferSize, cudaMemcpyDeviceToHost);
|
|
|
|
PROFILER_STOP(QString("%1 RX %2").arg(getName()).arg(m_currentPlan->n))
|
|
|
|
if (res != cudaSuccess) {
|
|
|
|
qDebug() << "CUDAvkFFTEngine::transform: cudaMemcpy device to host failed";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
vkFFTEngine::Plan *CUDAvkFFTEngine::gpuAllocatePlan()
|
|
|
|
{
|
|
|
|
return new CUDAPlan();
|
|
|
|
}
|
|
|
|
|
|
|
|
void CUDAvkFFTEngine::gpuDeallocatePlan(Plan *p)
|
|
|
|
{
|
|
|
|
CUDAPlan *plan = reinterpret_cast<CUDAPlan *>(p);
|
|
|
|
|
|
|
|
cudaFree(plan->m_in);
|
|
|
|
plan->m_in = nullptr;
|
|
|
|
cudaFree(plan->m_out);
|
|
|
|
plan->m_out = nullptr;
|
|
|
|
cudaFree(plan->m_buffer);
|
|
|
|
}
|