/////////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2023 Jon Beniston, M7RCE // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation as version 3 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License V3 for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program. If not, see . // /////////////////////////////////////////////////////////////////////////////////// #include #include "glslang_c_interface.h" #include "dsp/vulkanvkfftengine.h" #include "util/profiler.h" class GLSInitialiser { public: GLSInitialiser() { glslang_initialize_process(); }; ~GLSInitialiser() { glslang_finalize_process(); } }; static GLSInitialiser glsInitialiser; VulkanvkFFTEngine::VulkanvkFFTEngine() { VkFFTResult resFFT; resFFT = gpuInit(); if (resFFT != VKFFT_SUCCESS) { qDebug() << "VulkanvkFFTEngine::VulkanvkFFTEngine: Failed to initialise GPU:" << getVkFFTErrorString(resFFT); delete vkGPU; vkGPU = nullptr; } } VulkanvkFFTEngine::~VulkanvkFFTEngine() { if (vkGPU) { freeAll(); vkDestroyFence(vkGPU->device, vkGPU->fence, nullptr); vkDestroyCommandPool(vkGPU->device, vkGPU->commandPool, nullptr); vkDestroyDevice(vkGPU->device, nullptr); DestroyDebugUtilsMessengerEXT(vkGPU, nullptr); vkDestroyInstance(vkGPU->instance, nullptr); } } const QString VulkanvkFFTEngine::m_name = "vkFFT (Vulkan)"; QString VulkanvkFFTEngine::getName() const { return m_name; } VkFFTResult VulkanvkFFTEngine::gpuInit() { VkResult res = VK_SUCCESS; // To enable validation on Windows: // set VK_LAYER_PATH=%VULKAN_SDK%\Bin // set VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_api_dump;VK_LAYER_KHRONOS_validation // https://vulkan.lunarg.com/doc/view/1.3.204.1/windows/layer_configuration.html // Create vk_layer_settings.txt in working dir // Or run vkconfig to do so // Create instance - a connection between the application and the Vulkan library res = createInstance(vkGPU, 0); if (res != 0) { return VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE; } // Set up the debugging messenger res = setupDebugMessenger(vkGPU); if (res != 0) { return VKFFT_ERROR_FAILED_TO_SETUP_DEBUG_MESSENGER; } // Check if there are GPUs that support Vulkan and select one res = findPhysicalDevice(vkGPU); if (res != 0) { return VKFFT_ERROR_FAILED_TO_FIND_PHYSICAL_DEVICE; } // Create logical device representation res = createDevice(vkGPU, 0); if (res != 0) { return VKFFT_ERROR_FAILED_TO_CREATE_DEVICE; } // Create fence for synchronization res = createFence(vkGPU); if (res != 0) { return VKFFT_ERROR_FAILED_TO_CREATE_FENCE; } // Create a place, command buffer memory is allocated from res = createCommandPool(vkGPU); if (res != 0) { return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_POOL; } vkGetPhysicalDeviceProperties(vkGPU->physicalDevice, &vkGPU->physicalDeviceProperties); vkGetPhysicalDeviceMemoryProperties(vkGPU->physicalDevice, &vkGPU->physicalDeviceMemoryProperties); return VKFFT_SUCCESS; } VkFFTResult VulkanvkFFTEngine::gpuAllocateBuffers() { VkFFTResult resFFT; VulkanPlan *plan = reinterpret_cast(m_currentPlan); // Allocate GPU memory resFFT = allocateBuffer(vkGPU, &plan->m_buffer, &plan->m_bufferDeviceMemory, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, plan->m_bufferSize); if (resFFT != VKFFT_SUCCESS) { return resFFT; } // Allocate CPU/GPU memory (Requires m_currentPlan->m_buffer to have been created) resFFT = vulkanAllocateIn(plan); if (resFFT != VKFFT_SUCCESS) { return resFFT; } resFFT = vulkanAllocateOut(plan); if (resFFT != VKFFT_SUCCESS) { return resFFT; } plan->m_configuration->buffer = &plan->m_buffer; return VKFFT_SUCCESS; } VkFFTResult VulkanvkFFTEngine::gpuConfigure() { VkFFTResult resFFT; VulkanPlan *plan = reinterpret_cast(m_currentPlan); // Allocate command buffer with command to perform FFT resFFT = vulkanAllocateFFTCommand(plan); if (resFFT != VKFFT_SUCCESS) { return resFFT; } return VKFFT_SUCCESS; } // Allocate CPU to GPU memory buffer VkFFTResult VulkanvkFFTEngine::vulkanAllocateIn(VulkanPlan *plan) { VkFFTResult resFFT; VkResult res = VK_SUCCESS; VkBuffer* buffer = (VkBuffer*)&plan->m_buffer; resFFT = allocateBuffer(vkGPU, &plan->m_inBuffer, &plan->m_inMemory, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, m_currentPlan->m_bufferSize); if (resFFT != VKFFT_SUCCESS) { return resFFT; } void* data; res = vkMapMemory(vkGPU->device, plan->m_inMemory, 0, plan->m_bufferSize, 0, &data); if (res != VK_SUCCESS) { return VKFFT_ERROR_FAILED_TO_MAP_MEMORY; } plan->m_in = (Complex*) data; return VKFFT_SUCCESS; } // Allocate GPU to CPU memory buffer VkFFTResult VulkanvkFFTEngine::vulkanAllocateOut(VulkanPlan *plan) { VkFFTResult resFFT; VkResult res; VkBuffer* buffer = (VkBuffer*)&plan->m_buffer; resFFT = allocateBuffer(vkGPU, &plan->m_outBuffer, &plan->m_outMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, m_currentPlan->m_bufferSize); if (resFFT != VKFFT_SUCCESS) { return resFFT; } void* data; res = vkMapMemory(vkGPU->device, plan->m_outMemory, 0, plan->m_bufferSize, 0, &data); if (res != VK_SUCCESS) { return VKFFT_ERROR_FAILED_TO_MAP_MEMORY; } plan->m_out = (Complex*) data; return VKFFT_SUCCESS; } void VulkanvkFFTEngine::vulkanDeallocateIn(VulkanPlan *plan) { vkUnmapMemory(vkGPU->device, plan->m_inMemory); vkDestroyBuffer(vkGPU->device, plan->m_inBuffer, nullptr); vkFreeMemory(vkGPU->device, plan->m_inMemory, nullptr); plan->m_in = nullptr; } void VulkanvkFFTEngine::vulkanDeallocateOut(VulkanPlan *plan) { vkUnmapMemory(vkGPU->device, plan->m_outMemory); vkDestroyBuffer(vkGPU->device, plan->m_outBuffer, nullptr); vkFreeMemory(vkGPU->device, plan->m_outMemory, nullptr); plan->m_out = nullptr; } VkFFTResult VulkanvkFFTEngine::vulkanAllocateFFTCommand(VulkanPlan *plan) { VkFFTResult resFFT; VkResult res = VK_SUCCESS; VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; commandBufferAllocateInfo.commandPool = vkGPU->commandPool; commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; commandBufferAllocateInfo.commandBufferCount = 1; res = vkAllocateCommandBuffers(vkGPU->device, &commandBufferAllocateInfo, &plan->m_commandBuffer); if (res != 0) { return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS; } VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; commandBufferBeginInfo.flags = 0; res = vkBeginCommandBuffer(plan->m_commandBuffer, &commandBufferBeginInfo); if (res != 0) { return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER; } VkBuffer* buffer = (VkBuffer*)&plan->m_buffer; // Copy from CPU to GPU VkBufferCopy copyRegionIn = { 0 }; copyRegionIn.srcOffset = 0; copyRegionIn.dstOffset = 0; copyRegionIn.size = plan->m_bufferSize; vkCmdCopyBuffer(plan->m_commandBuffer, plan->m_inBuffer, buffer[0], 1, ©RegionIn); // Wait for copy to complete VkMemoryBarrier memoryBarrierIn = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, 0, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, }; vkCmdPipelineBarrier( plan->m_commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &memoryBarrierIn, 0, 0, 0, 0); // Perform FFT VkFFTLaunchParams launchParams = {}; launchParams.commandBuffer = &plan->m_commandBuffer; resFFT = VkFFTAppend(plan->m_app, plan->m_inverse, &launchParams); if (resFFT != VKFFT_SUCCESS) { return resFFT; } // Wait for FFT to complete VkMemoryBarrier memoryBarrierOut = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, 0, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, }; vkCmdPipelineBarrier( plan->m_commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryBarrierIn, 0, 0, 0, 0); // Copy from GPU to CPU VkBufferCopy copyRegionOut = { 0 }; copyRegionOut.srcOffset = 0; copyRegionOut.dstOffset = 0; copyRegionOut.size = plan->m_bufferSize; vkCmdCopyBuffer(plan->m_commandBuffer, buffer[0], plan->m_outBuffer, 1, ©RegionOut); res = vkEndCommandBuffer(plan->m_commandBuffer); if (res != 0) { return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER; } return VKFFT_SUCCESS; } void VulkanvkFFTEngine::transform() { PROFILER_START() VkResult res = VK_SUCCESS; VulkanPlan *plan = reinterpret_cast(m_currentPlan); VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &plan->m_commandBuffer; res = vkQueueSubmit(vkGPU->queue, 1, &submitInfo, vkGPU->fence); if (res != 0) { qDebug() << "VulkanvkFFTEngine::transform: Failed to submit to queue"; } res = vkWaitForFences(vkGPU->device, 1, &vkGPU->fence, VK_TRUE, 100000000000); if (res != 0) { qDebug() << "VulkanvkFFTEngine::transform: Failed to wait for fences"; } res = vkResetFences(vkGPU->device, 1, &vkGPU->fence); if (res != 0) { qDebug() << "VulkanvkFFTEngine::transform: Failed to reset fences"; } PROFILER_STOP(QString("%1 FFT %2").arg(getName()).arg(m_currentPlan->n)) } vkFFTEngine::Plan *VulkanvkFFTEngine::gpuAllocatePlan() { return new VulkanPlan(); } void VulkanvkFFTEngine::gpuDeallocatePlan(Plan *p) { VulkanPlan *plan = reinterpret_cast(p); vulkanDeallocateOut(plan); vulkanDeallocateIn(plan); vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &plan->m_commandBuffer); vkDestroyBuffer(vkGPU->device, plan->m_buffer, nullptr); vkFreeMemory(vkGPU->device, plan->m_bufferDeviceMemory, nullptr); }