diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 69a8e17..b9d5908 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -4,9 +4,6 @@ name: CMake on multiple platforms on: push: - branches: [ "main", "bugfix/workflow" ] - paths-ignore: - - '**/README.md' pull_request: branches: [ "main" ] @@ -25,7 +22,7 @@ jobs: # # To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list. matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, windows-latest, macos-14] build_type: [Release] c_compiler: [gcc, clang, cl] include: @@ -35,7 +32,7 @@ jobs: - os: ubuntu-latest c_compiler: gcc cpp_compiler: g++ - - os: ubuntu-latest + - os: macos-14 c_compiler: clang cpp_compiler: clang++ exclude: @@ -45,6 +42,12 @@ jobs: c_compiler: clang - os: ubuntu-latest c_compiler: cl + - os: macos-14 + c_compiler: cl + - os: macos-14 + c_compiler: gcc + - os: ubuntu-latest + c_compiler: clang steps: - uses: actions/checkout@v3 @@ -64,8 +67,8 @@ jobs: - name: Prepare Vulkan SDK uses: humbletim/setup-vulkan-sdk@v1.2.0 with: - vulkan-query-version: 1.3.204.0 - vulkan-components: Vulkan-Headers, Vulkan-Loader, Glslang + vulkan-query-version: latest + vulkan-components: Vulkan-Headers, Vulkan-Loader, Glslang, SPIRV-Tools vulkan-use-cache: true - name: Configure CMake @@ -81,7 +84,7 @@ jobs: - name: Build # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). - run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} + run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} -j3 - name: Upload build artifacts uses: actions/upload-artifact@v4 diff --git a/3dgs/Renderer.cpp b/3dgs/Renderer.cpp index b9dea8c..10ee124 100644 --- a/3dgs/Renderer.cpp +++ b/3dgs/Renderer.cpp @@ -60,8 +60,20 @@ void Renderer::handleInput() { } } +void Renderer::retrieveTimestamps() { + std::vector timestamps(queryManager->nextId); + auto res = context->device->getQueryPoolResults(context->queryPool.get(), 0, queryManager->nextId, + timestamps.size() * sizeof(uint64_t), + timestamps.data(), sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait); + if (res != vk::Result::eSuccess) { + throw std::runtime_error("Failed to retrieve timestamps"); + } + + queryManager->parseResults(timestamps); +} + void Renderer::initializeVulkan() { - window = std::make_shared("Vulkan Splatting", 800, 600); + window = std::make_shared("Vulkan Splatting", 1920, 1080); context = std::make_shared(Window::getRequiredInstanceExtensions(), std::vector{}, configuration.enableVulkanValidationLayers); @@ -72,10 +84,11 @@ void Renderer::initializeVulkan() { vk::PhysicalDeviceFeatures pdf{}; vk::PhysicalDeviceVulkan11Features pdf11{}; vk::PhysicalDeviceVulkan12Features pdf12{}; + pdf.shaderStorageImageWriteWithoutFormat = true; pdf.shaderInt64 = true; pdf12.shaderFloat16 = true; - pdf12.shaderBufferInt64Atomics = true; - pdf12.shaderSharedInt64Atomics = true; + // pdf12.shaderBufferInt64Atomics = true; + // pdf12.shaderSharedInt64Atomics = true; context->createLogicalDevice(pdf, pdf11, pdf12); context->createDescriptorPool(1); @@ -97,7 +110,8 @@ void Renderer::initializeVulkan() { void Renderer::loadSceneToGPU() { scene = std::make_shared(configuration.scene); scene->load(context); - // scene->loadTestScene(context); + // reset descriptor pool + context->device->resetDescriptorPool(context->descriptorPool.get()); } void Renderer::createPreprocessPipeline() { @@ -344,6 +358,8 @@ void Renderer::run() { fpsCounter++; } + retrieveTimestamps(); + // auto nn = totalSumBufferHost->readOne() ; // auto staging = Buffer::staging(context, nn* sizeof(uint64_t)); // sortKVBufferEven->downloadTo(staging); @@ -384,6 +400,7 @@ void Renderer::recordPreprocessCommandBuffer() { preprocessCommandBuffer->begin(vk::CommandBufferBeginInfo{}); preprocessPipeline->bind(preprocessCommandBuffer, 0, 0); + preprocessCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, context->queryPool.get(), queryManager->registerQuery("preprocess_start")); preprocessCommandBuffer->dispatch(numGroups, 1, 1); tileOverlapBuffer->computeWriteReadBarrier(preprocessCommandBuffer.get()); @@ -392,7 +409,10 @@ void Renderer::recordPreprocessCommandBuffer() { prefixSumPingBuffer->computeWriteReadBarrier(preprocessCommandBuffer.get()); + preprocessCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, context->queryPool.get(), queryManager->registerQuery("preprocess_end")); + prefixSumPipeline->bind(preprocessCommandBuffer, 0, 0); + preprocessCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, context->queryPool.get(), queryManager->registerQuery("prefix_sum_start")); const auto iters = static_cast(std::ceil(std::log2(static_cast(scene->getNumVertices())))); for (uint32_t timestep = 0; timestep <= iters; timestep++) { preprocessCommandBuffer->pushConstants(prefixSumPipeline->pipelineLayout.get(), @@ -422,7 +442,10 @@ void Renderer::recordPreprocessCommandBuffer() { vertexAttributeBuffer->computeWriteReadBarrier(preprocessCommandBuffer.get()); + preprocessCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, context->queryPool.get(), queryManager->registerQuery("prefix_sum_end")); + preprocessSortPipeline->bind(preprocessCommandBuffer, 0, iters % 2 == 0 ? 0 : 1); + preprocessCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, context->queryPool.get(), queryManager->registerQuery("preprocess_sort_start")); uint32_t tileX = (swapchain->swapchainExtent.width + 16 - 1) / 16; // assert(tileX == 50); preprocessCommandBuffer->pushConstants(preprocessSortPipeline->pipelineLayout.get(), @@ -431,6 +454,7 @@ void Renderer::recordPreprocessCommandBuffer() { preprocessCommandBuffer->dispatch(numGroups, 1, 1); sortKBufferEven->computeWriteReadBarrier(preprocessCommandBuffer.get()); + preprocessCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, context->queryPool.get(), queryManager->registerQuery("preprocess_sort_end")); preprocessCommandBuffer->end(); } @@ -452,6 +476,9 @@ void Renderer::recordRenderCommandBuffer(uint32_t currentFrame) { assert(numInstances <= scene->getNumVertices() * SORT_ALLOCATE_MULTIPLIER); for (auto i = 0; i < 8; i++) { sortHistPipeline->bind(renderCommandBuffer, 0, i % 2 == 0 ? 0 : 1); + if (i == 0) { + renderCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, context->queryPool.get(), queryManager->registerQuery("sort_start")); + } auto invocationSize = (numInstances + numRadixSortBlocksPerWorkgroup - 1) / numRadixSortBlocksPerWorkgroup; invocationSize = (invocationSize + 255) / 256; @@ -482,6 +509,10 @@ void Renderer::recordRenderCommandBuffer(uint32_t currentFrame) { sortKBufferEven->computeWriteReadBarrier(renderCommandBuffer.get()); sortVBufferEven->computeWriteReadBarrier(renderCommandBuffer.get()); } + + if (i == 7) { + renderCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, context->queryPool.get(), queryManager->registerQuery("sort_end")); + } } renderCommandBuffer->fillBuffer(tileBoundaryBuffer->buffer, 0, VK_WHOLE_SIZE, 0); @@ -494,14 +525,17 @@ void Renderer::recordRenderCommandBuffer(uint32_t currentFrame) { // Since we have 64 bit keys, the sort result is always in the even buffer tileBoundaryPipeline->bind(renderCommandBuffer, 0, 0); + renderCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, context->queryPool.get(), queryManager->registerQuery("tile_boundary_start")); renderCommandBuffer->pushConstants(tileBoundaryPipeline->pipelineLayout.get(), vk::ShaderStageFlagBits::eCompute, 0, sizeof(uint32_t), &numInstances); renderCommandBuffer->dispatch((numInstances + 255) / 256, 1, 1); tileBoundaryBuffer->computeWriteReadBarrier(renderCommandBuffer.get()); + renderCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, context->queryPool.get(), queryManager->registerQuery("tile_boundary_end")); renderPipeline->bind(renderCommandBuffer, 0, std::vector{0, currentImageIndex}); + renderCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, context->queryPool.get(), queryManager->registerQuery("render_start")); auto [width, height] = window->getFramebufferSize(); uint32_t constants[2] = {width, height}; renderCommandBuffer->pushConstants(renderPipeline->pipelineLayout.get(), @@ -534,6 +568,7 @@ void Renderer::recordRenderCommandBuffer(uint32_t currentFrame) { renderCommandBuffer->pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eBottomOfPipe, vk::DependencyFlagBits::eByRegion, nullptr, nullptr, imageMemoryBarrier); + renderCommandBuffer->writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, context->queryPool.get(), queryManager->registerQuery("render_end")); renderCommandBuffer->end(); } diff --git a/3dgs/Renderer.h b/3dgs/Renderer.h index 0da0a71..ed790fb 100644 --- a/3dgs/Renderer.h +++ b/3dgs/Renderer.h @@ -11,6 +11,8 @@ #include "../vulkan/Swapchain.h" #include +#include "../vulkan/QueryManager.h" + struct RendererConfiguration { bool enableVulkanValidationLayers = false; std::optional physicalDeviceId = std::nullopt; @@ -64,6 +66,8 @@ class Renderer { void handleInput(); + void retrieveTimestamps(); + void run(); ~Renderer(); @@ -72,6 +76,7 @@ class Renderer { std::shared_ptr window; std::shared_ptr context; std::shared_ptr scene; + std::shared_ptr queryManager = std::make_shared(); std::shared_ptr preprocessPipeline; std::shared_ptr renderPipeline; diff --git a/CMakeLists.txt b/CMakeLists.txt index a0b3760..315934f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ set(CMAKE_CXX_STANDARD 20) find_package(Vulkan COMPONENTS glslangValidator) -if (WIN32) +if (WIN32 OR APPLE) FetchContent_Declare( glfw GIT_REPOSITORY https://github.com/glfw/glfw @@ -26,6 +26,8 @@ if (WIN32) add_subdirectory(${glfw_SOURCE_DIR} ${glfw_BINARY_DIR}) endif() + set(GLM_ENABLE_CXX_20 ON CACHE INTERNAL "Enable experimental features") + FetchContent_Declare( glm GIT_REPOSITORY https://github.com/g-truc/glm @@ -65,12 +67,15 @@ else () endif () if (CMAKE_BUILD_TYPE MATCHES Debug AND NOT APPLE) - set(GLSLC_DEFINE "-DDEBUG") + list(APPEND GLSLC_DEFINE "-DDEBUG") else () - set(GLSLC_DEFINE "-DNDEBUG") + list(APPEND GLSLC_DEFINE "-DNDEBUG") endif () -set(GLSLC_DEFINE "${GLSLC_DEFINE}") +if (APPLE) + # append -DAPPLE to GLSLC_DEFINE + list(APPEND GLSLC_DEFINE "-DAPPLE") +endif () foreach (GLSL ${GLSL_SOURCE_FILES}) get_filename_component(FILE_NAME ${GLSL} NAME) @@ -108,6 +113,8 @@ add_executable(vulkan_splatting main.cpp vulkan/pipelines/ComputePipeline.h vulkan/Swapchain.cpp vulkan/Swapchain.h + vulkan/QueryManager.cpp + vulkan/QueryManager.h ) add_dependencies(vulkan_splatting Shaders) diff --git a/shaders/sort/sort.comp b/shaders/sort/sort.comp index 10febac..02bab7d 100644 --- a/shaders/sort/sort.comp +++ b/shaders/sort/sort.comp @@ -37,7 +37,9 @@ #extension GL_KHR_shader_subgroup_arithmetic: enable #extension GL_KHR_shader_subgroup_ballot: enable #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable +#ifndef APPLE #extension GL_EXT_shader_atomic_int64 : enable +#endif #define WORKGROUP_SIZE 256// assert WORKGROUP_SIZE >= RADIX_SORT_BINS #define RADIX_SORT_BINS 256U @@ -85,7 +87,12 @@ shared uint[RADIX_SORT_BINS / SUBGROUP_SIZE] sums;// subgroup reductions shared uint[RADIX_SORT_BINS] global_offsets;// global exclusive scan (prefix sum) struct BinFlags { +#ifndef APPLE key_t flags[WORKGROUP_SIZE / BITS]; +#else + uint flags1[WORKGROUP_SIZE / BITS]; + uint flags2[WORKGROUP_SIZE / BITS]; +#endif }; shared BinFlags[RADIX_SORT_BINS] bin_flags; @@ -133,7 +140,12 @@ void main() { // initialize bin flags if (lID < RADIX_SORT_BINS) { for (int i = 0; i < WORKGROUP_SIZE / BITS; i++) { + #ifndef APPLE bin_flags[lID].flags[i] = 0U;// init all bin flags to 0 + #else + bin_flags[lID].flags1[i] = 0U;// init all bin flags to 0 + bin_flags[lID].flags2[i] = 0U;// init all bin flags to 0 + #endif } } barrier(); @@ -149,7 +161,12 @@ void main() { // offset for group binOffset = global_offsets[binID]; // add bit to flag + #ifndef APPLE atomicAdd(bin_flags[binID].flags[flags_bin], flags_bit); + #else + atomicAdd(bin_flags[binID].flags1[flags_bin], uint(flags_bit)); + atomicAdd(bin_flags[binID].flags2[flags_bin], uint(flags_bit >> 32)); + #endif } barrier(); @@ -158,11 +175,24 @@ void main() { uint prefix = 0; uint count = 0; for (uint i = 0; i < WORKGROUP_SIZE / BITS; i++) { - const key_t bits = bin_flags[binID].flags[i]; + #ifndef APPLE + const key_t bits = bin_flags[binID].flags[i]; + #else + const uint flag1 = bin_flags[binID].flags1[i]; + const uint flag2 = bin_flags[binID].flags2[i]; + #endif #if BITS == 64 - const uint full_count = bitCount(uint(bits)) + bitCount(uint(bits >> 32)); - const key_t partial_bits = bits & (flags_bit - 1); - const uint partial_count = bitCount(uint(partial_bits)) + bitCount(uint(partial_bits >> 32)); + #ifndef APPLE + const uint full_count = bitCount(uint(bits)) + bitCount(uint(bits >> 32)); + const key_t partial_bits = bits & (flags_bit - 1); + const uint partial_count = bitCount(uint(partial_bits)) + bitCount(uint(partial_bits >> 32)); + #else + const uint full_count = bitCount(flag1) + bitCount(flag2); + const uint64_t f = flags_bit - 1; + const uint partial_bits1 = flag1 & uint(f); + const uint partial_bits2 = flag2 & uint(f >> 32); + const uint partial_count = bitCount(partial_bits1) + bitCount(partial_bits2); + #endif #else const uint full_count = bitCount(bits); const uint partial_count = bitCount(bits & (flags_bit - 1)); diff --git a/vulkan/QueryManager.cpp b/vulkan/QueryManager.cpp new file mode 100644 index 0000000..193ad53 --- /dev/null +++ b/vulkan/QueryManager.cpp @@ -0,0 +1,56 @@ +#include "QueryManager.h" + +#include + +uint32_t QueryManager::registerQuery(const std::string& name) { + std::lock_guard lock(mutex); + if (!registry.contains(name)) { + registry[name] = nextId++; + } + return registry[name]; +} + +uint32_t QueryManager::getQueryId(const std::string& name) { + std::lock_guard lock(mutex); + if (registry.contains(name)) { + return registry.at(name); + } + return 0; +} + +void QueryManager::parseResults(const std::vector& results) { + // all names end with _start or _end + // calculate the time between the two + // push the results to the results map + // print every 1 seconds + std::lock_guard lock(mutex); + for (auto& [name, id] : registry) { + if (name.ends_with("_start")) { + auto endName = name.substr(0, name.size() - 5) + "end"; + if (registry.contains(endName)) { + auto start = results[id]; + auto end = results[registry[endName]]; + auto diff = end - start; + if (this->results.contains(name)) { + this->results[name].push_back(diff); + } else { + this->results[name] = {diff}; + } + } + } + } + auto now = std::chrono::high_resolution_clock::now(); + if (now - lastPrint > std::chrono::seconds(1)) { + lastPrint = now; + for (auto& [name, result] : this->results) { + auto truncated = name.substr(0, name.size() - 6); + std::cout << truncated << ": "; + // calculate average + uint64_t sum = 0; + for (auto& r : result) { + sum += r; + } + std::cout << sum / result.size() / 1000000.0 << "ms" << std::endl; + } + } +} diff --git a/vulkan/QueryManager.h b/vulkan/QueryManager.h new file mode 100644 index 0000000..98d8431 --- /dev/null +++ b/vulkan/QueryManager.h @@ -0,0 +1,26 @@ +#ifndef QUERYMANAGER_H +#define QUERYMANAGER_H +#include +#include +#include +#include +#include +#include + + +class QueryManager { +public: + uint32_t registerQuery(const std::string &name); + [[nodiscard]] uint32_t getQueryId(const std::string &name); + void parseResults(const std::vector& results); + int nextId = 0; +private: + std::mutex mutex; + std::unordered_map registry; + std::unordered_map> results; + std::chrono::time_point lastPrint; +}; + + + +#endif //QUERYMANAGER_H diff --git a/vulkan/VulkanContext.cpp b/vulkan/VulkanContext.cpp index 0e8b6eb..074260f 100644 --- a/vulkan/VulkanContext.cpp +++ b/vulkan/VulkanContext.cpp @@ -9,8 +9,8 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE VkBool32 debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, - const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) { - const char *severity = "???"; + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) { + const char* severity = "???"; if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { severity = "VERBOSE"; } else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { @@ -21,7 +21,7 @@ VkBool32 debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, severity = "ERROR"; } - const char *type = "???"; + const char* type = "???"; if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT) { type = "GENERAL"; } else if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) { @@ -35,10 +35,10 @@ VkBool32 debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, return VK_FALSE; } -VulkanContext::VulkanContext(const std::vector &instance_extensions, - const std::vector &device_extensions, bool validation_layers_enabled) - : instanceExtensions(instance_extensions), deviceExtensions(device_extensions), - validationLayersEnabled(validation_layers_enabled) { +VulkanContext::VulkanContext(const std::vector& instance_extensions, + const std::vector& device_extensions, bool validation_layers_enabled) + : instanceExtensions(instance_extensions), deviceExtensions(device_extensions), + validationLayersEnabled(validation_layers_enabled) { #ifdef __APPLE__ instanceExtensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); deviceExtensions.push_back("VK_KHR_portability_subset"); @@ -48,13 +48,13 @@ VulkanContext::VulkanContext(const std::vector &instance_extensions instanceExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } auto getInstanceProcAddr = dl.getProcAddress( - "vkGetInstanceProcAddr"); + "vkGetInstanceProcAddr"); VULKAN_HPP_DEFAULT_DISPATCHER.init(getInstanceProcAddr); } void VulkanContext::createInstance() { vk::ApplicationInfo appInfo = { - "Vulkan Splatting", VK_MAKE_VERSION(1, 0, 0), "No Engine", VK_MAKE_VERSION(1, 0, 0), VK_API_VERSION_1_3 + "Vulkan Splatting", VK_MAKE_VERSION(1, 0, 0), "No Engine", VK_MAKE_VERSION(1, 0, 0), VK_API_VERSION_1_3 }; std::vector requiredLayers; @@ -64,17 +64,19 @@ void VulkanContext::createInstance() { auto instanceExtensionsCharPtr = Utils::stringVectorToCharPtrVector(instanceExtensions); vk::StructureChain createInfoChain = { - { - {}, &appInfo, (uint32_t) requiredLayers.size(), requiredLayers.data(), (uint32_t) instanceExtensions.size(), - instanceExtensionsCharPtr.data() - }, - { - {}, vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose | vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo, - vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | - vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | - vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, - debugCallback - } + { + {}, &appInfo, (uint32_t) requiredLayers.size(), requiredLayers.data(), (uint32_t) instanceExtensions.size(), + instanceExtensionsCharPtr.data() + }, + { + {}, + vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose | vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo, + vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | + vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | + vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, + debugCallback + } }; #ifdef __APPLE__ @@ -94,9 +96,9 @@ bool VulkanContext::isDeviceSuitable(vk::PhysicalDevice device, std::optional id, std::optiona auto devices = instance->enumeratePhysicalDevices(); int ind = 0; - for (auto &device: devices) { + for (auto& device: devices) { std::cout << "[" << ind++ << "] " << device.getProperties().deviceName << std::endl; } @@ -142,14 +144,14 @@ void VulkanContext::selectPhysicalDevice(std::optional id, std::optiona } auto suitableDevices = std::vector{}; - for (auto &device: devices) { + for (auto& device: devices) { if (isDeviceSuitable(device, surface)) { suitableDevices.push_back(device); } } physicalDevice = suitableDevices[0]; - for (auto &device: suitableDevices) { + for (auto& device: suitableDevices) { auto properties = device.getProperties(); if (properties.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) { physicalDevice = device; @@ -165,7 +167,7 @@ VulkanContext::QueueFamilyIndices VulkanContext::findQueueFamilies() { auto queueFamilies = physicalDevice.getQueueFamilyProperties(); for (uint32_t i = 0; i < queueFamilies.size(); i++) { - auto &queueFamily = queueFamilies[i]; + auto& queueFamily = queueFamilies[i]; if (queueFamily.queueFlags & vk::QueueFlagBits::eGraphics) { indices.graphicsFamily = i; } @@ -184,12 +186,21 @@ VulkanContext::QueueFamilyIndices VulkanContext::findQueueFamilies() { return indices; } -void VulkanContext::createLogicalDevice(vk::PhysicalDeviceFeatures deviceFeatures, vk::PhysicalDeviceVulkan11Features deviceFeatures11, vk::PhysicalDeviceVulkan12Features deviceFeatures12) { +void VulkanContext::createQueryPool() { + vk::QueryPoolCreateInfo queryPoolCreateInfo = {}; + queryPoolCreateInfo.queryType = vk::QueryType::eTimestamp; + queryPoolCreateInfo.queryCount = 20; + queryPool = device->createQueryPoolUnique(queryPoolCreateInfo); +} + +void VulkanContext::createLogicalDevice(vk::PhysicalDeviceFeatures deviceFeatures, + vk::PhysicalDeviceVulkan11Features deviceFeatures11, + vk::PhysicalDeviceVulkan12Features deviceFeatures12) { QueueFamilyIndices indices = findQueueFamilies(); std::vector queueCreateInfos; - std::set < uint32_t > uniqueQueueFamilies = { - indices.graphicsFamily.value(), indices.computeFamily.value(), - indices.presentFamily.value() + std::set uniqueQueueFamilies = { + indices.graphicsFamily.value(), indices.computeFamily.value(), + indices.presentFamily.value() }; float queuePriority = 1.0f; @@ -202,17 +213,21 @@ void VulkanContext::createLogicalDevice(vk::PhysicalDeviceFeatures deviceFeature auto deviceExtensionsCharPtr = Utils::stringVectorToCharPtrVector(deviceExtensions); vk::DeviceCreateInfo createInfo = { - {}, (uint32_t) queueCreateInfos.size(), queueCreateInfos.data(), 0, nullptr, - (uint32_t) deviceExtensionsCharPtr.size(), deviceExtensionsCharPtr.data(), &deviceFeatures + {}, (uint32_t) queueCreateInfos.size(), queueCreateInfos.data(), 0, nullptr, + (uint32_t) deviceExtensionsCharPtr.size(), deviceExtensionsCharPtr.data(), &deviceFeatures }; createInfo.pNext = &deviceFeatures11; deviceFeatures11.pNext = &deviceFeatures12; + vk::PhysicalDeviceHostQueryResetFeatures hostQueryResetFeatures = {}; + hostQueryResetFeatures.hostQueryReset = VK_TRUE; + deviceFeatures12.pNext = &hostQueryResetFeatures; + device = physicalDevice.createDeviceUnique(createInfo); for (auto unique_queue_family: uniqueQueueFamilies) { auto queue = device->getQueue(unique_queue_family, 0); - std::set < Queue::Type > types; + std::set types; if (unique_queue_family == indices.graphicsFamily.value()) { types.insert(Queue::Type::GRAPHICS); } @@ -223,7 +238,7 @@ void VulkanContext::createLogicalDevice(vk::PhysicalDeviceFeatures deviceFeature types.insert(Queue::Type::PRESENT); } - for (auto type : types) { + for (auto type: types) { queues[type] = Queue{types, unique_queue_family, 0, queue}; } } @@ -231,6 +246,7 @@ void VulkanContext::createLogicalDevice(vk::PhysicalDeviceFeatures deviceFeature // Create VMA setupVma(); createCommandPool(); + createQueryPool(); } vk::UniqueCommandBuffer VulkanContext::beginOneTimeCommandBuffer() { @@ -246,7 +262,7 @@ vk::UniqueCommandBuffer VulkanContext::beginOneTimeCommandBuffer() { return commandBuffer; } -void VulkanContext::endOneTimeCommandBuffer(vk::UniqueCommandBuffer &&commandBuffer, Queue::Type queue) { +void VulkanContext::endOneTimeCommandBuffer(vk::UniqueCommandBuffer&& commandBuffer, Queue::Type queue) { commandBuffer->end(); vk::SubmitInfo submitInfo = {}; submitInfo.commandBufferCount = 1; @@ -274,16 +290,18 @@ void VulkanContext::createCommandPool() { } void VulkanContext::createDescriptorPool(uint8_t framesInFlight) { + // get max number of descriptor sets from physical device std::vector poolSizes = { - {vk::DescriptorType::eUniformBuffer, static_cast(framesInFlight * 10)}, - {vk::DescriptorType::eStorageBuffer, static_cast(framesInFlight * 10)} + {vk::DescriptorType::eUniformBuffer, static_cast(framesInFlight * 10)}, + {vk::DescriptorType::eStorageBuffer, static_cast(framesInFlight * 50)}, + {vk::DescriptorType::eStorageImage, static_cast(framesInFlight * 10)} }; - vk::DescriptorPoolCreateInfo poolInfo = {}; - poolInfo.poolSizeCount = (uint32_t) poolSizes.size(); - poolInfo.pPoolSizes = poolSizes.data(); - poolInfo.maxSets = physicalDevice.getProperties().limits.maxBoundDescriptorSets; - poolInfo.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet; + vk::DescriptorPoolCreateInfo poolInfo{ + vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, + 100, static_cast(poolSizes.size()), + poolSizes.data() + }; descriptorPool = device->createDescriptorPoolUnique(poolInfo); } diff --git a/vulkan/VulkanContext.h b/vulkan/VulkanContext.h index eaf9a10..c581821 100644 --- a/vulkan/VulkanContext.h +++ b/vulkan/VulkanContext.h @@ -74,6 +74,8 @@ class VulkanContext { VulkanContext::QueueFamilyIndices findQueueFamilies(); + void createQueryPool(); + void createLogicalDevice(vk::PhysicalDeviceFeatures deviceFeatures, vk::PhysicalDeviceVulkan11Features deviceFeatures11, vk::PhysicalDeviceVulkan12Features deviceFeatures12); void createDescriptorPool(uint8_t framesInFlight); @@ -92,6 +94,8 @@ class VulkanContext { VmaAllocator allocator; vk::UniqueDescriptorPool descriptorPool; + vk::UniqueQueryPool queryPool; + private: vk::DynamicLoader dl; std::vector instanceExtensions;