From e587c5bceb4b42ce82606f9c2b540e62201ade17 Mon Sep 17 00:00:00 2001
From: Kasper Sauramo
Date: Sun, 9 Nov 2025 14:42:54 +0200
Subject: [PATCH] benchmark pinned memory (wip)

---
 src/main.cpp | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index 0ba8acf..51abccc 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -35,8 +35,10 @@ struct Gpu {
   // for benchmarks
   VkDeviceMemory stagingMem = nullptr;
   VkDeviceMemory deviceMem = nullptr;
+  VkDeviceMemory pinnedMem = nullptr;
   VkBuffer stagingBuf = nullptr;
   VkBuffer deviceBuf = nullptr;
+  VkBuffer pinnedBuf = nullptr;
 };
 
 // ---------- helpers ----------
@@ -182,6 +184,14 @@ Gpu initGpu(VkPhysicalDevice phy, uint32_t buffer_size) {
   gpu.deviceMem = allocateMem(gpu.device, phy, gpu.deviceBuf,
                               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
 
+  gpu.pinnedBuf = createBuffer(gpu.device, buffer_size,
+                               VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+                                   VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+  gpu.pinnedMem = allocateMem(gpu.device, phy, gpu.pinnedBuf,
+                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+
   return gpu;
 }
 
@@ -189,8 +199,10 @@ void cleanupGpu(Gpu &gpu) {
 
   vkDestroyBuffer(gpu.device, gpu.stagingBuf, nullptr);
   vkDestroyBuffer(gpu.device, gpu.deviceBuf, nullptr);
+  vkDestroyBuffer(gpu.device, gpu.pinnedBuf, nullptr);
   vkFreeMemory(gpu.device, gpu.stagingMem, nullptr);
   vkFreeMemory(gpu.device, gpu.deviceMem, nullptr);
+  vkFreeMemory(gpu.device, gpu.pinnedMem, nullptr);
   vkDestroyCommandPool(gpu.device, gpu.pool, nullptr);
   vkDestroyDevice(gpu.device, nullptr);
 }
@@ -213,9 +225,9 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
   std::cout << std::endl;
 }
 
-BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) {
+BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) {
 
-  // ---- fill staging buffer ----
+  // fill staging buffer
   void *mapped;
   vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped);
   std::memset(mapped, 0xAB, config.buffer_size);
@@ -242,6 +254,35 @@ BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) {
   return {tH2D, tD2H};
 }
 
+BenchmarkResult runPinnedBenchmark(Gpu &gpu, BenchmarkConfig config) {
+  // Times copies between the host-visible pinned buffer and deviceBuf.
+  // fill pinned buffer through its host mapping
+  void *mapped;
+  vkMapMemory(gpu.device, gpu.pinnedMem, 0, config.buffer_size, 0, &mapped);
+  std::memset(mapped, 0xAB, config.buffer_size);
+  vkUnmapMemory(gpu.device, gpu.pinnedMem);
+
+  // warm-up on the measured (pinned) path, probably not significant
+  benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.pinnedBuf, gpu.deviceBuf,
+            config.buffer_size);
+
+  // ---- benchmark host->device (pinned -> device-local) ----
+  double tH2D = 0.0;
+  for (uint32_t i = 0; i < config.iterations; ++i)
+    tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.pinnedBuf,
+                      gpu.deviceBuf, config.buffer_size);
+  tH2D /= config.iterations;
+
+  // ---- benchmark device->host (device-local -> pinned) ----
+  double tD2H = 0.0;
+  for (uint32_t i = 0; i < config.iterations; ++i)
+    tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf,
+                      gpu.pinnedBuf, config.buffer_size);
+  tD2H /= config.iterations;
+
+  return {tH2D, tD2H};
+}
+
 auto main() -> int {
 
   VkApplicationInfo app{};
@@ -286,8 +327,15 @@ auto main() -> int {
   std::cout << "-------------------" << std::endl;
 
   for (auto &gpu : gpus) {
-    BenchmarkResult res = runBenchmark(gpu, config);
+    std::cout << "Running staged benchmark" << std::endl;
+    BenchmarkResult res = runStagedBenchmark(gpu, config);
     reportBenchmark(res, gpu, config);
+
+    std::cout << "Running pinned benchmark" << std::endl;
+    res = runPinnedBenchmark(gpu, config);
+    reportBenchmark(res, gpu, config);
+
+    std::cout << "--------------------" << std::endl;
   }
 
   for (auto gpu : gpus) {