From 167d4068cb621c4c0f5607b7a15a5169b83046a6 Mon Sep 17 00:00:00 2001 From: Kasper Sauramo Date: Wed, 19 Nov 2025 11:38:24 +0200 Subject: [PATCH] make it possible to select gpu Also skip non-gpu's by default --- src/main.cpp | 105 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 27 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index c238942..36e93fa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include @@ -7,18 +9,13 @@ #include #include -/// TODO: Pinned memory /// TODO: Plot by buffer size -enum class TransferType { - Memory, - Pinned, -}; - struct BenchmarkConfig { - uint32_t buffer_size = 128ULL << 20; + uint32_t bufferSize = 128ULL << 20; uint32_t iterations = 32; - TransferType type = TransferType::Memory; + int32_t gpuIndex = -1; // index < 0 is unselected + bool gpusOnly = true; }; struct BenchmarkResult { @@ -47,6 +44,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( return VK_FALSE; } +void usage(const char *name) { + std::cout << "Usage: " << name << " [options]\n" + << "Options:\n" + << " -g, --gpu INDEX Benchmark the INDEX gpu" << std::endl + << " -h, --help Show this help" << std::endl; +} + +bool isGpuType(VkPhysicalDeviceType type) { + return type == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU || + type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU || + type == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; +} + const char *getGpuTypeName(VkPhysicalDeviceType type) { switch (type) { case VK_PHYSICAL_DEVICE_TYPE_OTHER: @@ -198,9 +208,9 @@ void cleanupGpu(Gpu &gpu) { void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu, BenchmarkConfig config) { - const double gib = static_cast(config.buffer_size) / (1 << 30); + const double gib = static_cast(config.bufferSize) / (1 << 30); std::cout << "Device: " << gpu.properties.deviceName << std::endl; - std::cout << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n"; + std::cout << "Buffer size : " << config.bufferSize / (1 << 20) << " MiB\n"; std::cout << "Iterations : " << config.iterations << "\n"; std::cout << "H->D average : " << (gib / (result.host_to_device_time * 1e-3)) << " GiB/s\n"; @@ -210,39 +220,66 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu, << (gib / (result.device_to_host_time * 1e-3)) << " GiB/s\n"; std::cout << "D->H avg transfer time: " << result.device_to_host_time << "ms" << std::endl; - std::cout << std::endl; } BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) { // fill staging buffer void *mapped; - vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped); - std::memset(mapped, 0xAB, config.buffer_size); + vkMapMemory(gpu.device, gpu.stagingMem, 0, config.bufferSize, 0, &mapped); + std::memset(mapped, 0xAB, config.bufferSize); vkUnmapMemory(gpu.device, gpu.stagingMem); // warm-up, probably not significant benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf, - config.buffer_size); + config.bufferSize); // ---- benchmark host->device ---- double tH2D = 0.0; for (uint32_t i = 0; i < config.iterations; ++i) tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, - gpu.deviceBuf, config.buffer_size); + gpu.deviceBuf, config.bufferSize); tH2D /= config.iterations; // ---- benchmark device->host ---- double tD2H = 0.0; for (uint32_t i = 0; i < config.iterations; ++i) tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf, - gpu.stagingBuf, config.buffer_size); + gpu.stagingBuf, config.bufferSize); tD2H /= config.iterations; return {tH2D, tD2H}; } -auto main() -> int { +void runBenchmarkOnGPU(Gpu &gpu, BenchmarkConfig config) { + std::cout << std::endl << "Running staged benchmark" << std::endl; + BenchmarkResult res = runStagedBenchmark(gpu, config); + reportBenchmark(res, gpu, config); + std::cout << "--------------------" << std::endl; +} + +auto main(int argc, char *argv[]) -> int { + BenchmarkConfig config = {}; + + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + + if (arg == "-g" || arg == "--gpu") { + const char *index(argv[++i]); + try { + config.gpuIndex = std::stoi(index); + } catch (std::invalid_argument &e) { + usage(argv[0]); + return EXIT_FAILURE; + } + } else if (arg == "-h" || arg == "--help") { + usage(argv[0]); + return EXIT_SUCCESS; + } else { + usage(argv[0]); + return EXIT_FAILURE; + } + } VkApplicationInfo app{}; app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; @@ -262,7 +299,7 @@ auto main() -> int { uint32_t n = 0; vkEnumeratePhysicalDevices(inst, &n, nullptr); - std::cout << "Found " << n << " gpus." << std::endl; + std::cout << "Found " << n << " devices." << std::endl; std::vector vulkanPhysicalDevices(n); vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data()); @@ -270,11 +307,9 @@ auto main() -> int { // for each physical device, add initialize and add details std::vector gpus; - BenchmarkConfig config = {}; - uint32_t counter = 0; for (const auto phy : vulkanPhysicalDevices) { - gpus.emplace_back(initGpu(phy, config.buffer_size)); + gpus.emplace_back(initGpu(phy, config.bufferSize)); const Gpu &gpu = gpus.back(); @@ -285,16 +320,32 @@ auto main() -> int { } std::cout << "-------------------" << std::endl; - for (auto &gpu : gpus) { - std::cout << "Running staged benchmark" << std::endl; - BenchmarkResult res = runStagedBenchmark(gpu, config); - reportBenchmark(res, gpu, config); - std::cout << "--------------------" << std::endl; + if (config.gpuIndex >= 0) { + // A single gpu selected + if (config.gpuIndex >= static_cast(gpus.size())) { + std::cout << "Gpu index out of bounds." << std::endl; + } else { + runBenchmarkOnGPU(gpus[config.gpuIndex], config); + } + } else { + // Run on all devices + for (auto &gpu : gpus) { + + if (config.gpusOnly) { + if (!isGpuType(gpu.properties.deviceType)) { + std::cout << "Skipping non-gpu type: " + << gpu.properties.deviceName << std::endl; + std::cout << "--------------------" << std::endl; + continue; + } + } + runBenchmarkOnGPU(gpu, config); + } } for (auto gpu : gpus) { cleanupGpu(gpu); } vkDestroyInstance(inst, nullptr); - return 0; + return EXIT_SUCCESS; }