From 167d4068cb621c4c0f5607b7a15a5169b83046a6 Mon Sep 17 00:00:00 2001
From: Kasper Sauramo <kasper.sauramo@analogway.com>
Date: Wed, 19 Nov 2025 11:38:24 +0200
Subject: [PATCH] make it possible to select gpu

Also skip non-GPU devices by default when benchmarking all devices.
---
 src/main.cpp | 105 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 27 deletions(-)
diff --git a/src/main.cpp b/src/main.cpp
index c238942..36e93fa 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,3 +1,5 @@
+#include <cstdlib>
+#include <stdexcept>
 #include <vulkan/vulkan.h>
 #include <vulkan/vulkan_core.h>
 
@@ -7,18 +9,13 @@
 #include <limits>
 #include <vector>
 
-/// TODO: Pinned memory
 /// TODO: Plot by buffer size
 
-enum class TransferType {
-    Memory,
-    Pinned,
-};
-
 struct BenchmarkConfig {
-    uint32_t buffer_size = 128ULL << 20;
+    uint32_t bufferSize = 128ULL << 20; // transfer size in bytes (128 MiB)
     uint32_t iterations = 32;
-    TransferType type = TransferType::Memory;
+    int32_t gpuIndex = -1; // device list index to benchmark; < 0 = unselected
+    bool gpusOnly = true;  // when running on all devices, skip non-GPU types
 };
 
 struct BenchmarkResult {
@@ -47,6 +44,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
     return VK_FALSE;
 }
 
+void usage(const char *name) { // print the command-line help to stdout
+    std::cout << "Usage: " << name << " [options]\n"
+              << "Options:\n"
+              << "  -g, --gpu INDEX  Benchmark the INDEX gpu" << std::endl
+              << "  -h, --help       Show this help" << std::endl;
+}
+
+bool isGpuType(VkPhysicalDeviceType type) { // is this device type a GPU?
+    return type == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU ||
+           type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU ||
+           type == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; // any other type: false
+}
+
 const char *getGpuTypeName(VkPhysicalDeviceType type) {
     switch (type) {
     case VK_PHYSICAL_DEVICE_TYPE_OTHER:
@@ -198,9 +208,9 @@ void cleanupGpu(Gpu &gpu) {
 void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
                      BenchmarkConfig config) {
 
-    const double gib = static_cast<double>(config.buffer_size) / (1 << 30);
+    const double gib = static_cast<double>(config.bufferSize) / (1 << 30);
     std::cout << "Device: " << gpu.properties.deviceName << std::endl;
-    std::cout << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n";
+    std::cout << "Buffer size : " << config.bufferSize / (1 << 20) << " MiB\n";
     std::cout << "Iterations  : " << config.iterations << "\n";
     std::cout << "H->D average : "
               << (gib / (result.host_to_device_time * 1e-3)) << " GiB/s\n";
@@ -210,39 +220,66 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
               << (gib / (result.device_to_host_time * 1e-3)) << " GiB/s\n";
     std::cout << "D->H avg transfer time: " << result.device_to_host_time
               << "ms" << std::endl;
-    std::cout << std::endl;
 }
 
 BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) {
 
     // fill staging buffer
     void *mapped;
-    vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped);
-    std::memset(mapped, 0xAB, config.buffer_size);
+    vkMapMemory(gpu.device, gpu.stagingMem, 0, config.bufferSize, 0, &mapped);
+    std::memset(mapped, 0xAB, config.bufferSize); // fill byte presumably arbitrary
     vkUnmapMemory(gpu.device, gpu.stagingMem);
 
     // warm-up, probably not significant
     benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf,
-              config.buffer_size);
+              config.bufferSize); // return value deliberately ignored
 
     // ---- benchmark host->device ----
     double tH2D = 0.0;
     for (uint32_t i = 0; i < config.iterations; ++i)
         tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf,
-                          gpu.deviceBuf, config.buffer_size);
+                          gpu.deviceBuf, config.bufferSize); // summed, then averaged
     tH2D /= config.iterations;
 
     // ---- benchmark device->host ----
     double tD2H = 0.0;
     for (uint32_t i = 0; i < config.iterations; ++i)
         tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf,
-                          gpu.stagingBuf, config.buffer_size);
+                          gpu.stagingBuf, config.bufferSize); // summed, then averaged
     tD2H /= config.iterations;
 
     return {tH2D, tD2H};
 }
 
-auto main() -> int {
+void runBenchmarkOnGPU(Gpu &gpu, BenchmarkConfig config) {
+    std::cout << std::endl << "Running staged benchmark" << std::endl;
+    BenchmarkResult res = runStagedBenchmark(gpu, config); // measure one device
+    reportBenchmark(res, gpu, config); // print its results to stdout
+    std::cout << "--------------------" << std::endl; // separator between devices
+}
+
+auto main(int argc, char *argv[]) -> int {
+    BenchmarkConfig config = {};
+
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        // Parse options; anything unrecognised or malformed prints usage and fails.
+        if (arg == "-g" || arg == "--gpu") {
+            const char *index = (i + 1 < argc) ? argv[++i] : ""; // "" if absent
+            try {
+                config.gpuIndex = std::stoi(index); // "" throws invalid_argument
+            } catch (const std::logic_error &) { // also covers out_of_range
+                usage(argv[0]);
+                return EXIT_FAILURE;
+            }
+        } else if (arg == "-h" || arg == "--help") {
+            usage(argv[0]);
+            return EXIT_SUCCESS;
+        } else {
+            usage(argv[0]);
+            return EXIT_FAILURE;
+        }
+    }
 
     VkApplicationInfo app{};
     app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
@@ -262,7 +299,7 @@ auto main() -> int {
     uint32_t n = 0;
     vkEnumeratePhysicalDevices(inst, &n, nullptr);
 
-    std::cout << "Found " << n << " gpus." << std::endl;
+    std::cout << "Found " << n << " devices." << std::endl;
     std::vector<VkPhysicalDevice> vulkanPhysicalDevices(n);
 
     vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data());
@@ -270,11 +307,9 @@ auto main() -> int {
     // for each physical device, add initialize and add details
     std::vector<Gpu> gpus;
 
-    BenchmarkConfig config = {};
-
     uint32_t counter = 0;
     for (const auto phy : vulkanPhysicalDevices) {
-        gpus.emplace_back(initGpu(phy, config.buffer_size));
+        gpus.emplace_back(initGpu(phy, config.bufferSize));
 
         const Gpu &gpu = gpus.back();
 
@@ -285,16 +320,32 @@ auto main() -> int {
     }
     std::cout << "-------------------" << std::endl;
 
-    for (auto &gpu : gpus) {
-        std::cout << "Running staged benchmark" << std::endl;
-        BenchmarkResult res = runStagedBenchmark(gpu, config);
-        reportBenchmark(res, gpu, config);
-        std::cout << "--------------------" << std::endl;
+    if (config.gpuIndex >= 0) {
+        // A single gpu selected
+        if (config.gpuIndex >= static_cast<int32_t>(gpus.size())) {
+            std::cout << "Gpu index out of bounds." << std::endl;
+        } else {
+            runBenchmarkOnGPU(gpus[config.gpuIndex], config);
+        }
+    } else {
+        // Run on all devices
+        for (auto &gpu : gpus) {
+
+            if (config.gpusOnly) {
+                if (!isGpuType(gpu.properties.deviceType)) {
+                    std::cout << "Skipping non-gpu type: "
+                              << gpu.properties.deviceName << std::endl;
+                    std::cout << "--------------------" << std::endl;
+                    continue;
+                }
+            }
+            runBenchmarkOnGPU(gpu, config);
+        }
     }
 
     for (auto gpu : gpus) {
         cleanupGpu(gpu);
     }
     vkDestroyInstance(inst, nullptr);
-    return 0;
+    return EXIT_SUCCESS;
 }