refactor for adding another type of test

2025-11-08 23:23:31 +02:00
parent 776c09908e
commit eb299020ae
1 changed files with 160 additions and 95 deletions
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -7,17 +7,30 @@
 #include <vector>
 #include <vulkan/vulkan_core.h>

-/// TODO: Run benchmark on all GPU's
 /// TODO: Pinned memory
 /// TODO: Plot by buffer size

-static constexpr uint64_t BUF_SIZE = 256ULL << 20; // 256 MiB
-static constexpr uint32_t ITERATIONS = 32;
+enum class TransferType { // kind of transfer test a BenchmarkConfig asks for
+    Memory = 0, // default kind (see BenchmarkConfig::type)
+    Pinned = 1, // NOTE(review): presumably the "Pinned memory" TODO; unused in this diff
+};
+
+struct BenchmarkConfig { // parameters applied to every GPU that is benchmarked
+    uint64_t buffer_size = 256ULL << 20; // bytes (256 MiB); 64-bit so 4 GiB+ sizes fit
+    uint32_t iterations = 32; // timed copies averaged per direction
+    TransferType type = TransferType::Memory; // NOTE(review): not read anywhere in this diff
+};
+
+struct BenchmarkResult { // averaged timings as returned by runBenchmark()
+    double host_to_device_time{0.0}; // mean milliseconds per staging -> device copy
+    double device_to_host_time{0.0}; // mean milliseconds per device -> staging copy
+};

 // ---------- helpers ----------
 VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT, VkDebugUtilsMessageTypeFlagsEXT,
    const VkDebugUtilsMessengerCallbackDataEXT *, void *) {
+
    return VK_FALSE;
 }

@@ -111,7 +124,121 @@ double benchCopy(VkDevice dev, VkCommandPool pool, VkQueue queue, VkBuffer src,
    return std::chrono::duration<double, std::milli>(t1 - t0).count();
 }

-int main() {
+struct Gpu { // one physical device plus every Vulkan object initGpu() creates on it
+    VkPhysicalDevice physicalDevice = VK_NULL_HANDLE;
+    VkPhysicalDeviceProperties properties = {}; // filled by vkGetPhysicalDeviceProperties
+    VkDevice device = VK_NULL_HANDLE;    // logical device
+    VkQueue queue = VK_NULL_HANDLE;      // queue 0 of family 0
+    VkCommandPool pool = VK_NULL_HANDLE; // created on family 0
+    // benchmark objects; VK_NULL_HANDLE (unlike nullptr) also fits 64-bit integer handles
+    VkBuffer stagingBuf = VK_NULL_HANDLE;       // memory requested host-visible + coherent
+    VkBuffer deviceBuf = VK_NULL_HANDLE;        // memory requested device-local
+    VkDeviceMemory stagingMem = VK_NULL_HANDLE; // allocateMem() result for stagingBuf
+    VkDeviceMemory deviceMem = VK_NULL_HANDLE;  // allocateMem() result for deviceBuf
+};
+
+// Builds everything the benchmark needs on `phy`: logical device, queue,
+// command pool, plus a staging and a device buffer of buffer_size bytes each.
+// Release with cleanupGpu(). buffer_size is 64-bit so 4 GiB+ is not truncated.
+// NOTE(review): VkResult values are ignored and queue family 0 is assumed.
+Gpu initGpu(VkPhysicalDevice phy, uint64_t buffer_size) {
+    Gpu gpu = {};
+    gpu.physicalDevice = phy;
+    vkGetPhysicalDeviceProperties(phy, &gpu.properties);
+
+    // logical device with a single queue taken from family 0
+    const float priority = 1.0f;
+    VkDeviceQueueCreateInfo queueInfo{};
+    queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queueInfo.queueFamilyIndex = 0; // assume family 0 supports transfer
+    queueInfo.queueCount = 1;
+    queueInfo.pQueuePriorities = &priority;
+
+    VkDeviceCreateInfo deviceInfo{};
+    deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    deviceInfo.queueCreateInfoCount = 1;
+    deviceInfo.pQueueCreateInfos = &queueInfo;
+
+    vkCreateDevice(phy, &deviceInfo, nullptr, &gpu.device);
+    vkGetDeviceQueue(gpu.device, 0, 0, &gpu.queue);
+
+    // command pool on the same family; transient = short-lived command buffers
+    VkCommandPoolCreateInfo poolInfo{};
+    poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    poolInfo.queueFamilyIndex = 0;
+    poolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
+    vkCreateCommandPool(gpu.device, &poolInfo, nullptr, &gpu.pool);
+
+    // both buffers are copy source and destination: each direction is timed
+    const auto copyBothWays =
+        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+    gpu.stagingBuf = createBuffer(gpu.device, buffer_size, copyBothWays);
+    gpu.stagingMem = allocateMem(gpu.device, phy, gpu.stagingBuf,
+                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+
+    gpu.deviceBuf = createBuffer(gpu.device, buffer_size, copyBothWays);
+    gpu.deviceMem = allocateMem(gpu.device, phy, gpu.deviceBuf,
+                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+
+    return gpu;
+}
+
+void cleanupGpu(Gpu &gpu) { // tears down what initGpu() built, device last
+    const VkDevice dev = gpu.device;
+    vkDestroyBuffer(dev, gpu.stagingBuf, nullptr);
+    vkDestroyBuffer(dev, gpu.deviceBuf, nullptr);
+    vkFreeMemory(dev, gpu.stagingMem, nullptr);
+    vkFreeMemory(dev, gpu.deviceMem, nullptr);
+    vkDestroyCommandPool(dev, gpu.pool, nullptr);
+    vkDestroyDevice(dev, nullptr);
+}
+
+// Prints device name, run parameters and GiB/s per direction; result times are in ms.
+void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
+                     BenchmarkConfig config) {
+    const double sizeGiB = static_cast<double>(config.buffer_size) / (1 << 30);
+    const double h2dSeconds = result.host_to_device_time * 1e-3;
+    const double d2hSeconds = result.device_to_host_time * 1e-3;
+    std::cout << "Device: " << gpu.properties.deviceName << std::endl
+              << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n"
+              << "Iterations  : " << config.iterations << "\n"
+              << "H→D average : " << (sizeGiB / h2dSeconds) << " GiB/s\n"
+              << "D→H average : " << (sizeGiB / d2hSeconds) << " GiB/s\n"
+              << std::endl;
+}
+
+// Fills the staging buffer, runs one untimed warm-up copy, then returns the mean
+// benchCopy() time (ms) per direction; with iterations == 0 both means are 0.0.
+BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) {
+    void *mapped = nullptr; // memset must never see a pointer a failed map left unset
+    if (vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0,
+                    &mapped) == VK_SUCCESS) {
+        std::memset(mapped, 0xAB, config.buffer_size);
+        vkUnmapMemory(gpu.device, gpu.stagingMem);
+    } else {
+        std::cerr << "vkMapMemory failed; staging buffer left unfilled\n";
+    }
+
+    // mean time in ms over config.iterations copies of src into dst
+    const auto averageCopy = [&](VkBuffer src, VkBuffer dst) {
+        if (config.iterations == 0)
+            return 0.0; // avoids the 0.0 / 0 = NaN of an unguarded average
+        double total = 0.0;
+        for (uint32_t i = 0; i < config.iterations; ++i)
+            total += benchCopy(gpu.device, gpu.pool, gpu.queue, src, dst,
+                               config.buffer_size);
+        return total / config.iterations;
+    };
+
+    benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf,
+              config.buffer_size); // warm-up, result discarded
+    const double h2d = averageCopy(gpu.stagingBuf, gpu.deviceBuf);
+    return {h2d, averageCopy(gpu.deviceBuf, gpu.stagingBuf)};
+}
+
+auto main() -> int {

    VkApplicationInfo app{};
    app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
@@ -125,105 +252,43 @@ int main() {
    VkInstance inst;
    vkCreateInstance(&ici, nullptr, &inst);

-    std::cout << "Starting benchmark..." << std::endl;
+    std::cout << "Starting benchmark." << std::endl;

-    // ---- physical device ----
+    // physical devices
    uint32_t n = 0;
    vkEnumeratePhysicalDevices(inst, &n, nullptr);
-    std::vector<VkPhysicalDevice> gpus(n);
-    vkEnumeratePhysicalDevices(inst, &n, gpus.data());
-    VkPhysicalDevice phy = gpus[0];

-    std::cout << "Found " << gpus.size() << " gpus." << std::endl;
+    std::cout << "Found " << n << " gpus." << std::endl;
+    std::vector<VkPhysicalDevice> vulkanPhysicalDevices(n);

-    {
-        std::vector<VkPhysicalDeviceProperties> gpu_properties(n);
-        // Print info
-        for (uint32_t i = 0; i < gpus.size(); i++) {
-            VkPhysicalDeviceProperties *prop = &gpu_properties[i];
-            vkGetPhysicalDeviceProperties(gpus[i], prop);
+    vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data());

-            std::cout << "GPU: [" << i << "] " << prop->deviceName << " ("
-                      << getGpuTypeName(prop->deviceType) << ")" << std::endl;
-        }
+    // for each physical device, initialize a Gpu and print its name and type
+    std::vector<Gpu> gpus;
+
+    BenchmarkConfig config = {};
+
+    uint32_t counter = 0;
+    for (const auto phy : vulkanPhysicalDevices) {
+        gpus.emplace_back(initGpu(phy, config.buffer_size));
+
+        const Gpu &gpu = gpus.back();
+
+        std::cout << "Device: [" << counter << "] " << gpu.properties.deviceName
+                  << " (" << getGpuTypeName(gpu.properties.deviceType) << ")"
+                  << std::endl;
+        counter++;
+    }
+    std::cout << "-------------------" << std::endl;
+
+    for (auto &gpu : gpus) {
+        BenchmarkResult res = runBenchmark(gpu, config);
+        reportBenchmark(res, gpu, config);
    }

-    // ---- logical device ----
-    float prio = 1.0f;
-    VkDeviceQueueCreateInfo qi{};
-    qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-    qi.queueFamilyIndex = 0; // assume family 0 supports transfer
-    qi.queueCount = 1;
-    qi.pQueuePriorities = &prio;
-
-    VkDeviceCreateInfo dci{};
-    dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-    dci.queueCreateInfoCount = 1;
-    dci.pQueueCreateInfos = &qi;
-
-    VkDevice dev;
-    vkCreateDevice(phy, &dci, nullptr, &dev);
-
-    VkQueue queue;
-    vkGetDeviceQueue(dev, 0, 0, &queue);
-
-    // ---- command pool ----
-    VkCommandPoolCreateInfo pci{};
-    pci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-    pci.queueFamilyIndex = 0;
-    pci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
-    VkCommandPool pool;
-    vkCreateCommandPool(dev, &pci, nullptr, &pool);
-
-    // ---- buffers ----
-    VkBuffer staging = createBuffer(dev, BUF_SIZE,
-                                    VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
-                                        VK_BUFFER_USAGE_TRANSFER_DST_BIT);
-    VkDeviceMemory stagingMem =
-        allocateMem(dev, phy, staging,
-                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
-
-    VkBuffer deviceBuf = createBuffer(dev, BUF_SIZE,
-                                      VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
-                                          VK_BUFFER_USAGE_TRANSFER_DST_BIT);
-    VkDeviceMemory deviceMem =
-        allocateMem(dev, phy, deviceBuf, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-
-    // ---- fill staging buffer ----
-    void *mapped;
-    vkMapMemory(dev, stagingMem, 0, BUF_SIZE, 0, &mapped);
-    std::memset(mapped, 0xAB, BUF_SIZE);
-    vkUnmapMemory(dev, stagingMem);
-
-    // ---- warm-up ----
-    benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
-
-    // ---- benchmark host->device ----
-    double tH2D = 0.0;
-    for (uint32_t i = 0; i < ITERATIONS; ++i)
-        tH2D += benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
-    tH2D /= ITERATIONS;
-
-    // ---- benchmark device->host ----
-    double tD2H = 0.0;
-    for (uint32_t i = 0; i < ITERATIONS; ++i)
-        tD2H += benchCopy(dev, pool, queue, deviceBuf, staging, BUF_SIZE);
-    tD2H /= ITERATIONS;
-
-    const double gib = static_cast<double>(BUF_SIZE) / (1 << 30);
-    std::cout << "Buffer size : " << BUF_SIZE / (1 << 20) << " MiB\n";
-    std::cout << "Iterations  : " << ITERATIONS << "\n";
-    std::cout << "H→D average : " << (gib / (tH2D * 1e-3)) << " GiB/s\n";
-    std::cout << "D→H average : " << (gib / (tD2H * 1e-3)) << " GiB/s\n";
-
-    // ---- cleanup ----
-    vkDestroyBuffer(dev, staging, nullptr);
-    vkDestroyBuffer(dev, deviceBuf, nullptr);
-    vkFreeMemory(dev, stagingMem, nullptr);
-    vkFreeMemory(dev, deviceMem, nullptr);
-    vkDestroyCommandPool(dev, pool, nullptr);
-    vkDestroyDevice(dev, nullptr);
+    for (auto gpu : gpus) {
+        cleanupGpu(gpu);
+    }
    vkDestroyInstance(inst, nullptr);
    return 0;
 }