diff --git a/src/main.cpp b/src/main.cpp index 0ab2c1f..0ba8acf 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,7 +16,7 @@ enum class TransferType { }; struct BenchmarkConfig { - uint32_t buffer_size = 256ULL << 20; + uint32_t buffer_size = 128ULL << 20; uint32_t iterations = 32; TransferType type = TransferType::Memory; }; @@ -26,6 +26,19 @@ struct BenchmarkResult { double device_to_host_time = 0.0f; }; +struct Gpu { + VkPhysicalDevice physicalDevice = nullptr; + VkPhysicalDeviceProperties properties = {}; + VkDevice device = nullptr; + VkQueue queue = nullptr; + VkCommandPool pool = nullptr; + // for benchmarks + VkDeviceMemory stagingMem = nullptr; + VkDeviceMemory deviceMem = nullptr; + VkBuffer stagingBuf = nullptr; + VkBuffer deviceBuf = nullptr; +}; + // ---------- helpers ---------- VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( VkDebugUtilsMessageSeverityFlagBitsEXT, VkDebugUtilsMessageTypeFlagsEXT, @@ -124,19 +137,6 @@ double benchCopy(VkDevice dev, VkCommandPool pool, VkQueue queue, VkBuffer src, return std::chrono::duration(t1 - t0).count(); } -struct Gpu { - VkPhysicalDevice physicalDevice = nullptr; - VkPhysicalDeviceProperties properties = {}; - VkDevice device = nullptr; - VkQueue queue = nullptr; - VkCommandPool pool = nullptr; - // for benchmarks - VkBuffer stagingBuf = nullptr; - VkBuffer deviceBuf = nullptr; - VkDeviceMemory stagingMem = nullptr; - VkDeviceMemory deviceMem = nullptr; -}; - Gpu initGpu(VkPhysicalDevice phy, uint32_t buffer_size) { Gpu gpu = {}; gpu.physicalDevice = phy; @@ -202,10 +202,14 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu, std::cout << "Device: " << gpu.properties.deviceName << std::endl; std::cout << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n"; std::cout << "Iterations : " << config.iterations << "\n"; - std::cout << "H→D average : " << (gib / (result.host_to_device_time * 1e-3)) - << " GiB/s\n"; - std::cout << "D→H average : " << (gib / (result.device_to_host_time * 1e-3)) - << " GiB/s\n"; + std::cout << "H->D average : " + << (gib / (result.host_to_device_time * 1e-3)) << " GiB/s\n"; + std::cout << "H->D avg transfer time: " << result.host_to_device_time + << "ms" << std::endl; + std::cout << "D->H average : " + << (gib / (result.device_to_host_time * 1e-3)) << " GiB/s\n"; + std::cout << "D->H avg transfer time: " << result.device_to_host_time + << "ms" << std::endl; std::cout << std::endl; } @@ -217,7 +221,7 @@ BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) { std::memset(mapped, 0xAB, config.buffer_size); vkUnmapMemory(gpu.device, gpu.stagingMem); - // ---- warm-up ---- + // warm-up, probably not significant benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf, config.buffer_size);