initial version

This commit is contained in:
2025-11-07 23:18:23 +02:00
commit edb9e941d7
3 changed files with 219 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
build/
.DS_Store
.idea
*.log
tmp/

17
CMakeLists.txt Normal file
View File

@@ -0,0 +1,17 @@
# Build script for the vulkan-transfer-bench executable (single source file).
cmake_minimum_required(VERSION 3.19)
project(vulkan-transfer-bench CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Resolve the Vulkan headers and loader through CMake's FindVulkan module
# instead of linking a bare "vulkan" library name, which only works when the
# loader happens to be on the default linker search path under that name.
find_package(Vulkan REQUIRED)

add_compile_definitions(VK_ENABLE_BETA_EXTENSIONS)

add_executable(vulkan-transfer-bench
  src/main.cpp
)
target_link_libraries(vulkan-transfer-bench PRIVATE
  Vulkan::Vulkan
)
target_compile_options(vulkan-transfer-bench PRIVATE -Wall -Wextra)

196
src/main.cpp Normal file
View File

@@ -0,0 +1,196 @@
#include <vulkan/vulkan.h>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
/// TODO: Print available GPUs
/// TODO: Run benchmark on all GPUs
/// TODO: Pinned memory
/// TODO: Plot by buffer size
/// Size in bytes of each benchmark buffer (256 MiB).
static constexpr uint64_t BUF_SIZE = 256ULL * 1024ULL * 1024ULL;
/// Number of timed copies averaged per transfer direction.
static constexpr uint32_t ITERATIONS = 32u;
// ---------- helpers ----------
/// Debug-utils messenger callback stub.
///
/// Ignores every argument and always returns VK_FALSE. Nothing in the visible
/// part of this file registers it with a messenger.
VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT /*severity*/,
    VkDebugUtilsMessageTypeFlagsEXT /*types*/,
    const VkDebugUtilsMessengerCallbackDataEXT * /*callbackData*/,
    void * /*userData*/) {
  constexpr VkBool32 kResult = VK_FALSE;
  return kResult;
}
/// Finds the first memory type of `phy` that is permitted by `typeBits` and
/// has every flag in `props` set.
///
/// @param phy      physical device whose memory types are queried
/// @param typeBits bitmask; bit i set means memory type i is acceptable
/// @param props    property flags that must all be present on the type
/// @return the index of the first matching memory type, or
///         std::numeric_limits<uint32_t>::max() when none matches
uint32_t findMemory(VkPhysicalDevice phy, uint32_t typeBits,
                    VkMemoryPropertyFlags props) {
  VkPhysicalDeviceMemoryProperties memProps;
  vkGetPhysicalDeviceMemoryProperties(phy, &memProps);

  uint32_t index = 0;
  while (index < memProps.memoryTypeCount) {
    const bool permitted = ((typeBits >> index) & 1u) != 0u;
    const VkMemoryPropertyFlags flags =
        memProps.memoryTypes[index].propertyFlags;
    if (permitted && (flags & props) == props) {
      return index;
    }
    ++index;
  }
  return std::numeric_limits<uint32_t>::max();
}
/// Creates a buffer of `size` bytes with the given usage flags and exclusive
/// sharing mode. No memory is bound to the returned buffer.
///
/// @param dev   logical device that owns the buffer
/// @param size  buffer size in bytes
/// @param usage usage flags stored in the create info
/// @return the newly created buffer handle
/// @throws std::runtime_error if vkCreateBuffer does not return VK_SUCCESS
VkBuffer createBuffer(VkDevice dev, VkDeviceSize size,
                      VkBufferUsageFlags usage) {
  VkBufferCreateInfo ci{};
  ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  ci.size = size;
  ci.usage = usage;
  ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

  // Start from a null handle so a failed call can never leak an
  // uninitialized value to the caller.
  VkBuffer buf = VK_NULL_HANDLE;
  const VkResult res = vkCreateBuffer(dev, &ci, nullptr, &buf);
  if (res != VK_SUCCESS) {
    throw std::runtime_error("vkCreateBuffer failed (VkResult " +
                             std::to_string(static_cast<int>(res)) + ")");
  }
  return buf;
}
/// Allocates device memory that satisfies the requirements of `buf` and the
/// requested property flags, then binds it to `buf` at offset 0.
///
/// @param dev   logical device that owns `buf`
/// @param phy   physical device whose memory types are searched
/// @param buf   buffer to allocate for and bind to
/// @param props memory property flags the chosen memory type must have
/// @return the allocated (and bound) memory handle
/// @throws std::runtime_error if no memory type matches, or if allocation or
///         binding does not return VK_SUCCESS
VkDeviceMemory allocateMem(VkDevice dev, VkPhysicalDevice phy, VkBuffer buf,
                           VkMemoryPropertyFlags props) {
  VkMemoryRequirements req;
  vkGetBufferMemoryRequirements(dev, buf, &req);

  // findMemory signals "no match" with the maximum uint32_t value; that value
  // must not be passed on as a memory type index.
  const uint32_t idx = findMemory(phy, req.memoryTypeBits, props);
  if (idx == std::numeric_limits<uint32_t>::max()) {
    throw std::runtime_error(
        "allocateMem: no memory type satisfies the requested properties");
  }

  VkMemoryAllocateInfo ai{};
  ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  ai.allocationSize = req.size;
  ai.memoryTypeIndex = idx;

  VkDeviceMemory mem = VK_NULL_HANDLE;
  VkResult res = vkAllocateMemory(dev, &ai, nullptr, &mem);
  if (res != VK_SUCCESS) {
    throw std::runtime_error("vkAllocateMemory failed (VkResult " +
                             std::to_string(static_cast<int>(res)) + ")");
  }

  res = vkBindBufferMemory(dev, buf, mem, 0);
  if (res != VK_SUCCESS) {
    // The allocation is not returned to the caller on this path, so release
    // it here before reporting the failure.
    vkFreeMemory(dev, mem, nullptr);
    throw std::runtime_error("vkBindBufferMemory failed (VkResult " +
                             std::to_string(static_cast<int>(res)) + ")");
  }
  return mem;
}
/// Records a single buffer-to-buffer copy of `size` bytes (offset 0 in both
/// buffers), submits it to `queue`, waits for the queue to go idle and
/// returns how long submit + wait took.
///
/// @param dev   logical device
/// @param pool  command pool the one-time command buffer is allocated from
/// @param queue queue the copy is submitted to
/// @param src   source buffer
/// @param dst   destination buffer
/// @param size  number of bytes to copy
/// @return elapsed time in milliseconds, measured with steady_clock around
///         vkQueueSubmit and vkQueueWaitIdle
/// @throws std::runtime_error if any Vulkan call that reports a VkResult does
///         not return VK_SUCCESS
double benchCopy(VkDevice dev, VkCommandPool pool, VkQueue queue, VkBuffer src,
                 VkBuffer dst, VkDeviceSize size) {
  const auto describe = [](const char *call, VkResult res) {
    return std::string(call) + " failed (VkResult " +
           std::to_string(static_cast<int>(res)) + ")";
  };

  VkCommandBufferAllocateInfo ai{};
  ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  ai.commandPool = pool;
  ai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
  ai.commandBufferCount = 1;

  VkCommandBuffer cmd = VK_NULL_HANDLE;
  VkResult res = vkAllocateCommandBuffers(dev, &ai, &cmd);
  if (res != VK_SUCCESS) {
    throw std::runtime_error(describe("vkAllocateCommandBuffers", res));
  }

  // Used for failures that happen while the command buffer has not been
  // accepted by the queue: give it back to the pool, then report.
  const auto failAndFree = [&](const char *call, VkResult r) {
    vkFreeCommandBuffers(dev, pool, 1, &cmd);
    throw std::runtime_error(describe(call, r));
  };

  VkCommandBufferBeginInfo bi{};
  bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
  res = vkBeginCommandBuffer(cmd, &bi);
  if (res != VK_SUCCESS) {
    failAndFree("vkBeginCommandBuffer", res);
  }

  VkBufferCopy region{};
  region.size = size;
  vkCmdCopyBuffer(cmd, src, dst, 1, &region);

  res = vkEndCommandBuffer(cmd);
  if (res != VK_SUCCESS) {
    failAndFree("vkEndCommandBuffer", res);
  }

  VkSubmitInfo si{};
  si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  si.commandBufferCount = 1;
  si.pCommandBuffers = &cmd;

  const auto t0 = std::chrono::steady_clock::now();
  res = vkQueueSubmit(queue, 1, &si, VK_NULL_HANDLE);
  if (res != VK_SUCCESS) {
    failAndFree("vkQueueSubmit", res);
  }
  res = vkQueueWaitIdle(queue);
  const auto t1 = std::chrono::steady_clock::now();
  if (res != VK_SUCCESS) {
    // The submitted work may not have finished, so the command buffer is
    // deliberately not freed here.
    throw std::runtime_error(describe("vkQueueWaitIdle", res));
  }

  vkFreeCommandBuffers(dev, pool, 1, &cmd);
  return std::chrono::duration<double, std::milli>(t1 - t0).count();
}
// ---------- main ----------
int main() {
// ---- instance ----
VkApplicationInfo app{};
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
app.pApplicationName = "VulkanTransferBench";
app.apiVersion = VK_API_VERSION_1_2;
VkInstanceCreateInfo ici{};
ici.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
ici.pApplicationInfo = &app;
VkInstance inst;
vkCreateInstance(&ici, nullptr, &inst);
// ---- physical device ----
uint32_t n = 0;
vkEnumeratePhysicalDevices(inst, &n, nullptr);
std::vector<VkPhysicalDevice> gpus(n);
vkEnumeratePhysicalDevices(inst, &n, gpus.data());
VkPhysicalDevice phy = gpus[0];
// ---- logical device ----
float prio = 1.0f;
VkDeviceQueueCreateInfo qi{};
qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
qi.queueFamilyIndex = 0; // assume family 0 supports transfer
qi.queueCount = 1;
qi.pQueuePriorities = &prio;
VkDeviceCreateInfo dci{};
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
dci.queueCreateInfoCount = 1;
dci.pQueueCreateInfos = &qi;
VkDevice dev;
vkCreateDevice(phy, &dci, nullptr, &dev);
VkQueue queue;
vkGetDeviceQueue(dev, 0, 0, &queue);
// ---- command pool ----
VkCommandPoolCreateInfo pci{};
pci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
pci.queueFamilyIndex = 0;
pci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
VkCommandPool pool;
vkCreateCommandPool(dev, &pci, nullptr, &pool);
// ---- buffers ----
VkBuffer staging = createBuffer(dev, BUF_SIZE,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
VkDeviceMemory stagingMem =
allocateMem(dev, phy, staging,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VkBuffer deviceBuf = createBuffer(dev, BUF_SIZE,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
VkDeviceMemory deviceMem =
allocateMem(dev, phy, deviceBuf, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
// ---- fill staging buffer ----
void *mapped;
vkMapMemory(dev, stagingMem, 0, BUF_SIZE, 0, &mapped);
std::memset(mapped, 0xAB, BUF_SIZE);
vkUnmapMemory(dev, stagingMem);
// ---- warm-up ----
benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
// ---- benchmark host->device ----
double tH2D = 0.0;
for (uint32_t i = 0; i < ITERATIONS; ++i)
tH2D += benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
tH2D /= ITERATIONS;
// ---- benchmark device->host ----
double tD2H = 0.0;
for (uint32_t i = 0; i < ITERATIONS; ++i)
tD2H += benchCopy(dev, pool, queue, deviceBuf, staging, BUF_SIZE);
tD2H /= ITERATIONS;
const double gib = static_cast<double>(BUF_SIZE) / (1 << 30);
std::cout << "Buffer size : " << BUF_SIZE / (1 << 20) << " MiB\n";
std::cout << "Iterations : " << ITERATIONS << "\n";
std::cout << "H→D average : " << (gib / (tH2D * 1e-3)) << " GiB/s\n";
std::cout << "D→H average : " << (gib / (tD2H * 1e-3)) << " GiB/s\n";
// ---- cleanup ----
vkDestroyBuffer(dev, staging, nullptr);
vkDestroyBuffer(dev, deviceBuf, nullptr);
vkFreeMemory(dev, stagingMem, nullptr);
vkFreeMemory(dev, deviceMem, nullptr);
vkDestroyCommandPool(dev, pool, nullptr);
vkDestroyDevice(dev, nullptr);
vkDestroyInstance(inst, nullptr);
return 0;
}