initial version
This commit is contained in:
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
build/
|
||||
|
||||
.DS_Store
|
||||
.idea
|
||||
*.log
|
||||
tmp/
|
||||
17
CMakeLists.txt
Normal file
17
CMakeLists.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
# Build script for the vulkan-transfer-bench executable.
cmake_minimum_required(VERSION 3.19)
project(vulkan-transfer-bench CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the Vulkan headers and loader. This defines the imported target
# Vulkan::Vulkan, which carries both the include directories and the
# platform-specific library name, instead of relying on a bare `vulkan`
# library being on the default linker search path.
find_package(Vulkan REQUIRED)

add_compile_definitions(VK_ENABLE_BETA_EXTENSIONS)

add_executable(vulkan-transfer-bench
  src/main.cpp
)

target_link_libraries(vulkan-transfer-bench PRIVATE
  Vulkan::Vulkan
)

# -Wall/-Wextra are GCC/Clang spellings; MSVC uses /W4 for a comparable level.
if(MSVC)
  target_compile_options(vulkan-transfer-bench PRIVATE /W4)
else()
  target_compile_options(vulkan-transfer-bench PRIVATE -Wall -Wextra)
endif()
|
||||
196
src/main.cpp
Normal file
196
src/main.cpp
Normal file
@@ -0,0 +1,196 @@
|
||||
#include <vulkan/vulkan.h>

#include <chrono>
#include <cstring>
#include <exception>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
|
||||
|
||||
/// TODO: Print available GPUs
|
||||
/// TODO: Run benchmark on all GPUs
|
||||
/// TODO: Pinned memory
|
||||
/// TODO: Plot by buffer size
|
||||
|
||||
// Size in bytes of each buffer used by the benchmark: 256 MiB.
static constexpr uint64_t BUF_SIZE = 256ULL * 1024ULL * 1024ULL;
// Number of timed copies averaged per transfer direction.
static constexpr uint32_t ITERATIONS = 32u;
|
||||
|
||||
// ---------- helpers ----------
|
||||
/// Callback with the parameter list of a VK_EXT_debug_utils messenger
/// callback. It ignores all of its arguments and always returns VK_FALSE.
/// NOTE(review): nothing in the visible code registers this callback.
VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT /*severity*/,
    VkDebugUtilsMessageTypeFlagsEXT /*types*/,
    const VkDebugUtilsMessengerCallbackDataEXT * /*callbackData*/,
    void * /*userData*/) {
  const VkBool32 result = VK_FALSE;
  return result;
}
|
||||
|
||||
/// Looks up a memory type on `phy` that is allowed by `typeBits` and carries
/// every flag in `props`.
///
/// @param phy      Physical device whose memory types are queried.
/// @param typeBits Bit mask; bit i set means memory type i is acceptable.
/// @param props    Property flags that must all be present on the chosen type.
/// @return The lowest matching memory type index, or
///         std::numeric_limits<uint32_t>::max() when nothing matches.
uint32_t findMemory(VkPhysicalDevice phy, uint32_t typeBits,
                    VkMemoryPropertyFlags props) {
  VkPhysicalDeviceMemoryProperties memProps;
  vkGetPhysicalDeviceMemoryProperties(phy, &memProps);

  uint32_t index = 0;
  while (index < memProps.memoryTypeCount) {
    const bool allowed = (typeBits & (1u << index)) != 0;
    const bool hasAllProps =
        (memProps.memoryTypes[index].propertyFlags & props) == props;
    if (allowed && hasAllProps) {
      return index;
    }
    ++index;
  }

  // No memory type satisfied both constraints.
  return std::numeric_limits<uint32_t>::max();
}
|
||||
|
||||
/// Creates an exclusive-sharing-mode buffer of `size` bytes.
///
/// @param dev   Logical device that owns the buffer.
/// @param size  Buffer size in bytes.
/// @param usage Usage flags stored in the create info.
/// @return The newly created buffer handle (no memory is bound to it here).
/// @throws std::runtime_error if vkCreateBuffer does not return VK_SUCCESS.
VkBuffer createBuffer(VkDevice dev, VkDeviceSize size,
                      VkBufferUsageFlags usage) {
  VkBufferCreateInfo ci{};
  ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  ci.size = size;
  ci.usage = usage;
  ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

  VkBuffer buf = VK_NULL_HANDLE;
  const VkResult res = vkCreateBuffer(dev, &ci, nullptr, &buf);
  if (res != VK_SUCCESS) {
    // Fail loudly instead of handing an indeterminate handle to the caller.
    throw std::runtime_error("vkCreateBuffer failed (VkResult " +
                             std::to_string(static_cast<int>(res)) + ")");
  }
  return buf;
}
|
||||
|
||||
/// Allocates device memory for `buf` and binds it to the buffer at offset 0.
///
/// The allocation size and the permitted memory types come from
/// vkGetBufferMemoryRequirements; the memory type is chosen by findMemory()
/// using `props`.
///
/// @param dev   Logical device that owns `buf`.
/// @param phy   Physical device whose memory types are searched.
/// @param buf   Buffer to back with memory.
/// @param props Memory property flags the chosen memory type must have.
/// @return The allocated memory handle, already bound to `buf`.
/// @throws std::runtime_error if no memory type matches, or if allocation or
///         binding does not return VK_SUCCESS.
VkDeviceMemory allocateMem(VkDevice dev, VkPhysicalDevice phy, VkBuffer buf,
                           VkMemoryPropertyFlags props) {
  VkMemoryRequirements req;
  vkGetBufferMemoryRequirements(dev, buf, &req);

  const uint32_t idx = findMemory(phy, req.memoryTypeBits, props);
  // findMemory() signals "not found" with the maximum uint32_t value, which
  // must not be used as a memory type index.
  if (idx == std::numeric_limits<uint32_t>::max()) {
    throw std::runtime_error(
        "no memory type satisfies the requested property flags");
  }

  VkMemoryAllocateInfo ai{};
  ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  ai.allocationSize = req.size;
  ai.memoryTypeIndex = idx;

  VkDeviceMemory mem = VK_NULL_HANDLE;
  const VkResult allocRes = vkAllocateMemory(dev, &ai, nullptr, &mem);
  if (allocRes != VK_SUCCESS) {
    throw std::runtime_error("vkAllocateMemory failed (VkResult " +
                             std::to_string(static_cast<int>(allocRes)) + ")");
  }

  const VkResult bindRes = vkBindBufferMemory(dev, buf, mem, 0);
  if (bindRes != VK_SUCCESS) {
    // The caller never sees `mem` on this path, so release it here.
    vkFreeMemory(dev, mem, nullptr);
    throw std::runtime_error("vkBindBufferMemory failed (VkResult " +
                             std::to_string(static_cast<int>(bindRes)) + ")");
  }
  return mem;
}
|
||||
|
||||
/// Times one buffer-to-buffer copy.
///
/// Allocates a primary command buffer from `pool`, records a single
/// vkCmdCopyBuffer of `size` bytes from offset 0 of `src` to offset 0 of
/// `dst`, submits it to `queue`, and waits for the queue to become idle.
///
/// @param dev   Logical device that owns `pool`.
/// @param pool  Command pool the temporary command buffer is allocated from.
/// @param queue Queue the copy is submitted to.
/// @param src   Source buffer.
/// @param dst   Destination buffer.
/// @param size  Number of bytes to copy.
/// @return Elapsed host time in milliseconds, measured from just before
///         vkQueueSubmit until vkQueueWaitIdle returns.
/// @throws std::runtime_error if any Vulkan call does not return VK_SUCCESS.
double benchCopy(VkDevice dev, VkCommandPool pool, VkQueue queue, VkBuffer src,
                 VkBuffer dst, VkDeviceSize size) {
  const auto describe = [](const char *what, VkResult res) {
    return std::string(what) + " failed (VkResult " +
           std::to_string(static_cast<int>(res)) + ")";
  };

  VkCommandBuffer cmd = VK_NULL_HANDLE;
  VkCommandBufferAllocateInfo ai{};
  ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  ai.commandPool = pool;
  ai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
  ai.commandBufferCount = 1;
  VkResult res = vkAllocateCommandBuffers(dev, &ai, &cmd);
  if (res != VK_SUCCESS) {
    throw std::runtime_error(describe("vkAllocateCommandBuffers", res));
  }

  // Used for failures that happen before the command buffer is executing:
  // return it to the pool, then report the error.
  const auto failAndFree = [&](const char *what, VkResult code) {
    vkFreeCommandBuffers(dev, pool, 1, &cmd);
    throw std::runtime_error(describe(what, code));
  };

  VkCommandBufferBeginInfo bi{};
  bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
  res = vkBeginCommandBuffer(cmd, &bi);
  if (res != VK_SUCCESS) {
    failAndFree("vkBeginCommandBuffer", res);
  }

  VkBufferCopy region{};  // srcOffset and dstOffset stay 0
  region.size = size;
  vkCmdCopyBuffer(cmd, src, dst, 1, &region);

  res = vkEndCommandBuffer(cmd);
  if (res != VK_SUCCESS) {
    failAndFree("vkEndCommandBuffer", res);
  }

  VkSubmitInfo si{};
  si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  si.commandBufferCount = 1;
  si.pCommandBuffers = &cmd;

  const auto t0 = std::chrono::steady_clock::now();
  res = vkQueueSubmit(queue, 1, &si, VK_NULL_HANDLE);
  if (res != VK_SUCCESS) {
    failAndFree("vkQueueSubmit", res);
  }
  res = vkQueueWaitIdle(queue);
  const auto t1 = std::chrono::steady_clock::now();
  if (res != VK_SUCCESS) {
    // The submission may not have completed, so the command buffer is not
    // freed here; it stays owned by `pool`.
    throw std::runtime_error(describe("vkQueueWaitIdle", res));
  }

  vkFreeCommandBuffers(dev, pool, 1, &cmd);
  return std::chrono::duration<double, std::milli>(t1 - t0).count();
}
|
||||
|
||||
// ---------- main ----------
|
||||
int main() {
|
||||
// ---- instance ----
|
||||
VkApplicationInfo app{};
|
||||
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
||||
app.pApplicationName = "VulkanTransferBench";
|
||||
app.apiVersion = VK_API_VERSION_1_2;
|
||||
|
||||
VkInstanceCreateInfo ici{};
|
||||
ici.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
||||
ici.pApplicationInfo = &app;
|
||||
|
||||
VkInstance inst;
|
||||
vkCreateInstance(&ici, nullptr, &inst);
|
||||
|
||||
// ---- physical device ----
|
||||
uint32_t n = 0;
|
||||
vkEnumeratePhysicalDevices(inst, &n, nullptr);
|
||||
std::vector<VkPhysicalDevice> gpus(n);
|
||||
vkEnumeratePhysicalDevices(inst, &n, gpus.data());
|
||||
VkPhysicalDevice phy = gpus[0];
|
||||
|
||||
// ---- logical device ----
|
||||
float prio = 1.0f;
|
||||
VkDeviceQueueCreateInfo qi{};
|
||||
qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
qi.queueFamilyIndex = 0; // assume family 0 supports transfer
|
||||
qi.queueCount = 1;
|
||||
qi.pQueuePriorities = &prio;
|
||||
|
||||
VkDeviceCreateInfo dci{};
|
||||
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
dci.queueCreateInfoCount = 1;
|
||||
dci.pQueueCreateInfos = &qi;
|
||||
|
||||
VkDevice dev;
|
||||
vkCreateDevice(phy, &dci, nullptr, &dev);
|
||||
|
||||
VkQueue queue;
|
||||
vkGetDeviceQueue(dev, 0, 0, &queue);
|
||||
|
||||
// ---- command pool ----
|
||||
VkCommandPoolCreateInfo pci{};
|
||||
pci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
pci.queueFamilyIndex = 0;
|
||||
pci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
|
||||
VkCommandPool pool;
|
||||
vkCreateCommandPool(dev, &pci, nullptr, &pool);
|
||||
|
||||
// ---- buffers ----
|
||||
VkBuffer staging = createBuffer(dev, BUF_SIZE,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
||||
VkDeviceMemory stagingMem =
|
||||
allocateMem(dev, phy, staging,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
||||
|
||||
VkBuffer deviceBuf = createBuffer(dev, BUF_SIZE,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
||||
VkDeviceMemory deviceMem =
|
||||
allocateMem(dev, phy, deviceBuf, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
|
||||
// ---- fill staging buffer ----
|
||||
void *mapped;
|
||||
vkMapMemory(dev, stagingMem, 0, BUF_SIZE, 0, &mapped);
|
||||
std::memset(mapped, 0xAB, BUF_SIZE);
|
||||
vkUnmapMemory(dev, stagingMem);
|
||||
|
||||
// ---- warm-up ----
|
||||
benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
|
||||
|
||||
// ---- benchmark host->device ----
|
||||
double tH2D = 0.0;
|
||||
for (uint32_t i = 0; i < ITERATIONS; ++i)
|
||||
tH2D += benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
|
||||
tH2D /= ITERATIONS;
|
||||
|
||||
// ---- benchmark device->host ----
|
||||
double tD2H = 0.0;
|
||||
for (uint32_t i = 0; i < ITERATIONS; ++i)
|
||||
tD2H += benchCopy(dev, pool, queue, deviceBuf, staging, BUF_SIZE);
|
||||
tD2H /= ITERATIONS;
|
||||
|
||||
const double gib = static_cast<double>(BUF_SIZE) / (1 << 30);
|
||||
std::cout << "Buffer size : " << BUF_SIZE / (1 << 20) << " MiB\n";
|
||||
std::cout << "Iterations : " << ITERATIONS << "\n";
|
||||
std::cout << "H→D average : " << (gib / (tH2D * 1e-3)) << " GiB/s\n";
|
||||
std::cout << "D→H average : " << (gib / (tD2H * 1e-3)) << " GiB/s\n";
|
||||
|
||||
// ---- cleanup ----
|
||||
vkDestroyBuffer(dev, staging, nullptr);
|
||||
vkDestroyBuffer(dev, deviceBuf, nullptr);
|
||||
vkFreeMemory(dev, stagingMem, nullptr);
|
||||
vkFreeMemory(dev, deviceMem, nullptr);
|
||||
vkDestroyCommandPool(dev, pool, nullptr);
|
||||
vkDestroyDevice(dev, nullptr);
|
||||
vkDestroyInstance(inst, nullptr);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user