refactor for adding another type of test
This commit is contained in:
255
src/main.cpp
255
src/main.cpp
@@ -7,17 +7,30 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <vulkan/vulkan_core.h>
|
#include <vulkan/vulkan_core.h>
|
||||||
|
|
||||||
/// TODO: Run benchmark on all GPU's
|
|
||||||
/// TODO: Pinned memory
|
/// TODO: Pinned memory
|
||||||
/// TODO: Plot by buffer size
|
/// TODO: Plot by buffer size
|
||||||
|
|
||||||
static constexpr uint64_t BUF_SIZE = 256ULL << 20; // 256 MiB
|
enum class TransferType {
|
||||||
static constexpr uint32_t ITERATIONS = 32;
|
Memory,
|
||||||
|
Pinned,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BenchmarkConfig {
|
||||||
|
uint32_t buffer_size = 256ULL << 20;
|
||||||
|
uint32_t iterations = 32;
|
||||||
|
TransferType type = TransferType::Memory;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Averaged timings produced by one benchmark run.
///
/// Both values are in milliseconds (they are averages of benchCopy() results,
/// which are reported as std::milli durations). The struct stays an aggregate
/// so it can be built with `{h2d, d2h}`.
struct BenchmarkResult {
  /// Mean duration of one host -> device copy, in milliseconds.
  double host_to_device_time = 0.0;
  /// Mean duration of one device -> host copy, in milliseconds.
  double device_to_host_time = 0.0;
};
|
||||||
|
|
||||||
// ---------- helpers ----------
|
// ---------- helpers ----------
|
||||||
VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
|
VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
|
||||||
VkDebugUtilsMessageSeverityFlagBitsEXT, VkDebugUtilsMessageTypeFlagsEXT,
|
VkDebugUtilsMessageSeverityFlagBitsEXT, VkDebugUtilsMessageTypeFlagsEXT,
|
||||||
const VkDebugUtilsMessengerCallbackDataEXT *, void *) {
|
const VkDebugUtilsMessengerCallbackDataEXT *, void *) {
|
||||||
|
|
||||||
return VK_FALSE;
|
return VK_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -111,7 +124,121 @@ double benchCopy(VkDevice dev, VkCommandPool pool, VkQueue queue, VkBuffer src,
|
|||||||
return std::chrono::duration<double, std::milli>(t1 - t0).count();
|
return std::chrono::duration<double, std::milli>(t1 - t0).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
struct Gpu {
|
||||||
|
VkPhysicalDevice physicalDevice = nullptr;
|
||||||
|
VkPhysicalDeviceProperties properties = {};
|
||||||
|
VkDevice device = nullptr;
|
||||||
|
VkQueue queue = nullptr;
|
||||||
|
VkCommandPool pool = nullptr;
|
||||||
|
// for benchmarks
|
||||||
|
VkBuffer stagingBuf = nullptr;
|
||||||
|
VkBuffer deviceBuf = nullptr;
|
||||||
|
VkDeviceMemory stagingMem = nullptr;
|
||||||
|
VkDeviceMemory deviceMem = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Creates the Vulkan objects needed to benchmark one physical device.
///
/// Queries the device properties, creates a logical device with a single
/// queue, a transient command pool for that queue's family, and two buffers of
/// `buffer_size` bytes: a staging buffer backed by host-visible, host-coherent
/// memory and a device buffer backed by device-local memory.
///
/// @param phy          Physical device to initialise.
/// @param buffer_size  Size in bytes of each of the two buffers.
/// @return The populated Gpu. If device or command pool creation fails, a
///         diagnostic is written to std::cerr and the Gpu is returned early
///         with the not-yet-created handles left null; callers should check
///         `device` / `pool` before using it.
Gpu initGpu(VkPhysicalDevice phy, uint32_t buffer_size) {
  Gpu gpu = {};
  gpu.physicalDevice = phy;

  // properties with lots of nice info
  vkGetPhysicalDeviceProperties(phy, &gpu.properties);

  // Pick the first queue family that can execute transfer commands instead of
  // assuming family 0 can. Graphics and compute families support transfer
  // implicitly, so any of the three capability bits is acceptable.
  uint32_t familyCount = 0;
  vkGetPhysicalDeviceQueueFamilyProperties(phy, &familyCount, nullptr);
  std::vector<VkQueueFamilyProperties> families(familyCount);
  vkGetPhysicalDeviceQueueFamilyProperties(phy, &familyCount, families.data());

  constexpr VkQueueFlags transferCapable =
      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
  uint32_t family = 0; // fallback: the previously hard-coded choice
  for (uint32_t i = 0; i < familyCount; ++i) {
    if (families[i].queueCount > 0 &&
        (families[i].queueFlags & transferCapable) != 0) {
      family = i;
      break;
    }
  }

  // logical device
  float prio = 1.0f;
  VkDeviceQueueCreateInfo qi{};
  qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
  qi.queueFamilyIndex = family;
  qi.queueCount = 1;
  qi.pQueuePriorities = &prio;

  VkDeviceCreateInfo dci{};
  dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
  dci.queueCreateInfoCount = 1;
  dci.pQueueCreateInfos = &qi;

  if (vkCreateDevice(phy, &dci, nullptr, &gpu.device) != VK_SUCCESS) {
    std::cerr << "initGpu: vkCreateDevice failed for "
              << gpu.properties.deviceName << '\n';
    gpu.device = VK_NULL_HANDLE;
    return gpu;
  }
  vkGetDeviceQueue(gpu.device, family, 0, &gpu.queue);

  // command pool
  VkCommandPoolCreateInfo pci{};
  pci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
  pci.queueFamilyIndex = family;
  pci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;

  if (vkCreateCommandPool(gpu.device, &pci, nullptr, &gpu.pool) != VK_SUCCESS) {
    std::cerr << "initGpu: vkCreateCommandPool failed for "
              << gpu.properties.deviceName << '\n';
    gpu.pool = VK_NULL_HANDLE;
    return gpu;
  }

  // buffers: both are usable as copy source and destination so the same pair
  // serves the host->device and device->host directions.
  gpu.stagingBuf = createBuffer(gpu.device, buffer_size,
                                VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                    VK_BUFFER_USAGE_TRANSFER_DST_BIT);
  gpu.stagingMem = allocateMem(gpu.device, phy, gpu.stagingBuf,
                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                   VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

  gpu.deviceBuf = createBuffer(gpu.device, buffer_size,
                               VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                                   VK_BUFFER_USAGE_TRANSFER_DST_BIT);
  gpu.deviceMem = allocateMem(gpu.device, phy, gpu.deviceBuf,
                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

  return gpu;
}
|
||||||
|
|
||||||
|
/// Destroys every Vulkan object owned by `gpu` and nulls the handles.
///
/// Does nothing when the logical device was never created. Otherwise waits for
/// the device to go idle, then destroys the buffers, frees their memory,
/// destroys the command pool and finally the device. The handles are reset to
/// VK_NULL_HANDLE afterwards so calling this twice on the same object is
/// harmless.
///
/// @param gpu  Object whose resources are released; modified in place.
void cleanupGpu(Gpu &gpu) {
  if (gpu.device == VK_NULL_HANDLE) {
    return; // nothing was created on this device
  }

  // Make sure no submitted work still references the objects below.
  (void)vkDeviceWaitIdle(gpu.device);

  vkDestroyBuffer(gpu.device, gpu.stagingBuf, nullptr);
  vkDestroyBuffer(gpu.device, gpu.deviceBuf, nullptr);
  vkFreeMemory(gpu.device, gpu.stagingMem, nullptr);
  vkFreeMemory(gpu.device, gpu.deviceMem, nullptr);
  vkDestroyCommandPool(gpu.device, gpu.pool, nullptr);
  vkDestroyDevice(gpu.device, nullptr);

  // Drop the now-dangling handles.
  gpu.stagingBuf = VK_NULL_HANDLE;
  gpu.deviceBuf = VK_NULL_HANDLE;
  gpu.stagingMem = VK_NULL_HANDLE;
  gpu.deviceMem = VK_NULL_HANDLE;
  gpu.pool = VK_NULL_HANDLE;
  gpu.queue = VK_NULL_HANDLE;
  gpu.device = VK_NULL_HANDLE;
}
|
||||||
|
|
||||||
|
void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
|
||||||
|
BenchmarkConfig config) {
|
||||||
|
|
||||||
|
const double gib = static_cast<double>(config.buffer_size) / (1 << 30);
|
||||||
|
std::cout << "Device: " << gpu.properties.deviceName << std::endl;
|
||||||
|
std::cout << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n";
|
||||||
|
std::cout << "Iterations : " << config.iterations << "\n";
|
||||||
|
std::cout << "H→D average : " << (gib / (result.host_to_device_time * 1e-3))
|
||||||
|
<< " GiB/s\n";
|
||||||
|
std::cout << "D→H average : " << (gib / (result.device_to_host_time * 1e-3))
|
||||||
|
<< " GiB/s\n";
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Measures the average copy time between the staging and device buffers.
///
/// Fills the staging buffer with a byte pattern, performs one untimed warm-up
/// copy, then times `config.iterations` copies in each direction with
/// benchCopy() and averages them.
///
/// @param gpu     Initialised device objects (device, pool, queue, buffers).
/// @param config  Buffer size in bytes and number of timed iterations.
/// @return Average host->device and device->host copy times in milliseconds.
///         Both averages are 0.0 when `config.iterations` is 0 (instead of the
///         NaN a 0/0 division would produce).
BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) {
  // ---- fill staging buffer ----
  void *mapped = nullptr;
  const VkResult mapResult = vkMapMemory(gpu.device, gpu.stagingMem, 0,
                                         config.buffer_size, 0, &mapped);
  if (mapResult == VK_SUCCESS && mapped != nullptr) {
    std::memset(mapped, 0xAB, config.buffer_size);
    vkUnmapMemory(gpu.device, gpu.stagingMem);
  } else {
    // Never write through an invalid pointer; the copies below are still
    // timed, just without the fill pattern in the staging buffer.
    std::cerr << "runBenchmark: vkMapMemory failed on "
              << gpu.properties.deviceName
              << "; staging buffer left unfilled\n";
  }

  // ---- warm-up ----
  benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf,
            config.buffer_size);

  // Times `config.iterations` copies from src to dst and returns the mean.
  const auto averageCopyMs = [&](VkBuffer src, VkBuffer dst) -> double {
    if (config.iterations == 0) {
      return 0.0;
    }
    double total = 0.0;
    for (uint32_t i = 0; i < config.iterations; ++i) {
      total += benchCopy(gpu.device, gpu.pool, gpu.queue, src, dst,
                         config.buffer_size);
    }
    return total / config.iterations;
  };

  // ---- benchmark host->device ----
  const double tH2D = averageCopyMs(gpu.stagingBuf, gpu.deviceBuf);

  // ---- benchmark device->host ----
  const double tD2H = averageCopyMs(gpu.deviceBuf, gpu.stagingBuf);

  return {tH2D, tD2H};
}
|
||||||
|
|
||||||
|
auto main() -> int {
|
||||||
|
|
||||||
VkApplicationInfo app{};
|
VkApplicationInfo app{};
|
||||||
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
||||||
@@ -125,105 +252,43 @@ int main() {
|
|||||||
VkInstance inst;
|
VkInstance inst;
|
||||||
vkCreateInstance(&ici, nullptr, &inst);
|
vkCreateInstance(&ici, nullptr, &inst);
|
||||||
|
|
||||||
std::cout << "Starting benchmark..." << std::endl;
|
std::cout << "Starting benchmark." << std::endl;
|
||||||
|
|
||||||
// ---- physical device ----
|
// physical devices
|
||||||
uint32_t n = 0;
|
uint32_t n = 0;
|
||||||
vkEnumeratePhysicalDevices(inst, &n, nullptr);
|
vkEnumeratePhysicalDevices(inst, &n, nullptr);
|
||||||
std::vector<VkPhysicalDevice> gpus(n);
|
|
||||||
vkEnumeratePhysicalDevices(inst, &n, gpus.data());
|
|
||||||
VkPhysicalDevice phy = gpus[0];
|
|
||||||
|
|
||||||
std::cout << "Found " << gpus.size() << " gpus." << std::endl;
|
std::cout << "Found " << n << " gpus." << std::endl;
|
||||||
|
std::vector<VkPhysicalDevice> vulkanPhysicalDevices(n);
|
||||||
|
|
||||||
{
|
vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data());
|
||||||
std::vector<VkPhysicalDeviceProperties> gpu_properties(n);
|
|
||||||
// Print info
|
|
||||||
for (uint32_t i = 0; i < gpus.size(); i++) {
|
|
||||||
VkPhysicalDeviceProperties *prop = &gpu_properties[i];
|
|
||||||
vkGetPhysicalDeviceProperties(gpus[i], prop);
|
|
||||||
|
|
||||||
std::cout << "GPU: [" << i << "] " << prop->deviceName << " ("
|
// for each physical device, initialize it and record its details
|
||||||
<< getGpuTypeName(prop->deviceType) << ")" << std::endl;
|
std::vector<Gpu> gpus;
|
||||||
}
|
|
||||||
|
BenchmarkConfig config = {};
|
||||||
|
|
||||||
|
uint32_t counter = 0;
|
||||||
|
for (const auto phy : vulkanPhysicalDevices) {
|
||||||
|
gpus.emplace_back(initGpu(phy, config.buffer_size));
|
||||||
|
|
||||||
|
const Gpu &gpu = gpus.back();
|
||||||
|
|
||||||
|
std::cout << "Device: [" << counter << "] " << gpu.properties.deviceName
|
||||||
|
<< " (" << getGpuTypeName(gpu.properties.deviceType) << ")"
|
||||||
|
<< std::endl;
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
std::cout << "-------------------" << std::endl;
|
||||||
|
|
||||||
|
for (auto &gpu : gpus) {
|
||||||
|
BenchmarkResult res = runBenchmark(gpu, config);
|
||||||
|
reportBenchmark(res, gpu, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- logical device ----
|
for (auto gpu : gpus) {
|
||||||
float prio = 1.0f;
|
cleanupGpu(gpu);
|
||||||
VkDeviceQueueCreateInfo qi{};
|
}
|
||||||
qi.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
|
||||||
qi.queueFamilyIndex = 0; // assume family 0 supports transfer
|
|
||||||
qi.queueCount = 1;
|
|
||||||
qi.pQueuePriorities = &prio;
|
|
||||||
|
|
||||||
VkDeviceCreateInfo dci{};
|
|
||||||
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
|
||||||
dci.queueCreateInfoCount = 1;
|
|
||||||
dci.pQueueCreateInfos = &qi;
|
|
||||||
|
|
||||||
VkDevice dev;
|
|
||||||
vkCreateDevice(phy, &dci, nullptr, &dev);
|
|
||||||
|
|
||||||
VkQueue queue;
|
|
||||||
vkGetDeviceQueue(dev, 0, 0, &queue);
|
|
||||||
|
|
||||||
// ---- command pool ----
|
|
||||||
VkCommandPoolCreateInfo pci{};
|
|
||||||
pci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
|
||||||
pci.queueFamilyIndex = 0;
|
|
||||||
pci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
|
|
||||||
VkCommandPool pool;
|
|
||||||
vkCreateCommandPool(dev, &pci, nullptr, &pool);
|
|
||||||
|
|
||||||
// ---- buffers ----
|
|
||||||
VkBuffer staging = createBuffer(dev, BUF_SIZE,
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
|
||||||
VkDeviceMemory stagingMem =
|
|
||||||
allocateMem(dev, phy, staging,
|
|
||||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
||||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
|
||||||
|
|
||||||
VkBuffer deviceBuf = createBuffer(dev, BUF_SIZE,
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
|
||||||
VkDeviceMemory deviceMem =
|
|
||||||
allocateMem(dev, phy, deviceBuf, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
|
||||||
|
|
||||||
// ---- fill staging buffer ----
|
|
||||||
void *mapped;
|
|
||||||
vkMapMemory(dev, stagingMem, 0, BUF_SIZE, 0, &mapped);
|
|
||||||
std::memset(mapped, 0xAB, BUF_SIZE);
|
|
||||||
vkUnmapMemory(dev, stagingMem);
|
|
||||||
|
|
||||||
// ---- warm-up ----
|
|
||||||
benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
|
|
||||||
|
|
||||||
// ---- benchmark host->device ----
|
|
||||||
double tH2D = 0.0;
|
|
||||||
for (uint32_t i = 0; i < ITERATIONS; ++i)
|
|
||||||
tH2D += benchCopy(dev, pool, queue, staging, deviceBuf, BUF_SIZE);
|
|
||||||
tH2D /= ITERATIONS;
|
|
||||||
|
|
||||||
// ---- benchmark device->host ----
|
|
||||||
double tD2H = 0.0;
|
|
||||||
for (uint32_t i = 0; i < ITERATIONS; ++i)
|
|
||||||
tD2H += benchCopy(dev, pool, queue, deviceBuf, staging, BUF_SIZE);
|
|
||||||
tD2H /= ITERATIONS;
|
|
||||||
|
|
||||||
const double gib = static_cast<double>(BUF_SIZE) / (1 << 30);
|
|
||||||
std::cout << "Buffer size : " << BUF_SIZE / (1 << 20) << " MiB\n";
|
|
||||||
std::cout << "Iterations : " << ITERATIONS << "\n";
|
|
||||||
std::cout << "H→D average : " << (gib / (tH2D * 1e-3)) << " GiB/s\n";
|
|
||||||
std::cout << "D→H average : " << (gib / (tD2H * 1e-3)) << " GiB/s\n";
|
|
||||||
|
|
||||||
// ---- cleanup ----
|
|
||||||
vkDestroyBuffer(dev, staging, nullptr);
|
|
||||||
vkDestroyBuffer(dev, deviceBuf, nullptr);
|
|
||||||
vkFreeMemory(dev, stagingMem, nullptr);
|
|
||||||
vkFreeMemory(dev, deviceMem, nullptr);
|
|
||||||
vkDestroyCommandPool(dev, pool, nullptr);
|
|
||||||
vkDestroyDevice(dev, nullptr);
|
|
||||||
vkDestroyInstance(inst, nullptr);
|
vkDestroyInstance(inst, nullptr);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user