Make it possible to select a GPU

Also skip non-GPU devices by default
This commit is contained in:
Kasper Sauramo
2025-11-19 11:38:24 +02:00
parent 7875e26ea2
commit 167d4068cb

View File

@@ -1,3 +1,5 @@
#include <cstdlib>
#include <stdexcept>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_core.h>
@@ -7,18 +9,13 @@
#include <limits>
#include <vector>
/// TODO: Pinned memory
/// TODO: Plot by buffer size
/// Selects the kind of transfer a benchmark is configured for.
/// NOTE(review): the visible code only ever uses `Memory` (as the default of
/// BenchmarkConfig::type); `Pinned` presumably belongs to the "Pinned memory"
/// TODO above — confirm once it is implemented.
enum class TransferType {
Memory,
Pinned,
};
/// Settings shared by every benchmark run. All members have in-class
/// defaults, so `BenchmarkConfig config = {};` is directly usable.
/// NOTE(review): `buffer_size` and `bufferSize` carry the same initializer and
/// look like the before/after spelling of one renamed field — confirm that
/// only one of them is meant to stay.
struct BenchmarkConfig {
uint32_t buffer_size = 128ULL << 20;
// Byte count of each transfer (used as the vkMapMemory/memset size and passed
// to benchCopy); the default 128 << 20 bytes is 128 MiB.
uint32_t bufferSize = 128ULL << 20;
// Number of timed copies performed per direction.
uint32_t iterations = 32;
// NOTE(review): not read anywhere in the visible code — confirm where it
// takes effect.
TransferType type = TransferType::Memory;
int32_t gpuIndex = -1; // index < 0 is unselected
// When true, the run-on-all-devices path skips devices for which isGpuType()
// returns false; an explicitly selected gpuIndex is not filtered.
bool gpusOnly = true;
};
struct BenchmarkResult {
@@ -47,6 +44,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
return VK_FALSE;
}
/// Prints the command-line help text to standard output.
///
/// @param name  Program name shown on the "Usage:" line (main() passes
///              argv[0]). A null pointer is replaced by a placeholder,
///              because streaming a null `const char *` is undefined.
void usage(const char *name) {
  const char *shown = (name != nullptr) ? name : "program";
  // Plain '\n' between the lines and one std::endl at the end: the emitted
  // bytes are the same as flushing after every option line, with one flush.
  std::cout << "Usage: " << shown << " [options]\n"
            << "Options:\n"
            << " -g, --gpu INDEX Benchmark the INDEX gpu\n"
            << " -h, --help Show this help" << std::endl;
}
/// Tells whether a Vulkan physical-device type is treated as a GPU by this
/// tool: discrete, integrated and virtual GPUs are, every other type is not.
bool isGpuType(VkPhysicalDeviceType type) {
  switch (type) {
  case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
  case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
  case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
    return true;
  default:
    return false;
  }
}
const char *getGpuTypeName(VkPhysicalDeviceType type) {
switch (type) {
case VK_PHYSICAL_DEVICE_TYPE_OTHER:
@@ -198,9 +208,9 @@ void cleanupGpu(Gpu &gpu) {
void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
BenchmarkConfig config) {
const double gib = static_cast<double>(config.buffer_size) / (1 << 30);
const double gib = static_cast<double>(config.bufferSize) / (1 << 30);
std::cout << "Device: " << gpu.properties.deviceName << std::endl;
std::cout << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n";
std::cout << "Buffer size : " << config.bufferSize / (1 << 20) << " MiB\n";
std::cout << "Iterations : " << config.iterations << "\n";
std::cout << "H->D average : "
<< (gib / (result.host_to_device_time * 1e-3)) << " GiB/s\n";
@@ -210,39 +220,66 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
<< (gib / (result.device_to_host_time * 1e-3)) << " GiB/s\n";
std::cout << "D->H avg transfer time: " << result.device_to_host_time
<< "ms" << std::endl;
std::cout << std::endl;
}
/// Measures the average duration of buffer copies between the staging buffer
/// and the device buffer of `gpu`, in both directions.
///
/// The staging memory is mapped, filled with the byte 0xAB and unmapped. One
/// copy is then issued whose result is discarded (warm-up), followed by
/// `config.iterations` copies per direction whose benchCopy() results are
/// summed and divided by the iteration count.
///
/// @param gpu     Supplies the device, command pool, queue, staging memory and
///                the two buffers used for the copies.
/// @param config  Supplies the byte count and the iteration count.
/// @return A BenchmarkResult initialised from `{tH2D, tD2H}`, i.e. the mean
///         staging->device and device->staging values. Presumably
///         milliseconds (reportBenchmark prints them with "ms") — confirm
///         against benchCopy.
///
/// NOTE(review): statements using `config.buffer_size` and
/// `config.bufferSize` appear in pairs below; they look like the before/after
/// of a field rename — confirm that only one of each pair is meant to stay.
BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) {
// fill staging buffer
// NOTE(review): the VkResult of vkMapMemory is not checked and `mapped` is
// uninitialised, so a failed mapping would hand memset an indeterminate
// pointer.
void *mapped;
vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped);
std::memset(mapped, 0xAB, config.buffer_size);
vkMapMemory(gpu.device, gpu.stagingMem, 0, config.bufferSize, 0, &mapped);
std::memset(mapped, 0xAB, config.bufferSize);
vkUnmapMemory(gpu.device, gpu.stagingMem);
// warm-up, probably not significant
benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf,
config.buffer_size);
config.bufferSize);
// ---- benchmark host->device ----
// NOTE(review): with config.iterations == 0 the averages below are 0.0 / 0,
// which yields NaN.
double tH2D = 0.0;
for (uint32_t i = 0; i < config.iterations; ++i)
tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf,
gpu.deviceBuf, config.buffer_size);
gpu.deviceBuf, config.bufferSize);
tH2D /= config.iterations;
// ---- benchmark device->host ----
// Same loop with source and destination buffers swapped.
double tD2H = 0.0;
for (uint32_t i = 0; i < config.iterations; ++i)
tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf,
gpu.stagingBuf, config.buffer_size);
gpu.stagingBuf, config.bufferSize);
tD2H /= config.iterations;
return {tH2D, tD2H};
}
auto main() -> int {
/// Runs the staged benchmark on one device and prints its report, framed by a
/// leading blank line with a heading and a trailing separator line.
///
/// @param gpu     Device to benchmark; forwarded to runStagedBenchmark() and
///                reportBenchmark().
/// @param config  Benchmark settings, forwarded unchanged to both calls.
void runBenchmarkOnGPU(Gpu &gpu, BenchmarkConfig config) {
  std::cout << std::endl;
  std::cout << "Running staged benchmark" << std::endl;

  const BenchmarkResult measured = runStagedBenchmark(gpu, config);
  reportBenchmark(measured, gpu, config);

  std::cout << "--------------------" << std::endl;
}
auto main(int argc, char *argv[]) -> int {
BenchmarkConfig config = {};
// Parse the command-line options. Any malformed input prints the usage text
// and exits with EXIT_FAILURE; -h/--help prints it and exits successfully.
for (int i = 1; i < argc; i++) {
  const std::string arg = argv[i];
  if (arg == "-g" || arg == "--gpu") {
    // The flag needs a value. Without this check a trailing "-g" would read
    // argv[argc], which is a null pointer, and pass it on to std::stoi.
    if (i + 1 >= argc) {
      usage(argv[0]);
      return EXIT_FAILURE;
    }
    try {
      // A negative INDEX leaves the selection unset (gpuIndex < 0).
      config.gpuIndex = std::stoi(argv[++i]);
    } catch (const std::logic_error &) {
      // std::stoi throws std::invalid_argument for non-numeric text and
      // std::out_of_range for values that do not fit in int; both derive
      // from std::logic_error.
      usage(argv[0]);
      return EXIT_FAILURE;
    }
  } else if (arg == "-h" || arg == "--help") {
    usage(argv[0]);
    return EXIT_SUCCESS;
  } else {
    usage(argv[0]);
    return EXIT_FAILURE;
  }
}
VkApplicationInfo app{};
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
@@ -262,7 +299,7 @@ auto main() -> int {
uint32_t n = 0;
vkEnumeratePhysicalDevices(inst, &n, nullptr);
std::cout << "Found " << n << " gpus." << std::endl;
std::cout << "Found " << n << " devices." << std::endl;
std::vector<VkPhysicalDevice> vulkanPhysicalDevices(n);
vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data());
@@ -270,11 +307,9 @@ auto main() -> int {
// for each physical device, add initialize and add details
std::vector<Gpu> gpus;
BenchmarkConfig config = {};
uint32_t counter = 0;
for (const auto phy : vulkanPhysicalDevices) {
gpus.emplace_back(initGpu(phy, config.buffer_size));
gpus.emplace_back(initGpu(phy, config.bufferSize));
const Gpu &gpu = gpus.back();
@@ -285,16 +320,32 @@ auto main() -> int {
}
std::cout << "-------------------" << std::endl;
if (config.gpuIndex >= 0) {
// A single gpu selected
if (config.gpuIndex >= static_cast<int32_t>(gpus.size())) {
std::cout << "Gpu index out of bounds." << std::endl;
} else {
runBenchmarkOnGPU(gpus[config.gpuIndex], config);
}
} else {
// Run on all devices
for (auto &gpu : gpus) {
std::cout << "Running staged benchmark" << std::endl;
BenchmarkResult res = runStagedBenchmark(gpu, config);
reportBenchmark(res, gpu, config);
if (config.gpusOnly) {
if (!isGpuType(gpu.properties.deviceType)) {
std::cout << "Skipping non-gpu type: "
<< gpu.properties.deviceName << std::endl;
std::cout << "--------------------" << std::endl;
continue;
}
}
runBenchmarkOnGPU(gpu, config);
}
}
for (auto gpu : gpus) {
cleanupGpu(gpu);
}
vkDestroyInstance(inst, nullptr);
return 0;
return EXIT_SUCCESS;
}