Make it possible to select a GPU
Also skip non-GPU devices by default
This commit is contained in:
105
src/main.cpp
105
src/main.cpp
@@ -1,3 +1,5 @@
|
|||||||
|
#include <cstdlib>
|
||||||
|
#include <stdexcept>
|
||||||
#include <vulkan/vulkan.h>
|
#include <vulkan/vulkan.h>
|
||||||
#include <vulkan/vulkan_core.h>
|
#include <vulkan/vulkan_core.h>
|
||||||
|
|
||||||
@@ -7,18 +9,13 @@
|
|||||||
#include <limits>
|
#include <limits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
/// TODO: Pinned memory
|
|
||||||
/// TODO: Plot by buffer size
|
/// TODO: Plot by buffer size
|
||||||
|
|
||||||
enum class TransferType {
|
|
||||||
Memory,
|
|
||||||
Pinned,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct BenchmarkConfig {
|
struct BenchmarkConfig {
|
||||||
uint32_t buffer_size = 128ULL << 20;
|
uint32_t bufferSize = 128ULL << 20;
|
||||||
uint32_t iterations = 32;
|
uint32_t iterations = 32;
|
||||||
TransferType type = TransferType::Memory;
|
int32_t gpuIndex = -1; // index < 0 is unselected
|
||||||
|
bool gpusOnly = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BenchmarkResult {
|
struct BenchmarkResult {
|
||||||
@@ -47,6 +44,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
|
|||||||
return VK_FALSE;
|
return VK_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Prints the command-line help text to standard output.
///
/// @param name Program name shown in the "Usage:" line. A null pointer is
///             replaced by a placeholder, because streaming a null
///             `const char *` into an ostream is undefined behaviour.
void usage(const char *name) {
  // Guard against a null program name before it reaches operator<<.
  const char *shownName = (name != nullptr) ? name : "<program>";

  // The whole message is written first and flushed once by the final
  // std::endl instead of flushing in the middle of the help text.
  std::cout << "Usage: " << shownName << " [options]\n"
            << "Options:\n"
            << " -g, --gpu INDEX Benchmark the INDEX gpu\n"
            << " -h, --help Show this help" << std::endl;
}
|
||||||
|
|
||||||
|
/// Reports whether a Vulkan physical-device type counts as a GPU:
/// discrete, integrated and virtual GPUs do, every other type does not.
bool isGpuType(VkPhysicalDeviceType type) {
  switch (type) {
  case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
  case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
  case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
    return true;
  default:
    return false;
  }
}
|
||||||
|
|
||||||
const char *getGpuTypeName(VkPhysicalDeviceType type) {
|
const char *getGpuTypeName(VkPhysicalDeviceType type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case VK_PHYSICAL_DEVICE_TYPE_OTHER:
|
case VK_PHYSICAL_DEVICE_TYPE_OTHER:
|
||||||
@@ -198,9 +208,9 @@ void cleanupGpu(Gpu &gpu) {
|
|||||||
void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
|
void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
|
||||||
BenchmarkConfig config) {
|
BenchmarkConfig config) {
|
||||||
|
|
||||||
const double gib = static_cast<double>(config.buffer_size) / (1 << 30);
|
const double gib = static_cast<double>(config.bufferSize) / (1 << 30);
|
||||||
std::cout << "Device: " << gpu.properties.deviceName << std::endl;
|
std::cout << "Device: " << gpu.properties.deviceName << std::endl;
|
||||||
std::cout << "Buffer size : " << config.buffer_size / (1 << 20) << " MiB\n";
|
std::cout << "Buffer size : " << config.bufferSize / (1 << 20) << " MiB\n";
|
||||||
std::cout << "Iterations : " << config.iterations << "\n";
|
std::cout << "Iterations : " << config.iterations << "\n";
|
||||||
std::cout << "H->D average : "
|
std::cout << "H->D average : "
|
||||||
<< (gib / (result.host_to_device_time * 1e-3)) << " GiB/s\n";
|
<< (gib / (result.host_to_device_time * 1e-3)) << " GiB/s\n";
|
||||||
@@ -210,39 +220,66 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
|
|||||||
<< (gib / (result.device_to_host_time * 1e-3)) << " GiB/s\n";
|
<< (gib / (result.device_to_host_time * 1e-3)) << " GiB/s\n";
|
||||||
std::cout << "D->H avg transfer time: " << result.device_to_host_time
|
std::cout << "D->H avg transfer time: " << result.device_to_host_time
|
||||||
<< "ms" << std::endl;
|
<< "ms" << std::endl;
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) {
|
BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) {
|
||||||
|
|
||||||
// fill staging buffer
|
// fill staging buffer
|
||||||
void *mapped;
|
void *mapped;
|
||||||
vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped);
|
vkMapMemory(gpu.device, gpu.stagingMem, 0, config.bufferSize, 0, &mapped);
|
||||||
std::memset(mapped, 0xAB, config.buffer_size);
|
std::memset(mapped, 0xAB, config.bufferSize);
|
||||||
vkUnmapMemory(gpu.device, gpu.stagingMem);
|
vkUnmapMemory(gpu.device, gpu.stagingMem);
|
||||||
|
|
||||||
// warm-up, probably not significant
|
// warm-up, probably not significant
|
||||||
benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf,
|
benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf, gpu.deviceBuf,
|
||||||
config.buffer_size);
|
config.bufferSize);
|
||||||
|
|
||||||
// ---- benchmark host->device ----
|
// ---- benchmark host->device ----
|
||||||
double tH2D = 0.0;
|
double tH2D = 0.0;
|
||||||
for (uint32_t i = 0; i < config.iterations; ++i)
|
for (uint32_t i = 0; i < config.iterations; ++i)
|
||||||
tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf,
|
tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.stagingBuf,
|
||||||
gpu.deviceBuf, config.buffer_size);
|
gpu.deviceBuf, config.bufferSize);
|
||||||
tH2D /= config.iterations;
|
tH2D /= config.iterations;
|
||||||
|
|
||||||
// ---- benchmark device->host ----
|
// ---- benchmark device->host ----
|
||||||
double tD2H = 0.0;
|
double tD2H = 0.0;
|
||||||
for (uint32_t i = 0; i < config.iterations; ++i)
|
for (uint32_t i = 0; i < config.iterations; ++i)
|
||||||
tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf,
|
tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf,
|
||||||
gpu.stagingBuf, config.buffer_size);
|
gpu.stagingBuf, config.bufferSize);
|
||||||
tD2H /= config.iterations;
|
tD2H /= config.iterations;
|
||||||
|
|
||||||
return {tH2D, tD2H};
|
return {tH2D, tD2H};
|
||||||
}
|
}
|
||||||
|
|
||||||
auto main() -> int {
|
/// Runs the staged transfer benchmark on one device and prints its report,
/// preceded by a heading line and followed by a separator line.
void runBenchmarkOnGPU(Gpu &gpu, BenchmarkConfig config) {
  // Blank line, then the heading; both flushed as before.
  std::cout << std::endl;
  std::cout << "Running staged benchmark" << std::endl;

  const BenchmarkResult measured = runStagedBenchmark(gpu, config);
  reportBenchmark(measured, gpu, config);

  std::cout << "--------------------" << std::endl;
}
|
||||||
|
|
||||||
|
auto main(int argc, char *argv[]) -> int {
|
||||||
|
BenchmarkConfig config = {};
|
||||||
|
|
||||||
|
for (int i = 1; i < argc; i++) {
|
||||||
|
std::string arg = argv[i];
|
||||||
|
|
||||||
|
if (arg == "-g" || arg == "--gpu") {
|
||||||
|
const char *index(argv[++i]);
|
||||||
|
try {
|
||||||
|
config.gpuIndex = std::stoi(index);
|
||||||
|
} catch (std::invalid_argument &e) {
|
||||||
|
usage(argv[0]);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
} else if (arg == "-h" || arg == "--help") {
|
||||||
|
usage(argv[0]);
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
} else {
|
||||||
|
usage(argv[0]);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
VkApplicationInfo app{};
|
VkApplicationInfo app{};
|
||||||
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
||||||
@@ -262,7 +299,7 @@ auto main() -> int {
|
|||||||
uint32_t n = 0;
|
uint32_t n = 0;
|
||||||
vkEnumeratePhysicalDevices(inst, &n, nullptr);
|
vkEnumeratePhysicalDevices(inst, &n, nullptr);
|
||||||
|
|
||||||
std::cout << "Found " << n << " gpus." << std::endl;
|
std::cout << "Found " << n << " devices." << std::endl;
|
||||||
std::vector<VkPhysicalDevice> vulkanPhysicalDevices(n);
|
std::vector<VkPhysicalDevice> vulkanPhysicalDevices(n);
|
||||||
|
|
||||||
vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data());
|
vkEnumeratePhysicalDevices(inst, &n, vulkanPhysicalDevices.data());
|
||||||
@@ -270,11 +307,9 @@ auto main() -> int {
|
|||||||
// for each physical device, add initialize and add details
|
// for each physical device, add initialize and add details
|
||||||
std::vector<Gpu> gpus;
|
std::vector<Gpu> gpus;
|
||||||
|
|
||||||
BenchmarkConfig config = {};
|
|
||||||
|
|
||||||
uint32_t counter = 0;
|
uint32_t counter = 0;
|
||||||
for (const auto phy : vulkanPhysicalDevices) {
|
for (const auto phy : vulkanPhysicalDevices) {
|
||||||
gpus.emplace_back(initGpu(phy, config.buffer_size));
|
gpus.emplace_back(initGpu(phy, config.bufferSize));
|
||||||
|
|
||||||
const Gpu &gpu = gpus.back();
|
const Gpu &gpu = gpus.back();
|
||||||
|
|
||||||
@@ -285,16 +320,32 @@ auto main() -> int {
|
|||||||
}
|
}
|
||||||
std::cout << "-------------------" << std::endl;
|
std::cout << "-------------------" << std::endl;
|
||||||
|
|
||||||
for (auto &gpu : gpus) {
|
if (config.gpuIndex >= 0) {
|
||||||
std::cout << "Running staged benchmark" << std::endl;
|
// A single gpu selected
|
||||||
BenchmarkResult res = runStagedBenchmark(gpu, config);
|
if (config.gpuIndex >= static_cast<int32_t>(gpus.size())) {
|
||||||
reportBenchmark(res, gpu, config);
|
std::cout << "Gpu index out of bounds." << std::endl;
|
||||||
std::cout << "--------------------" << std::endl;
|
} else {
|
||||||
|
runBenchmarkOnGPU(gpus[config.gpuIndex], config);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Run on all devices
|
||||||
|
for (auto &gpu : gpus) {
|
||||||
|
|
||||||
|
if (config.gpusOnly) {
|
||||||
|
if (!isGpuType(gpu.properties.deviceType)) {
|
||||||
|
std::cout << "Skipping non-gpu type: "
|
||||||
|
<< gpu.properties.deviceName << std::endl;
|
||||||
|
std::cout << "--------------------" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
runBenchmarkOnGPU(gpu, config);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto gpu : gpus) {
|
for (auto gpu : gpus) {
|
||||||
cleanupGpu(gpu);
|
cleanupGpu(gpu);
|
||||||
}
|
}
|
||||||
vkDestroyInstance(inst, nullptr);
|
vkDestroyInstance(inst, nullptr);
|
||||||
return 0;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user