benchmark pinned memory (wip)
This commit is contained in:
54
src/main.cpp
54
src/main.cpp
@@ -35,8 +35,10 @@ struct Gpu {
|
|||||||
// for benchmarks
|
// for benchmarks
|
||||||
VkDeviceMemory stagingMem = nullptr;
|
VkDeviceMemory stagingMem = nullptr;
|
||||||
VkDeviceMemory deviceMem = nullptr;
|
VkDeviceMemory deviceMem = nullptr;
|
||||||
|
VkDeviceMemory pinnedMem = nullptr;
|
||||||
VkBuffer stagingBuf = nullptr;
|
VkBuffer stagingBuf = nullptr;
|
||||||
VkBuffer deviceBuf = nullptr;
|
VkBuffer deviceBuf = nullptr;
|
||||||
|
VkBuffer pinnedBuf = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
// ---------- helpers ----------
|
// ---------- helpers ----------
|
||||||
@@ -182,6 +184,14 @@ Gpu initGpu(VkPhysicalDevice phy, uint32_t buffer_size) {
|
|||||||
gpu.deviceMem = allocateMem(gpu.device, phy, gpu.deviceBuf,
|
gpu.deviceMem = allocateMem(gpu.device, phy, gpu.deviceBuf,
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||||
|
|
||||||
|
gpu.pinnedBuf = createBuffer(gpu.device, buffer_size,
|
||||||
|
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||||
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
|
||||||
|
gpu.pinnedMem = allocateMem(gpu.device, phy, gpu.pinnedBuf,
|
||||||
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
||||||
|
|
||||||
return gpu;
|
return gpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -189,8 +199,10 @@ void cleanupGpu(Gpu &gpu) {
|
|||||||
|
|
||||||
vkDestroyBuffer(gpu.device, gpu.stagingBuf, nullptr);
|
vkDestroyBuffer(gpu.device, gpu.stagingBuf, nullptr);
|
||||||
vkDestroyBuffer(gpu.device, gpu.deviceBuf, nullptr);
|
vkDestroyBuffer(gpu.device, gpu.deviceBuf, nullptr);
|
||||||
|
vkDestroyBuffer(gpu.device, gpu.pinnedBuf, nullptr);
|
||||||
vkFreeMemory(gpu.device, gpu.stagingMem, nullptr);
|
vkFreeMemory(gpu.device, gpu.stagingMem, nullptr);
|
||||||
vkFreeMemory(gpu.device, gpu.deviceMem, nullptr);
|
vkFreeMemory(gpu.device, gpu.deviceMem, nullptr);
|
||||||
|
vkFreeMemory(gpu.device, gpu.pinnedMem, nullptr);
|
||||||
vkDestroyCommandPool(gpu.device, gpu.pool, nullptr);
|
vkDestroyCommandPool(gpu.device, gpu.pool, nullptr);
|
||||||
vkDestroyDevice(gpu.device, nullptr);
|
vkDestroyDevice(gpu.device, nullptr);
|
||||||
}
|
}
|
||||||
@@ -213,9 +225,9 @@ void reportBenchmark(const BenchmarkResult &result, const Gpu &gpu,
|
|||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) {
|
BenchmarkResult runStagedBenchmark(Gpu &gpu, BenchmarkConfig config) {
|
||||||
|
|
||||||
// ---- fill staging buffer ----
|
// fill staging buffer
|
||||||
void *mapped;
|
void *mapped;
|
||||||
vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped);
|
vkMapMemory(gpu.device, gpu.stagingMem, 0, config.buffer_size, 0, &mapped);
|
||||||
std::memset(mapped, 0xAB, config.buffer_size);
|
std::memset(mapped, 0xAB, config.buffer_size);
|
||||||
@@ -242,6 +254,35 @@ BenchmarkResult runBenchmark(Gpu &gpu, BenchmarkConfig config) {
|
|||||||
return {tH2D, tD2H};
|
return {tH2D, tD2H};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Benchmarks copies between the pinned buffer (gpu.pinnedBuf, backed by
// gpu.pinnedMem) and the device buffer (gpu.deviceBuf).
//
// gpu:    supplies the device, command pool, queue and the buffers used.
// config: buffer_size is the number of bytes mapped, filled and copied;
//         iterations is the number of timed copies per direction.
//
// Returns {tH2D, tD2H}: the mean of benchCopy's return value over
// config.iterations copies, for pinned->device and device->pinned.
//
// NOTE(review): benchCopy is assumed to take (…, src, dst, size), matching
// the stagingBuf -> deviceBuf warm-up call in the staged benchmark — confirm.
BenchmarkResult runPinnedBenchmark(Gpu &gpu, BenchmarkConfig config) {

  // fill pinned buffer
  // NOTE(review): the VkResult of vkMapMemory is not checked; if mapping
  // fails, `mapped` stays null and the memset below is invalid.
  void *mapped = nullptr;
  vkMapMemory(gpu.device, gpu.pinnedMem, 0, config.buffer_size, 0, &mapped);
  std::memset(mapped, 0xAB, config.buffer_size);
  vkUnmapMemory(gpu.device, gpu.pinnedMem);

  // warm-up, probably not significant
  // Uses the pinned buffer as the source: the staging buffer is not filled
  // by this function and is not what this benchmark measures.
  benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.pinnedBuf, gpu.deviceBuf,
            config.buffer_size);

  // ---- benchmark host->device ----
  // Source and destination must be different buffers: copying pinnedBuf onto
  // itself would not be a transfer between the two memories at all.
  double tH2D = 0.0;
  for (uint32_t i = 0; i < config.iterations; ++i)
    tH2D += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.pinnedBuf,
                      gpu.deviceBuf, config.buffer_size);
  tH2D /= config.iterations;

  // ---- benchmark device->host ----
  double tD2H = 0.0;
  for (uint32_t i = 0; i < config.iterations; ++i)
    tD2H += benchCopy(gpu.device, gpu.pool, gpu.queue, gpu.deviceBuf,
                      gpu.pinnedBuf, config.buffer_size);
  tD2H /= config.iterations;

  return {tH2D, tD2H};
}
|
||||||
|
|
||||||
auto main() -> int {
|
auto main() -> int {
|
||||||
|
|
||||||
VkApplicationInfo app{};
|
VkApplicationInfo app{};
|
||||||
@@ -286,8 +327,15 @@ auto main() -> int {
|
|||||||
std::cout << "-------------------" << std::endl;
|
std::cout << "-------------------" << std::endl;
|
||||||
|
|
||||||
for (auto &gpu : gpus) {
|
for (auto &gpu : gpus) {
|
||||||
BenchmarkResult res = runBenchmark(gpu, config);
|
std::cout << "Running staged benchmark" << std::endl;
|
||||||
|
BenchmarkResult res = runStagedBenchmark(gpu, config);
|
||||||
reportBenchmark(res, gpu, config);
|
reportBenchmark(res, gpu, config);
|
||||||
|
|
||||||
|
std::cout << "Running pinned benchmark" << std::endl;
|
||||||
|
res = runPinnedBenchmark(gpu, config);
|
||||||
|
reportBenchmark(res, gpu, config);
|
||||||
|
|
||||||
|
std::cout << "--------------------" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto gpu : gpus) {
|
for (auto gpu : gpus) {
|
||||||
|
|||||||
Reference in New Issue
Block a user