semaphores...

main
noah metz 2024-10-26 18:25:44 -06:00
parent dd8ec88d3a
commit 5788f1c10a
6 changed files with 227 additions and 156 deletions

@ -23,6 +23,7 @@
#include <cglm/affine.h> #include <cglm/affine.h>
#include <cglm/quat.h> #include <cglm/quat.h>
#include <cglm/cam.h> #include <cglm/cam.h>
#include "stdatomic.h"
#define MAX_FRAMES_IN_FLIGHT 2 #define MAX_FRAMES_IN_FLIGHT 2
#define WINDOW_MIN_WIDTH 800 #define WINDOW_MIN_WIDTH 800
@ -66,11 +67,23 @@ typedef struct SwapchainDetailsStruct {
uint32_t present_modes_count; uint32_t present_modes_count;
} SwapchainDetails; } SwapchainDetails;
// Pair of monotonically increasing timeline-semaphore counters for one
// frame-in-flight. Kept in a single struct so both counters can be read
// and updated together through one _Atomic object (see FrameContext.id).
typedef struct FrameSyncStruct {
uint64_t transfer; // count of transfer submissions (transfer timeline semaphore value)
uint64_t frame; // count of frame submissions (frame timeline semaphore value)
} FrameSync;
// Atomically bump .transfer by one; returns the value observed BEFORE the increment.
FrameSync increment_transfer(_Atomic FrameSync* frame);
// Atomically bump .frame by one; returns the value observed BEFORE the increment.
FrameSync increment_frame(_Atomic FrameSync* frame);
typedef struct FrameContextStruct { typedef struct FrameContextStruct {
VkFence ready; VkFence ready;
VkSemaphore image; VkSemaphore image;
VkSemaphore render; VkSemaphore render;
VkSemaphore transfer; VkSemaphore transfer;
VkSemaphore frame;
_Atomic FrameSync id;
} FrameContext; } FrameContext;
typedef struct RenderContextStruct { typedef struct RenderContextStruct {

@ -122,17 +122,17 @@ typedef struct GPULayerStruct {
} GPULayer; } GPULayer;
typedef struct LayerStruct { typedef struct LayerStruct {
VkBuffer strings; VkBuffer strings[MAX_FRAMES_IN_FLIGHT];
VkBuffer codes; VkBuffer codes[MAX_FRAMES_IN_FLIGHT];
VkBuffer drawables; VkBuffer drawables[MAX_FRAMES_IN_FLIGHT];
VkBuffer layer; VkBuffer layer[MAX_FRAMES_IN_FLIGHT];
VmaAllocation strings_memory; VmaAllocation strings_memory[MAX_FRAMES_IN_FLIGHT];
VmaAllocation drawables_memory; VmaAllocation drawables_memory[MAX_FRAMES_IN_FLIGHT];
VmaAllocation codes_memory; VmaAllocation codes_memory[MAX_FRAMES_IN_FLIGHT];
VmaAllocation layer_memory; VmaAllocation layer_memory[MAX_FRAMES_IN_FLIGHT];
VkDeviceAddress address; VkDeviceAddress address[MAX_FRAMES_IN_FLIGHT];
GPUDrawable* drawables_buffer; GPUDrawable* drawables_buffer;
GPUString* strings_buffer; GPUString* strings_buffer;
@ -169,7 +169,7 @@ typedef struct ContainerStruct {
GPUContainer data; GPUContainer data;
uint32_t id; uint32_t id;
uint32_t layer_count; uint32_t _Atomic layer_count;
Layer* layers; Layer* layers;
} Container; } Container;

@ -1,7 +1,9 @@
#include "draw.h" #include "draw.h"
#include "gpu.h" #include "gpu.h"
#include "stdatomic.h"
#include "vulkan/vulkan_core.h"
void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, double time) { void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, double time, uint32_t frame) {
UIPushConstant push = { UIPushConstant push = {
.time = (float)time, .time = (float)time,
.layer = 0, .layer = 0,
@ -13,12 +15,12 @@ void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, doubl
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 2, 1, &ui_context->samplers, 0, NULL); vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 2, 1, &ui_context->samplers, 0, NULL);
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 3, 1, &ui_context->textures, 0, NULL); vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 3, 1, &ui_context->textures, 0, NULL);
for(uint32_t i = 0; i < ui_context->max_containers; i++) { for(uint32_t i = 0; i < ui_context->max_containers; i++) {
if(ui_context->containers[i].id != 0x00000000) { uint32_t layer_count = atomic_load(&ui_context->containers[i].layer_count);
for(uint32_t j = 0; j < ui_context->containers[i].layer_count; j++) { __sync_synchronize();
push.layer = ui_context->containers[i].layers[j].address; for(uint32_t j = 0; j < layer_count; j++) {
vkCmdPushConstants(command_buffer, ui_context->pipeline.layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &push); push.layer = ui_context->containers[i].layers[j].address[frame];
vkCmdDrawIndirect(command_buffer, ui_context->containers[i].layers[j].layer, offsetof(GPULayer, draw), 1, 0); vkCmdPushConstants(command_buffer, ui_context->pipeline.layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &push);
} vkCmdDrawIndirect(command_buffer, ui_context->containers[i].layers[j].layer[frame], offsetof(GPULayer, draw), 1, 0);
} }
} }
} }
@ -79,24 +81,35 @@ VkResult draw_frame(
}; };
vkCmdBeginRenderPass(command_buffer, &render_pass_begin, VK_SUBPASS_CONTENTS_INLINE); vkCmdBeginRenderPass(command_buffer, &render_pass_begin, VK_SUBPASS_CONTENTS_INLINE);
// Render World
vkCmdNextSubpass(command_buffer, VK_SUBPASS_CONTENTS_INLINE); vkCmdNextSubpass(command_buffer, VK_SUBPASS_CONTENTS_INLINE);
// Render UI record_ui_draw(command_buffer, ui, time, context->current_frame);
record_ui_draw(command_buffer, ui, time);
vkCmdEndRenderPass(command_buffer); vkCmdEndRenderPass(command_buffer);
VK_RESULT(vkEndCommandBuffer(command_buffer)); VK_RESULT(vkEndCommandBuffer(command_buffer));
VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT};
VkSemaphore wait_semaphores[] = {context->frame[context->current_frame].image, context->frame[context->current_frame].transfer};
VkSemaphore signal_semaphores[] = {context->frame[context->current_frame].render, context->frame[context->current_frame].frame};
FrameSync id = increment_frame(&context->frame[context->current_frame].id);
uint64_t wait_values[] = {0, id.transfer};
uint64_t signal_values[] = {0, id.frame + 1};
VkTimelineSemaphoreSubmitInfo timeline_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = sizeof(wait_values)/sizeof(uint64_t),
.pWaitSemaphoreValues = wait_values,
.signalSemaphoreValueCount = sizeof(signal_values)/sizeof(uint64_t),
.pSignalSemaphoreValues = signal_values,
};
VkSubmitInfo submit_info = { VkSubmitInfo submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = 1, .waitSemaphoreCount = sizeof(wait_semaphores)/sizeof(VkSemaphore),
.pWaitSemaphores = &context->frame[context->current_frame].image, .pWaitSemaphores = wait_semaphores,
.pWaitDstStageMask = wait_stages, .pWaitDstStageMask = wait_stages,
.commandBufferCount = 1, .commandBufferCount = 1,
.pCommandBuffers = &context->swapchain_command_buffers[image_index], .pCommandBuffers = &context->swapchain_command_buffers[image_index],
.signalSemaphoreCount = 1, .signalSemaphoreCount = sizeof(signal_semaphores)/sizeof(VkSemaphore),
.pSignalSemaphores = &context->frame[context->current_frame].render, .pSignalSemaphores = signal_semaphores,
.pNext = &timeline_info,
}; };
result = vkQueueSubmit(context->graphics_queue.handle, 1, &submit_info, context->frame[context->current_frame].ready); result = vkQueueSubmit(context->graphics_queue.handle, 1, &submit_info, context->frame[context->current_frame].ready);

@ -345,6 +345,7 @@ VkResult create_logical_device(VkPhysicalDevice physical_device, VkSurfaceKHR su
.descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE, .descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE,
.descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE, .descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE,
.descriptorBindingSampledImageUpdateAfterBind = VK_TRUE, .descriptorBindingSampledImageUpdateAfterBind = VK_TRUE,
.timelineSemaphore = VK_TRUE,
}; };
VkPhysicalDeviceFeatures device_features = { VkPhysicalDeviceFeatures device_features = {
@ -611,6 +612,14 @@ VkResult create_render_pass(VkDevice device, VkSurfaceFormatKHR format, VkFormat
// This basically says "make sure nothing else is writing to the depth_stencil or the color attachment during the pipeline // This basically says "make sure nothing else is writing to the depth_stencil or the color attachment during the pipeline
VkSubpassDependency dependencies[] = { VkSubpassDependency dependencies[] = {
{
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 1,
.srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
.dstStageMask = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
},
{ {
.srcSubpass = VK_SUBPASS_EXTERNAL, .srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0, .dstSubpass = 0,
@ -687,11 +696,29 @@ VkResult create_swapchain_framebuffers(VkDevice device, uint32_t image_count, Vk
return VK_SUCCESS; return VK_SUCCESS;
} }
// Create a Vulkan timeline semaphore with an initial value of 0.
// Requires the timelineSemaphore feature to be enabled on the device
// (see create_logical_device). Returns VK_NULL_HANDLE on failure;
// NOTE(review): the VkResult is discarded here, so callers cannot
// distinguish failure causes — confirm callers check for a null handle.
VkSemaphore create_timeline_semaphore(VkDevice device) {
  VkSemaphoreTypeCreateInfo semaphore_type = {
    .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
    .initialValue = 0,
    .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
  };
  VkSemaphoreCreateInfo semaphore_info = {
    .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
    .pNext = &semaphore_type,
  };
  VkSemaphore semaphore;
  // Pass NULL (not 0) for the allocator and VK_NULL_HANDLE (not 0) for the
  // failure result: both are the idiomatic, self-documenting spellings.
  VkResult result = vkCreateSemaphore(device, &semaphore_info, NULL, &semaphore);
  if(result != VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return semaphore;
}
VkSemaphore create_semaphore(VkDevice device) {
VkSemaphoreCreateInfo semaphore_info = { VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.flags = flags,
}; };
VkSemaphore semaphore; VkSemaphore semaphore;
@ -720,9 +747,12 @@ VkFence create_fence(VkDevice device, VkFenceCreateFlags flags) {
VkResult create_frame_context(VkDevice device, FrameContext* frame) { VkResult create_frame_context(VkDevice device, FrameContext* frame) {
frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT); frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT);
frame->image = create_semaphore(device, 0); frame->image = create_semaphore(device);
frame->render = create_semaphore(device, 0); frame->render = create_semaphore(device);
frame->transfer = create_semaphore(device, 0); frame->transfer = create_timeline_semaphore(device);
frame->frame = create_timeline_semaphore(device);
FrameSync tmp = {};
atomic_store(&frame->id, tmp);
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -1041,3 +1071,26 @@ VkResult command_transition_image_layout(VkDevice device, VkCommandPool transfer
return command_end_single(device, command_buffer, transfer_pool, transfer_queue); return command_end_single(device, command_buffer, transfer_pool, transfer_queue);
} }
// Atomically increment the .transfer counter of *frame, leaving .frame
// untouched, and return the FrameSync value observed before the increment.
// Lock-free CAS loop: atomic_compare_exchange_* already stores the freshly
// observed value into `expected` on failure, so a single load before the
// loop suffices — the original reloaded with atomic_load on every retry.
// The weak variant is the C11-recommended form inside a retry loop (it may
// fail spuriously, which the loop absorbs anyway).
FrameSync increment_transfer(_Atomic FrameSync* frame) {
  FrameSync expected = atomic_load(frame);
  FrameSync desired;
  do {
    desired.frame = expected.frame;
    desired.transfer = expected.transfer + 1;
  } while(!atomic_compare_exchange_weak(frame, &expected, desired));
  return expected;
}
// Atomically increment the .frame counter of *frame, leaving .transfer
// untouched, and return the FrameSync value observed before the increment.
// Mirrors increment_transfer: one initial load, then a weak-CAS retry loop
// (the failed CAS refreshes `expected` itself, so no per-iteration
// atomic_load is needed as in the original).
FrameSync increment_frame(_Atomic FrameSync* frame) {
  FrameSync expected = atomic_load(frame);
  FrameSync desired;
  do {
    desired.frame = expected.frame + 1;
    desired.transfer = expected.transfer;
  } while(!atomic_compare_exchange_weak(frame, &expected, desired));
  return expected;
}

@ -16,57 +16,20 @@ typedef struct ClientContextStruct {
UIContext ui; UIContext ui;
} ClientContext; } ClientContext;
void record_ui_compute(VkCommandBuffer command_buffer, UIContext* ui_context) { void record_ui_compute(VkCommandBuffer command_buffer, UIContext* ui, uint32_t frame) {
UIPushConstant push = { UIPushConstant push = {
.time = 0.0, .time = 0.0,
.layer = 0, .layer = 0,
}; };
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui_context->string_pipeline.pipeline); vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui->string_pipeline.pipeline);
for(uint32_t i = 0; i < ui_context->max_containers; i++) { for(uint32_t i = 0; i < ui->max_containers; i++) {
if(ui_context->containers[i].id != 0x00000000) { if(ui->containers[i].id != 0x00000000) {
for(uint32_t j = 0; j < ui_context->containers[i].layer_count; j++) { for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) {
push.layer = ui_context->containers[i].layers[j].address; push.layer = ui->containers[i].layers[j].address[frame];
VkBufferMemoryBarrier draw_command_barrier_1 = { command_copy_buffer(command_buffer, ui->containers[i].layers[j].layer[frame], ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t));
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, vkCmdPushConstants(command_buffer, ui->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &push);
.buffer = ui_context->containers[i].layers[j].layer, vkCmdDispatchIndirect(command_buffer, ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, dispatch_strings));
.offset = offsetof(GPULayer, draw),
.size = sizeof(DrawCommand),
.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &draw_command_barrier_1, 0, NULL);
command_copy_buffer(command_buffer, ui_context->containers[i].layers[j].layer, ui_context->containers[i].layers[j].layer, offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t));
VkBufferMemoryBarrier draw_command_barrier_2 = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.buffer = ui_context->containers[i].layers[j].layer,
.offset = offsetof(GPULayer, draw),
.size = sizeof(DrawCommand),
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &draw_command_barrier_2, 0, NULL);
vkCmdPushConstants(command_buffer, ui_context->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &push);
vkCmdDispatchIndirect(command_buffer, ui_context->containers[i].layers[j].layer, offsetof(GPULayer, dispatch_strings));
VkBufferMemoryBarrier draw_command_barrier_3 = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.buffer = ui_context->containers[i].layers[j].layer,
.offset = offsetof(GPULayer, draw),
.size = sizeof(DrawCommand),
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 0, NULL, 1, &draw_command_barrier_3, 0, NULL);
VkBufferMemoryBarrier drawables_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.buffer = ui_context->containers[i].layers[j].drawables,
.offset = 0,
.size = sizeof(GPUDrawable)*ui_context->containers[i].layers[j].data.max_drawables,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, NULL, 1, &drawables_barrier, 0, NULL);
} }
} }
} }
@ -207,13 +170,6 @@ VkResult test_ui(RenderContext* gpu, UIContext* ui) {
VK_RESULT(create_container(&inventory_info, gpu, ui)); VK_RESULT(create_container(&inventory_info, gpu, ui));
VK_RESULT(create_container(&chat_info, gpu, ui)); VK_RESULT(create_container(&chat_info, gpu, ui));
for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool);
record_ui_compute(command_buffer, ui);
VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue));
}
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -267,6 +223,36 @@ int main_thread(void* data) {
x = 1; x = 1;
test_ui(&context->render, &context->ui); test_ui(&context->render, &context->ui);
} }
for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VkCommandBuffer command_buffer = command_begin_single(context->render.device, context->render.transfer_pool);
record_ui_compute(command_buffer, &context->ui, i);
vkEndCommandBuffer(command_buffer);
FrameSync id = increment_transfer(&context->render.frame[i].id);
VkSemaphore wait_semaphores[] = {context->render.frame[i].transfer, context->render.frame[i].frame};
uint64_t wait_values[] = {id.transfer, id.frame};
uint64_t signal_values[] = {id.transfer+1};
VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
VkTimelineSemaphoreSubmitInfo timeline_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pSignalSemaphoreValues = signal_values,
.signalSemaphoreValueCount = 1,
.pWaitSemaphoreValues = wait_values,
.waitSemaphoreValueCount = 2,
};
VkSubmitInfo submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &context->render.frame[i].transfer,
.waitSemaphoreCount = 2,
.pWaitSemaphores = wait_semaphores,
.pWaitDstStageMask = wait_stages,
.pNext = &timeline_info,
};
vkQueueSubmit(context->render.transfer_queue.handle, 1, &submit_info, VK_NULL_HANDLE);
}
} }
return 0; return 0;

@ -9,6 +9,7 @@
#include "vk_mem_alloc.h" #include "vk_mem_alloc.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
#include "spng.h" #include "spng.h"
#include "stdatomic.h"
VkShaderModule load_shader_file(const char* path, VkDevice device) { VkShaderModule load_shader_file(const char* path, VkDevice device) {
FILE* file; FILE* file;
@ -314,12 +315,14 @@ VkResult create_container(
context->containers[index].address = buffer_address(gpu->device, context->containers[index].container); context->containers[index].address = buffer_address(gpu->device, context->containers[index].container);
context->containers[index].id = container->id; context->containers[index].id = container->id;
context->containers[index].layer_count = container->layer_count;
context->containers[index].layers = malloc(sizeof(Layer)*container->layer_count); context->containers[index].layers = malloc(sizeof(Layer)*container->layer_count);
for(uint32_t i = 0; i < container->layer_count; i++) { for(uint32_t i = 0; i < container->layer_count; i++) {
VK_RESULT(create_layer(i, &container->layers[i], gpu, &context->containers[index])); VK_RESULT(create_layer(i, &container->layers[i], gpu, &context->containers[index]));
} }
__sync_synchronize();
atomic_store(&context->containers[index].layer_count, container->layer_count);
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -330,91 +333,94 @@ VkResult create_layer(
Container* container) { Container* container) {
VkResult result; VkResult result;
VK_RESULT(create_storage_buffer(gpu->allocator, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, sizeof(GPULayer), &container->layers[index].layer, &container->layers[index].layer_memory)); for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
if(input->num_strings > 0) {
VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUString)*input->num_strings, &container->layers[index].strings, &container->layers[index].strings_memory));
container->layers[index].strings_buffer = malloc(sizeof(GPUString)*input->num_strings);
}
if(input->num_codes + input->num_drawables > 0) {
VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUDrawable)*(input->num_drawables + input->num_codes), &container->layers[index].drawables, &container->layers[index].drawables_memory));
container->layers[index].drawables_buffer = malloc(sizeof(GPUDrawable)*input->num_drawables);
}
if(input->num_codes > 0) {
VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(uint32_t)*input->num_codes, &container->layers[index].codes, &container->layers[index].codes_memory));
container->layers[index].codes_buffer = malloc(sizeof(uint32_t)*input->num_codes);
}
VkBuffer transfer; VK_RESULT(create_storage_buffer(gpu->allocator, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, sizeof(GPULayer), &container->layers[index].layer[i], &container->layers[index].layer_memory[i]));
VmaAllocation transfer_memory; if(input->num_strings > 0) {
void* mapped; VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUString)*input->num_strings, &container->layers[index].strings[i], &container->layers[index].strings_memory[i]));
VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPULayer) + sizeof(GPUString) * input->num_strings + sizeof(GPUDrawable)*input->num_drawables + sizeof(uint32_t)*input->num_codes, &transfer, &transfer_memory, &mapped)); container->layers[index].strings_buffer = malloc(sizeof(GPUString)*input->num_strings);
}
if(input->num_strings > 0) { if(input->num_codes + input->num_drawables > 0) {
container->layers[index].data.strings = buffer_address(gpu->device, container->layers[index].strings); VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUDrawable)*(input->num_drawables + input->num_codes), &container->layers[index].drawables[i], &container->layers[index].drawables_memory[i]));
} else { container->layers[index].drawables_buffer = malloc(sizeof(GPUDrawable)*input->num_drawables);
container->layers[index].data.strings = 0x00000000; }
} if(input->num_codes > 0) {
VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(uint32_t)*input->num_codes, &container->layers[index].codes[i], &container->layers[index].codes_memory[i]));
if(input->num_codes > 0) { container->layers[index].codes_buffer = malloc(sizeof(uint32_t)*input->num_codes);
container->layers[index].data.codes = buffer_address(gpu->device, container->layers[index].codes); }
} else {
container->layers[index].data.codes = 0x00000000;
}
if(input->num_codes + input->num_drawables > 0) { VkBuffer transfer;
container->layers[index].data.drawables = buffer_address(gpu->device, container->layers[index].drawables); VmaAllocation transfer_memory;
} else { void* mapped;
container->layers[index].data.drawables = 0x00000000; VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPULayer) + sizeof(GPUString) * input->num_strings + sizeof(GPUDrawable)*input->num_drawables + sizeof(uint32_t)*input->num_codes, &transfer, &transfer_memory, &mapped));
}
if(input->num_strings > 0) {
container->layers[index].data.strings = buffer_address(gpu->device, container->layers[index].strings[i]);
} else {
container->layers[index].data.strings = 0x00000000;
}
container->layers[index].data.draw.first_vertex = 0; if(input->num_codes > 0) {
container->layers[index].data.draw.vertex_count = 6; container->layers[index].data.codes = buffer_address(gpu->device, container->layers[index].codes[i]);
container->layers[index].data.draw.first_instance = 0; } else {
container->layers[index].data.draw.instance_count = 0; container->layers[index].data.codes = 0x00000000;
}
container->layers[index].data.dispatch_strings.x = input->num_strings; if(input->num_codes + input->num_drawables > 0) {
container->layers[index].data.dispatch_strings.y = 1; container->layers[index].data.drawables = buffer_address(gpu->device, container->layers[index].drawables[i]);
container->layers[index].data.dispatch_strings.z = 1; } else {
container->layers[index].data.drawables = 0x00000000;
}
container->layers[index].data.max_drawables = input->num_drawables + input->num_codes;
container->layers[index].data.max_strings = input->num_strings;
container->layers[index].data.num_drawables = input->num_drawables;
container->layers[index].data.container = container->address;
memcpy(mapped, &container->layers[index].data, sizeof(GPULayer));
VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool); container->layers[index].data.draw.first_vertex = 0;
command_copy_buffer(command_buffer, transfer, container->layers[index].layer, 0, 0, sizeof(GPULayer)); container->layers[index].data.draw.vertex_count = 6;
if(input->num_strings > 0) { container->layers[index].data.draw.first_instance = 0;
GPUString* strings = (GPUString*)(mapped + sizeof(GPULayer)); container->layers[index].data.draw.instance_count = 0;
for(uint32_t i = 0; i < input->num_strings; i++) {
memcpy(&strings[i], &input->strings[i], sizeof(GPUString)); container->layers[index].data.dispatch_strings.x = input->num_strings;
memcpy(&container->layers[index].strings_buffer[i], &input->strings[i], sizeof(GPUString)); container->layers[index].data.dispatch_strings.y = 1;
container->layers[index].data.dispatch_strings.z = 1;
container->layers[index].data.max_drawables = input->num_drawables + input->num_codes;
container->layers[index].data.max_strings = input->num_strings;
container->layers[index].data.num_drawables = input->num_drawables;
container->layers[index].data.container = container->address;
memcpy(mapped, &container->layers[index].data, sizeof(GPULayer));
VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool);
command_copy_buffer(command_buffer, transfer, container->layers[index].layer[i], 0, 0, sizeof(GPULayer));
if(input->num_strings > 0) {
GPUString* strings = (GPUString*)(mapped + sizeof(GPULayer));
for(uint32_t i = 0; i < input->num_strings; i++) {
memcpy(&strings[i], &input->strings[i], sizeof(GPUString));
memcpy(&container->layers[index].strings_buffer[i], &input->strings[i], sizeof(GPUString));
}
command_copy_buffer(command_buffer, transfer, container->layers[index].strings[i], sizeof(GPULayer), 0, sizeof(GPUString)*input->num_strings);
} }
command_copy_buffer(command_buffer, transfer, container->layers[index].strings, sizeof(GPULayer), 0, sizeof(GPUString)*input->num_strings);
}
if(input->num_drawables > 0) { if(input->num_drawables > 0) {
GPUDrawable* drawables = (GPUDrawable*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings); GPUDrawable* drawables = (GPUDrawable*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings);
for(uint32_t i = 0; i < input->num_drawables; i++) { for(uint32_t i = 0; i < input->num_drawables; i++) {
memcpy(&drawables[i], &input->drawables[i], sizeof(GPUDrawable)); memcpy(&drawables[i], &input->drawables[i], sizeof(GPUDrawable));
memcpy(&container->layers[index].drawables_buffer[i], &input->drawables[i], sizeof(GPUDrawable)); memcpy(&container->layers[index].drawables_buffer[i], &input->drawables[i], sizeof(GPUDrawable));
}
command_copy_buffer(command_buffer, transfer, container->layers[index].drawables[i], sizeof(GPULayer) + sizeof(GPUString)*input->num_strings, 0, sizeof(GPUDrawable)*input->num_drawables);
} }
command_copy_buffer(command_buffer, transfer, container->layers[index].drawables, sizeof(GPULayer) + sizeof(GPUString)*input->num_strings, 0, sizeof(GPUDrawable)*input->num_drawables);
}
if(input->num_codes > 0) { if(input->num_codes > 0) {
uint32_t* codes = (uint32_t*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables); uint32_t* codes = (uint32_t*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables);
for(uint32_t i = 0; i < input->num_codes; i++) { for(uint32_t i = 0; i < input->num_codes; i++) {
codes[i] = input->codes[i]; codes[i] = input->codes[i];
container->layers[index].codes_buffer[i] = input->codes[i]; container->layers[index].codes_buffer[i] = input->codes[i];
}
command_copy_buffer(command_buffer, transfer, container->layers[index].codes[i], sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables, 0, sizeof(uint32_t)*input->num_codes);
} }
command_copy_buffer(command_buffer, transfer, container->layers[index].codes, sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables, 0, sizeof(uint32_t)*input->num_codes); VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue));
} destroy_transfer_buffer(gpu->allocator, transfer, transfer_memory);
VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue));
destroy_transfer_buffer(gpu->allocator, transfer, transfer_memory);
container->layers[index].address = buffer_address(gpu->device, container->layers[index].layer); container->layers[index].address[i] = buffer_address(gpu->device, container->layers[index].layer[i]);
}
return VK_SUCCESS; return VK_SUCCESS;
} }