diff --git a/client/include/gpu.h b/client/include/gpu.h index 30c8478..30402ff 100644 --- a/client/include/gpu.h +++ b/client/include/gpu.h @@ -23,12 +23,13 @@ #include #include #include -#include "stdatomic.h" #define MAX_FRAMES_IN_FLIGHT 2 #define WINDOW_MIN_WIDTH 800 #define WINDOW_MIN_HEIGHT 600 +extern PFN_vkCmdPipelineBarrier2KHR pVkCmdPipelineBarrier2KHR; + typedef struct GPUBufferStruct { VkBuffer buffers[MAX_FRAMES_IN_FLIGHT]; VmaAllocation memory[MAX_FRAMES_IN_FLIGHT]; @@ -70,12 +71,11 @@ typedef struct SwapchainDetailsStruct { typedef struct TransferInfoStruct { VkDeviceSize offset; VkDeviceSize size; - VkBuffer buffers[MAX_FRAMES_IN_FLIGHT]; + VkBuffer buffer; } TransferInfo; typedef struct FrameContextStruct { VkFence ready; - VkFence transfer_ready[MAX_FRAMES_IN_FLIGHT]; VkSemaphore image; VkSemaphore render; VkSemaphore transfer; @@ -84,7 +84,7 @@ typedef struct FrameContextStruct { uint64_t frame_index; uint64_t transfer_index; - VkCommandBuffer transfer_commands[MAX_FRAMES_IN_FLIGHT]; + VkCommandBuffer transfer_commands; VkBuffer transfer_buffer; VmaAllocation transfer_memory; void* transfer_mapped; @@ -168,6 +168,14 @@ VkDeviceAddress buffer_address( VkBuffer buffer); VkResult add_transfer( + void* data, + VkBuffer buffer, + VkDeviceSize offset, + VkDeviceSize size, + uint32_t frame_index, + RenderContext* gpu); + +VkResult add_transfers( void* data, VkBuffer* buffers, VkDeviceSize offset, diff --git a/client/include/ui.h b/client/include/ui.h index a67ab10..114335b 100644 --- a/client/include/ui.h +++ b/client/include/ui.h @@ -41,7 +41,7 @@ typedef struct DispatchCommandStruct { typedef struct UIPushConstantStruct { VkDeviceAddress layer; float time; - float pad; + uint32_t pad; } UIPushConstant; typedef struct GPUFontStruct { @@ -164,16 +164,16 @@ typedef struct GPUContainerStruct { } GPUContainer; typedef struct ContainerStruct { - VkBuffer container; + VkBuffer container[MAX_FRAMES_IN_FLIGHT]; - VmaAllocation container_memory; + VmaAllocation 
container_memory[MAX_FRAMES_IN_FLIGHT]; - VkDeviceAddress address; + VkDeviceAddress address[MAX_FRAMES_IN_FLIGHT]; GPUContainer data; uint32_t id; - uint32_t _Atomic layer_count; + uint32_t layer_count; Layer* layers; } Container; diff --git a/client/shader_src/ui_common.glsl b/client/shader_src/ui_common.glsl index a316118..678399a 100644 --- a/client/shader_src/ui_common.glsl +++ b/client/shader_src/ui_common.glsl @@ -106,4 +106,5 @@ layout(std430, buffer_reference) readonly buffer Layer { layout(std430, push_constant) uniform PushConstant { Layer layer; float time; + uint frame; } pc; diff --git a/client/src/draw.c b/client/src/draw.c index 5761cc1..d147ea9 100644 --- a/client/src/draw.c +++ b/client/src/draw.c @@ -1,12 +1,12 @@ #include "draw.h" #include "gpu.h" -#include "stdatomic.h" #include "vulkan/vulkan_core.h" void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, double time, uint32_t frame) { UIPushConstant push = { .time = (float)time, .layer = 0, + .pad = frame, }; vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.pipeline); @@ -15,9 +15,7 @@ void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, doubl vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 2, 1, &ui_context->samplers, 0, NULL); vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 3, 1, &ui_context->textures, 0, NULL); for(uint32_t i = 0; i < ui_context->max_containers; i++) { - uint32_t layer_count = atomic_load(&ui_context->containers[i].layer_count); - __sync_synchronize(); - for(uint32_t j = 0; j < layer_count; j++) { + for(uint32_t j = 0; j < ui_context->containers[i].layer_count; j++) { push.layer = ui_context->containers[i].layers[j].address[frame]; vkCmdPushConstants(command_buffer, ui_context->pipeline.layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &push); 
vkCmdDrawIndirect(command_buffer, ui_context->containers[i].layers[j].layer[frame], offsetof(GPULayer, draw), 1, 0); @@ -29,19 +27,28 @@ void record_ui_compute(VkCommandBuffer command_buffer, UIContext* ui, uint32_t f UIPushConstant push = { .time = 0.0, .layer = 0, + .pad = frame, }; vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui->string_pipeline.pipeline); for(uint32_t i = 0; i < ui->max_containers; i++) { if(ui->containers[i].id != 0x00000000) { for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) { - push.layer = ui->containers[i].layers[j].address[frame]; command_copy_buffer(command_buffer, ui->containers[i].layers[j].layer[frame], ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t)); + } + } + } + + for(uint32_t i = 0; i < ui->max_containers; i++) { + if(ui->containers[i].id != 0x00000000) { + for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) { + push.layer = ui->containers[i].layers[j].address[frame]; vkCmdPushConstants(command_buffer, ui->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &push); vkCmdDispatchIndirect(command_buffer, ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, dispatch_strings)); } } } + } VkResult draw_frame( @@ -59,54 +66,50 @@ VkResult draw_frame( }; if(context->frame[context->current_frame].transfer_count > 0) { - VkFence fences[] = {context->frame[context->current_frame].transfer_ready[0], context->frame[context->current_frame].transfer_ready[1]}; - VK_RESULT(vkWaitForFences(context->device, 2, fences, VK_TRUE, UINT64_MAX)); - VK_RESULT(vkResetFences(context->device, 2, fences)); - - for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { - VkCommandBuffer transfer_commands = context->frame[context->current_frame].transfer_commands[i]; - VK_RESULT(vkResetCommandBuffer(transfer_commands, 0)); - VK_RESULT(vkBeginCommandBuffer(transfer_commands, &begin_info)); - - 
VkDeviceSize src_offset = 0; - for(uint32_t j = 0; j < context->frame[context->current_frame].transfer_count; j++) { - command_copy_buffer( - transfer_commands, - context->frame[context->current_frame].transfer_buffer, - context->frame[context->current_frame].transfer_infos[j].buffers[i], - src_offset, - context->frame[context->current_frame].transfer_infos[j].offset, - context->frame[context->current_frame].transfer_infos[j].size); - src_offset += context->frame[context->current_frame].transfer_infos[j].size; - } - record_ui_compute(transfer_commands, ui, i); - VK_RESULT(vkEndCommandBuffer(transfer_commands)); - VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT}; - context->frame[i].transfer_index += 1; - VkSemaphore transfer_signals[] = {context->frame[i].transfer}; - uint64_t transfer_signal_values[] = {context->frame[i].transfer_index}; - VkSemaphore transfer_waits[] = {context->frame[i].transfer, context->frame[i].frame}; - uint64_t transfer_wait_values[] = {context->frame[i].transfer_index-1, context->frame[i].frame_index}; - VkTimelineSemaphoreSubmitInfo timeline_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .signalSemaphoreValueCount = sizeof(transfer_signal_values)/sizeof(uint64_t), - .pSignalSemaphoreValues = transfer_signal_values, - .waitSemaphoreValueCount = sizeof(transfer_wait_values)/sizeof(uint64_t), - .pWaitSemaphoreValues = transfer_wait_values, - }; - VkSubmitInfo submit_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &transfer_commands, - .pSignalSemaphores = transfer_signals, - .signalSemaphoreCount = sizeof(transfer_signals)/sizeof(VkSemaphore), - .pWaitSemaphores = transfer_waits, - .pWaitDstStageMask = wait_stages, - .waitSemaphoreCount = sizeof(transfer_waits)/sizeof(VkSemaphore), - .pNext = &timeline_info, - }; - VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &submit_info, 
context->frame[context->current_frame].transfer_ready[i])); + VkCommandBuffer transfer_commands = context->frame[context->current_frame].transfer_commands; + VK_RESULT(vkResetCommandBuffer(transfer_commands, 0)); + VK_RESULT(vkBeginCommandBuffer(transfer_commands, &begin_info)); + + VkDeviceSize src_offset = 0; + for(uint32_t transfer_index = 0; transfer_index < context->frame[context->current_frame].transfer_count; transfer_index++) { + command_copy_buffer( + transfer_commands, + context->frame[context->current_frame].transfer_buffer, + context->frame[context->current_frame].transfer_infos[transfer_index].buffer, + src_offset, + context->frame[context->current_frame].transfer_infos[transfer_index].offset, + context->frame[context->current_frame].transfer_infos[transfer_index].size); + src_offset += context->frame[context->current_frame].transfer_infos[transfer_index].size; } + + + record_ui_compute(transfer_commands, ui, context->current_frame); + VK_RESULT(vkEndCommandBuffer(transfer_commands)); + VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT}; + context->frame[context->current_frame].transfer_index += 1; + VkSemaphore transfer_signals[] = {context->frame[context->current_frame].transfer}; + uint64_t transfer_signal_values[] = {context->frame[context->current_frame].transfer_index}; + VkSemaphore transfer_waits[] = {context->frame[context->current_frame].frame}; + uint64_t transfer_wait_values[] = {context->frame[context->current_frame].frame_index}; + VkTimelineSemaphoreSubmitInfo timeline_info = { + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .signalSemaphoreValueCount = sizeof(transfer_signal_values)/sizeof(uint64_t), + .pSignalSemaphoreValues = transfer_signal_values, + .waitSemaphoreValueCount = sizeof(transfer_wait_values)/sizeof(uint64_t), + .pWaitSemaphoreValues = transfer_wait_values, + }; + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount 
= 1, + .pCommandBuffers = &transfer_commands, + .pSignalSemaphores = transfer_signals, + .signalSemaphoreCount = sizeof(transfer_signals)/sizeof(VkSemaphore), + .pWaitSemaphores = transfer_waits, + .pWaitDstStageMask = wait_stages, + .waitSemaphoreCount = sizeof(transfer_waits)/sizeof(VkSemaphore), + .pNext = &timeline_info, + }; + VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &submit_info, VK_NULL_HANDLE)); context->frame[context->current_frame].transfer_count = 0; context->frame[context->current_frame].transfer_written = 0; } diff --git a/client/src/gpu.c b/client/src/gpu.c index 1319fe7..511fed2 100644 --- a/client/src/gpu.c +++ b/client/src/gpu.c @@ -5,6 +5,8 @@ #include "vk_mem_alloc.h" #include "vulkan/vulkan_core.h" +PFN_vkCmdPipelineBarrier2KHR pVkCmdPipelineBarrier2KHR; + const char * validation_layers[] = { "VK_LAYER_KHRONOS_validation", //"VK_LAYER_LUNARG_api_dump", @@ -335,6 +337,11 @@ VkResult create_logical_device(VkPhysicalDevice physical_device, VkSurfaceKHR su queue_create_info[1].pQueuePriorities = &default_queue_priority; } + VkPhysicalDeviceSynchronization2FeaturesKHR sync2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR, + .synchronization2 = VK_TRUE, + }; + VkPhysicalDeviceVulkan12Features features_12 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .bufferDeviceAddress = VK_TRUE, @@ -346,6 +353,7 @@ VkResult create_logical_device(VkPhysicalDevice physical_device, VkSurfaceKHR su .descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE, .descriptorBindingSampledImageUpdateAfterBind = VK_TRUE, .timelineSemaphore = VK_TRUE, + .pNext = &sync2, }; VkPhysicalDeviceFeatures device_features = { @@ -770,9 +778,7 @@ VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommand VkResult result; frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT); - for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { - frame->transfer_ready[i] = create_fence(device, 
VK_FENCE_CREATE_SIGNALED_BIT); - } + frame->image = create_semaphore(device); frame->render = create_semaphore(device); frame->transfer = create_timeline_semaphore(device); @@ -783,11 +789,11 @@ VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommand VkCommandBufferAllocateInfo command_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandBufferCount = 2, + .commandBufferCount = 1, .commandPool = transfer_pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, }; - VK_RESULT(vkAllocateCommandBuffers(device, &command_info, frame->transfer_commands)); + VK_RESULT(vkAllocateCommandBuffers(device, &command_info, &frame->transfer_commands)); // TODO: better defaults frame->transfer_max_size = 1; @@ -876,6 +882,8 @@ VkResult init_vulkan(GLFWwindow* window, RenderContext* context) { VK_RESULT(create_logical_device(context->physical_device, context->surface, &context->graphics_queue, &context->present_queue, &context->transfer_queue, &context->device)); + pVkCmdPipelineBarrier2KHR = (PFN_vkCmdPipelineBarrier2KHR)vkGetDeviceProcAddr(context->device, "vkCmdPipelineBarrier2KHR"); + VK_RESULT(create_memory_allocator(context->instance, context->physical_device, context->device, &context->allocator)); VkCommandPoolCreateInfo extra_pool_info = { @@ -1093,27 +1101,46 @@ VkResult command_transition_image_layout(VkDevice device, VkCommandPool transfer return command_end_single(device, command_buffer, transfer_pool, transfer_queue); } -VkResult add_transfer( +VkResult add_transfers( void* data, VkBuffer* buffers, VkDeviceSize offset, VkDeviceSize size, RenderContext* gpu) { VkResult result; - FrameContext* frame = &gpu->frame[gpu->current_frame]; + for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { + VK_RESULT(add_transfer(data, buffers[i], offset, size, i, gpu)); + } + return VK_SUCCESS; +} - while(frame->transfer_written + size >= frame->transfer_max_size) { +VkResult add_transfer( + void* data, + VkBuffer buffer, + VkDeviceSize offset, + 
VkDeviceSize size, + uint32_t frame_index, + RenderContext* gpu) { + VkResult result; + + FrameContext* frame = &gpu->frame[frame_index]; + + if(frame->transfer_written + size >= frame->transfer_max_size) { + VkDeviceSize new_size = 2*(VkDeviceSize)frame->transfer_max_size; /* 64-bit so repeated doubling cannot wrap to 0 */ + while(frame->transfer_written + size >= new_size) { + new_size *= 2; + } VkBuffer new_transfer; VmaAllocation new_transfer_memory; void* new_transfer_data; - VK_RESULT(create_transfer_buffer(gpu->allocator, 2*frame->transfer_max_size, &new_transfer, &new_transfer_memory, &new_transfer_data)); + VK_RESULT(create_transfer_buffer(gpu->allocator, new_size, &new_transfer, &new_transfer_memory, &new_transfer_data)); memcpy(new_transfer_data, frame->transfer_mapped, frame->transfer_written); destroy_transfer_buffer(gpu->allocator, frame->transfer_buffer, frame->transfer_memory); frame->transfer_buffer = new_transfer; frame->transfer_memory = new_transfer_memory; frame->transfer_mapped = new_transfer_data; - frame->transfer_max_size *= 2; + frame->transfer_max_size = new_size; } if(frame->transfer_count + 1 >= frame->transfer_max_count) { @@ -1130,9 +1157,7 @@ VkResult add_transfer( memcpy(frame->transfer_mapped + frame->transfer_written, data, size); frame->transfer_infos[frame->transfer_count].size = size; - for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { - frame->transfer_infos[frame->transfer_count].buffers[i] = buffers[i]; - } + frame->transfer_infos[frame->transfer_count].buffer = buffer; frame->transfer_infos[frame->transfer_count].offset = offset; frame->transfer_written += size; diff --git a/client/src/main.c b/client/src/main.c index 0bb0f36..338613f 100644 --- a/client/src/main.c +++ b/client/src/main.c @@ -8,7 +8,6 @@ #include "vulkan/vk_enum_string_helper.h" #include "vulkan/vulkan_core.h" #include "pthread.h" -#include "stdatomic.h" typedef struct ClientContextStruct { GLFWwindow* window; @@ -169,7 +168,7 @@ VkResult main_thread(ClientContext* context) { .size = 32, .color = {1.0, 1.0, 1.0, 1.0}, 
.offset = 0, - .length = 4, + .length = 0, .font = 0, }; @@ -189,12 +188,13 @@ VkResult main_thread(ClientContext* context) { create_container(&fps_container, &context->render, &context->ui); // - double last_draw = -1; - double draw_interval = 1; + double last_draw = 0; + double draw_interval = 0.1; double frame_count = 0; uint32_t* mapped_codes = context->ui.containers[0].layers[0].codes_buffer; - GPUString* mapped_string = &context->ui.containers[0].layers[0].strings_buffer[0]; + GPUString* mapped_string = context->ui.containers[0].layers[0].strings_buffer; char str[11]; + int test = 0; // while(glfwWindowShouldClose(context->window) == 0) { @@ -203,33 +203,29 @@ VkResult main_thread(ClientContext* context) { // if(frame_time - last_draw > draw_interval) { + test = (test + 1) % 2; snprintf(str, 11, "%3.2f", frame_count/(frame_time-last_draw)); map_string(str, mapped_codes, 0, 0, &context->ui); - mapped_string->size = 32; - mapped_string->pos[0] = 0; - mapped_string->pos[1] = 32; - mapped_string->color[0] = 1.0; - mapped_string->color[1] = 1.0; - mapped_string->color[2] = 1.0; - mapped_string->color[3] = 1.0; - mapped_string->font = 0; - mapped_string->offset = 0; - mapped_string->length = strlen(str); + if(test) { + mapped_string->length = strlen(str); + } else { + mapped_string->length = 0; + } last_draw = frame_time; frame_count = 0; - VK_RESULT(add_transfer( + VK_RESULT(add_transfers( context->ui.containers[0].layers[0].codes_buffer, context->ui.containers[0].layers[0].codes, 0, 10*sizeof(uint32_t), &context->render)); - VK_RESULT(add_transfer( - context->ui.containers[0].layers[0].strings_buffer, + VK_RESULT(add_transfers( + &context->ui.containers[0].layers[0].strings_buffer[0].length, context->ui.containers[0].layers[0].strings, - 0, - sizeof(GPUString), + offsetof(GPUString, length), + sizeof(uint32_t), &context->render)); } // diff --git a/client/src/ui.c b/client/src/ui.c index 9847fbe..0388873 100644 --- a/client/src/ui.c +++ b/client/src/ui.c @@ -9,7 
+9,7 @@ #include "vk_mem_alloc.h" #include "vulkan/vulkan_core.h" #include "spng.h" -#include "stdatomic.h" +#include VkShaderModule load_shader_file(const char* path, VkDevice device) { FILE* file; @@ -292,13 +292,12 @@ VkResult create_container( } VkResult result; + for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { + VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUContainer), &context->containers[index].container[i], &context->containers[index].container_memory[i])); + context->containers[index].address[i] = buffer_address(gpu->device, context->containers[index].container[i]); + } - VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUContainer), &context->containers[index].container, &context->containers[index].container_memory)); - - VkBuffer transfer; - VmaAllocation transfer_memory; - void* mapped; - VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPUContainer), &transfer, &transfer_memory, &mapped)); + fprintf(stderr, "Created container with storage buffers %p/%p\n", context->containers[index].container[0], context->containers[index].container[1]); context->containers[index].data.offset[0] = container->offset[0]; context->containers[index].data.offset[1] = container->offset[1]; @@ -306,22 +305,15 @@ VkResult create_container( context->containers[index].data.size[1] = container->size[1]; context->containers[index].data.anchor = container->anchor; context->containers[index].data.context = context->address; - memcpy(mapped, &context->containers[index].data, sizeof(GPUContainer)); - - VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool); - command_copy_buffer(command_buffer, transfer, context->containers[index].container, 0, 0, sizeof(GPUContainer)); - VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue)); - destroy_transfer_buffer(gpu->allocator, transfer, transfer_memory); + VK_RESULT(add_transfers(&context->containers[index].data, 
context->containers[index].container, 0, sizeof(GPUContainer), gpu)); - context->containers[index].address = buffer_address(gpu->device, context->containers[index].container); context->containers[index].id = container->id; context->containers[index].layers = malloc(sizeof(Layer)*container->layer_count); for(uint32_t i = 0; i < container->layer_count; i++) { VK_RESULT(create_layer(i, &container->layers[i], gpu, &context->containers[index])); } - __sync_synchronize(); - atomic_store(&context->containers[index].layer_count, container->layer_count); + context->containers[index].layer_count = container->layer_count; return VK_SUCCESS; } @@ -370,59 +362,50 @@ VkResult create_layer( } else { container->layers[index].data.drawables = 0x00000000; } + container->layers[index].address[i] = buffer_address(gpu->device, container->layers[index].layer[i]); + } + fprintf(stderr, "Created layer with storage buffers %p/%p\n", container->layers[index].layer[0], container->layers[index].layer[1]); + fprintf(stderr, "String Buffers %p/%p\n", container->layers[index].strings[0], container->layers[index].strings[1]); + fprintf(stderr, "Code Buffers %p/%p\n", container->layers[index].codes[0], container->layers[index].codes[1]); - VkBuffer transfer; - VmaAllocation transfer_memory; - void* mapped; - VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPULayer) + sizeof(GPUString) * input->num_strings + sizeof(GPUDrawable)*input->num_drawables + sizeof(uint32_t)*input->num_codes, &transfer, &transfer_memory, &mapped)); - - container->layers[index].data.draw.first_vertex = 0; - container->layers[index].data.draw.vertex_count = 6; - container->layers[index].data.draw.first_instance = 0; - container->layers[index].data.draw.instance_count = 0; - - container->layers[index].data.dispatch_strings.x = max_strings; - container->layers[index].data.dispatch_strings.y = 1; - container->layers[index].data.dispatch_strings.z = 1; - - container->layers[index].data.max_drawables = max_drawables + 
max_codes; - container->layers[index].data.max_strings = max_strings; - container->layers[index].data.num_drawables = max_drawables; - container->layers[index].data.container = container->address; - memcpy(mapped, &container->layers[index].data, sizeof(GPULayer)); - - VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool); - command_copy_buffer(command_buffer, transfer, container->layers[index].layer[i], 0, 0, sizeof(GPULayer)); - if(input->num_strings > 0) { - GPUString* strings = (GPUString*)(mapped + sizeof(GPULayer)); - for(uint32_t i = 0; i < input->num_strings; i++) { - memcpy(&strings[i], &input->strings[i], sizeof(GPUString)); - memcpy(&container->layers[index].strings_buffer[i], &input->strings[i], sizeof(GPUString)); - } - command_copy_buffer(command_buffer, transfer, container->layers[index].strings[i], sizeof(GPULayer), 0, sizeof(GPUString)*input->num_strings); - } + for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { + VK_RESULT(add_transfer( + &container->address[i], + container->layers[index].layer[i], + offsetof(GPULayer, container), + sizeof(VkDeviceAddress), + i, + gpu)); + } - if(input->num_drawables > 0) { - GPUDrawable* drawables = (GPUDrawable*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings); - for(uint32_t i = 0; i < input->num_drawables; i++) { - memcpy(&drawables[i], &input->drawables[i], sizeof(GPUDrawable)); - memcpy(&container->layers[index].drawables_buffer[i], &input->drawables[i], sizeof(GPUDrawable)); - } - command_copy_buffer(command_buffer, transfer, container->layers[index].drawables[i], sizeof(GPULayer) + sizeof(GPUString)*input->num_strings, 0, sizeof(GPUDrawable)*input->num_drawables); - } + container->layers[index].data.draw.first_vertex = 0; + container->layers[index].data.draw.vertex_count = 6; + container->layers[index].data.draw.first_instance = 0; + container->layers[index].data.draw.instance_count = 0; - if(input->num_codes > 0) { - uint32_t* codes = (uint32_t*)(mapped + 
sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables); - for(uint32_t i = 0; i < input->num_codes; i++) { - codes[i] = input->codes[i]; - container->layers[index].codes_buffer[i] = input->codes[i]; - } - command_copy_buffer(command_buffer, transfer, container->layers[index].codes[i], sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables, 0, sizeof(uint32_t)*input->num_codes); - } - VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue)); - destroy_transfer_buffer(gpu->allocator, transfer, transfer_memory); + container->layers[index].data.dispatch_strings.x = max_strings; + container->layers[index].data.dispatch_strings.y = 1; + container->layers[index].data.dispatch_strings.z = 1; - container->layers[index].address[i] = buffer_address(gpu->device, container->layers[index].layer[i]); + container->layers[index].data.max_drawables = max_drawables + max_codes; + container->layers[index].data.max_strings = max_strings; + container->layers[index].data.max_codes = max_codes; + container->layers[index].data.num_drawables = max_drawables; + VK_RESULT(add_transfers(&container->layers[index].data, container->layers[index].layer, 0, sizeof(GPULayer)-sizeof(VkDeviceAddress), gpu)); + + if(input->num_strings > 0) { + memcpy(container->layers[index].strings_buffer, input->strings, sizeof(GPUString)*input->num_strings); + VK_RESULT(add_transfers(container->layers[index].strings_buffer, container->layers[index].strings, 0, sizeof(GPUString)*input->num_strings, gpu)); + } + + if(input->num_drawables > 0) { + memcpy(container->layers[index].drawables_buffer, input->drawables, sizeof(GPUDrawable)*input->num_drawables); + VK_RESULT(add_transfers(container->layers[index].drawables_buffer, container->layers[index].drawables, 0, sizeof(GPUDrawable)*input->num_drawables, gpu)); + } + + if(input->num_codes > 0) { + memcpy(container->layers[index].codes_buffer, input->codes, 
sizeof(uint32_t)*input->num_codes); + VK_RESULT(add_transfers(container->layers[index].codes_buffer, container->layers[index].codes, 0, sizeof(uint32_t)*input->num_codes, gpu)); } return VK_SUCCESS;