From b003c94efdcf954dc08e75a10158ebdef1624d0a Mon Sep 17 00:00:00 2001
From: Noah Metz <noah@metznet.ca>
Date: Mon, 28 Oct 2024 13:44:34 -0600
Subject: [PATCH] Moved copy to per-frame operation, and added members to
 FrameContext to support per-frame copy. Added copy logic to draw.

---
 client/include/gpu.h |  24 ++++---
 client/include/ui.h  |   4 ++
 client/src/draw.c    |  97 ++++++++++++++++++++-----
 client/src/gpu.c     |  70 +++++++++---------
 client/src/main.c    | 168 ++++++++++++++++++-------------------------
 client/src/ui.c      |  46 ++++++------
 6 files changed, 228 insertions(+), 181 deletions(-)

diff --git a/client/include/gpu.h b/client/include/gpu.h
index 6663941..ffa3c1a 100644
--- a/client/include/gpu.h
+++ b/client/include/gpu.h
@@ -67,23 +67,29 @@ typedef struct SwapchainDetailsStruct {
   uint32_t present_modes_count;
 } SwapchainDetails;
 
-typedef struct FrameSyncStruct {
-  uint64_t transfer;
-  uint64_t frame;
-} FrameSync;
-
-FrameSync increment_transfer(_Atomic FrameSync* frame);
-
-FrameSync increment_frame(_Atomic FrameSync* frame);
+typedef struct TransferInfoStruct { // One queued copy out of a frame's transfer_buffer
+  VkDeviceSize dst_offset; // Offset draw_frame passes as the destination offset of the copy
+  VkDeviceSize size; // Bytes to copy; draw_frame also advances its running source offset by this amount
+  VkBuffer     buffers[MAX_FRAMES_IN_FLIGHT]; // Per-index destination: buffers[i] is the target recorded into transfer_commands[i]
+} TransferInfo;
 
 typedef struct FrameContextStruct {
   VkFence     ready;
+  VkFence     transfer_ready[MAX_FRAMES_IN_FLIGHT]; // Fence i is handed to the vkQueueSubmit of transfer_commands[i] in draw_frame
   VkSemaphore image;
   VkSemaphore render;
   VkSemaphore transfer;
   VkSemaphore frame;
 
-  _Atomic FrameSync id;
+  uint64_t frame_index; // Latest value submitted for signalling on the `frame` timeline semaphore
+  uint64_t transfer_index; // Latest value submitted for signalling on the `transfer` timeline semaphore
+
+  VkCommandBuffer transfer_commands[MAX_FRAMES_IN_FLIGHT]; // Reset and re-recorded by draw_frame whenever transfer_count > 0
+  VkBuffer        transfer_buffer; // Source buffer for every queued copy
+  VmaAllocation   transfer_memory; // Allocation returned alongside transfer_buffer by create_transfer_buffer
+  void*           transfer_mapped; // Pointer filled in by create_transfer_buffer; main_thread writes upload data through it
+  TransferInfo*   transfer_infos; // Heap array of queued copies (create_frame_context mallocs room for 10)
+  uint32_t        transfer_count; // Number of valid transfer_infos entries; draw_frame resets it to 0 after submitting
 } FrameContext;
 
 typedef struct RenderContextStruct {
diff --git a/client/include/ui.h b/client/include/ui.h
index 758fbf8..a67ab10 100644
--- a/client/include/ui.h
+++ b/client/include/ui.h
@@ -146,6 +146,10 @@ typedef struct LayerInputStruct {
   uint32_t num_codes;
   uint32_t num_drawables;
 
+  uint32_t max_codes;
+  uint32_t max_strings;
+  uint32_t max_drawables;
+
   GPUString* strings;
   uint32_t* codes;
   GPUDrawable* drawables;
diff --git a/client/src/draw.c b/client/src/draw.c
index 412b973..3225600 100644
--- a/client/src/draw.c
+++ b/client/src/draw.c
@@ -25,20 +25,89 @@ void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, doubl
   }
 }
 
+void record_ui_compute(VkCommandBuffer command_buffer, UIContext* ui, uint32_t frame) {
+  // Binds the string compute pipeline once, then records a copy + push constants + indirect dispatch per layer of each container with a non-zero id.
+  UIPushConstant constants = { .time = 0.0, .layer = 0 };
+  vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui->string_pipeline.pipeline);
+  for(uint32_t c = 0; c < ui->max_containers; c++) {
+    if(ui->containers[c].id == 0x00000000) {
+      continue; // containers with id 0 are skipped
+    }
+    for(uint32_t l = 0; l < ui->containers[c].layer_count; l++) {
+      VkBuffer layer_buffer = ui->containers[c].layers[l].layer[frame];
+      constants.layer = ui->containers[c].layers[l].address[frame];
+      // Same buffer on both sides: copies num_drawables over the draw command's instance_count.
+      command_copy_buffer(command_buffer, layer_buffer, layer_buffer, offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t));
+      vkCmdPushConstants(command_buffer, ui->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &constants);
+      vkCmdDispatchIndirect(command_buffer, layer_buffer, offsetof(GPULayer, dispatch_strings));
+    }
+  }
+}
+
 VkResult draw_frame(
     RenderContext* context,
     UIContext* ui,
     double time) {
   VkResult result;
 
-  result = vkWaitForFences(context->device, 1, &context->frame[context->current_frame].ready, VK_TRUE, UINT64_MAX);
-  if(result != VK_SUCCESS) {
-    return result;
-  }
+  VkFence fences[] = {context->frame[context->current_frame].ready};
+  VK_RESULT(vkWaitForFences(context->device, 1, fences, VK_TRUE, UINT64_MAX));
+  VK_RESULT(vkResetFences(context->device, 1, fences));
 
-  result = vkResetFences(context->device, 1, &context->frame[context->current_frame].ready);
-  if(result != VK_SUCCESS) {
-    return result;
+  VkCommandBufferBeginInfo begin_info = {
+    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+  }; 
+
+  if(context->frame[context->current_frame].transfer_count > 0) {
+    VkFence fences[] = {context->frame[context->current_frame].transfer_ready[0], context->frame[context->current_frame].transfer_ready[1]};
+    VK_RESULT(vkWaitForFences(context->device, 2, fences, VK_TRUE, UINT64_MAX));
+    VK_RESULT(vkResetFences(context->device, 2, fences));
+
+    for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
+      VkCommandBuffer transfer_commands = context->frame[context->current_frame].transfer_commands[i];
+      VK_RESULT(vkResetCommandBuffer(transfer_commands, 0));
+      VK_RESULT(vkBeginCommandBuffer(transfer_commands, &begin_info));
+
+      VkDeviceSize src_offset = 0;
+      for(uint32_t j = 0; j < context->frame[context->current_frame].transfer_count; j++) {
+        command_copy_buffer(
+            transfer_commands,
+            context->frame[context->current_frame].transfer_buffer,
+            context->frame[context->current_frame].transfer_infos[j].buffers[i],
+            src_offset,
+            context->frame[context->current_frame].transfer_infos[j].dst_offset,
+            context->frame[context->current_frame].transfer_infos[j].size);
+        src_offset += context->frame[context->current_frame].transfer_infos[j].size;
+      }
+      record_ui_compute(transfer_commands, ui, i);
+      VK_RESULT(vkEndCommandBuffer(transfer_commands));
+      VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
+      context->frame[i].transfer_index += 1;
+      VkSemaphore transfer_signals[] = {context->frame[i].transfer};
+      uint64_t transfer_signal_values[] = {context->frame[i].transfer_index};
+      VkSemaphore transfer_waits[] = {context->frame[i].transfer, context->frame[i].frame};
+      uint64_t transfer_wait_values[] = {context->frame[i].transfer_index-1, context->frame[i].frame_index};
+      VkTimelineSemaphoreSubmitInfo timeline_info = {
+        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+        .signalSemaphoreValueCount = sizeof(transfer_signal_values)/sizeof(uint64_t),
+        .pSignalSemaphoreValues = transfer_signal_values,
+        .waitSemaphoreValueCount = sizeof(transfer_wait_values)/sizeof(uint64_t),
+        .pWaitSemaphoreValues = transfer_wait_values,
+      };
+      VkSubmitInfo submit_info = {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount = 1,
+        .pCommandBuffers = &transfer_commands,
+        .pSignalSemaphores = transfer_signals,
+        .signalSemaphoreCount = sizeof(transfer_signals)/sizeof(VkSemaphore),
+        .pWaitSemaphores = transfer_waits,
+        .pWaitDstStageMask = wait_stages,
+        .waitSemaphoreCount = sizeof(transfer_waits)/sizeof(VkSemaphore),
+        .pNext = &timeline_info,
+      };
+      VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &submit_info, context->frame[context->current_frame].transfer_ready[i]));
+    }
+    context->frame[context->current_frame].transfer_count = 0;
   }
 
   uint32_t image_index;
@@ -49,10 +118,6 @@ VkResult draw_frame(
 
   VkCommandBuffer command_buffer = context->swapchain_command_buffers[image_index];
   VK_RESULT(vkResetCommandBuffer(command_buffer, 0));
-
-  VkCommandBufferBeginInfo begin_info = {
-    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-  }; 
   VK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info));
 
   VkViewport viewport = {
@@ -90,15 +155,15 @@ VkResult draw_frame(
   VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT};
   VkSemaphore wait_semaphores[] = {context->frame[context->current_frame].image, context->frame[context->current_frame].transfer};
   VkSemaphore signal_semaphores[] = {context->frame[context->current_frame].render, context->frame[context->current_frame].frame};
-  FrameSync id = increment_frame(&context->frame[context->current_frame].id);
-  uint64_t wait_values[] = {0, id.transfer};
-  uint64_t signal_values[] = {0, id.frame + 1};
+  context->frame[context->current_frame].frame_index += 1;
+  uint64_t wait_values[] = {0, context->frame[context->current_frame].transfer_index};
+  uint64_t signal_values[] = {0, context->frame[context->current_frame].frame_index};
   VkTimelineSemaphoreSubmitInfo timeline_info = {
     .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
-    .waitSemaphoreValueCount = sizeof(wait_values)/sizeof(uint64_t),
-    .pWaitSemaphoreValues = wait_values,
     .signalSemaphoreValueCount = sizeof(signal_values)/sizeof(uint64_t),
     .pSignalSemaphoreValues = signal_values,
+    .waitSemaphoreValueCount = sizeof(wait_values)/sizeof(uint64_t),
+    .pWaitSemaphoreValues = wait_values,
   };
   VkSubmitInfo submit_info = {
     .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
diff --git a/client/src/gpu.c b/client/src/gpu.c
index 5bb59c3..2a6984e 100644
--- a/client/src/gpu.c
+++ b/client/src/gpu.c
@@ -745,17 +745,6 @@ VkFence create_fence(VkDevice device, VkFenceCreateFlags flags) {
   return fence;
 }
 
-VkResult create_frame_context(VkDevice device, FrameContext* frame) {
-  frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT);
-  frame->image = create_semaphore(device);
-  frame->render = create_semaphore(device);
-  frame->transfer = create_timeline_semaphore(device);
-  frame->frame = create_timeline_semaphore(device);
-  FrameSync tmp = {};
-  atomic_store(&frame->id, tmp);
-  return VK_SUCCESS;
-}
-
 VkCommandBuffer* create_command_buffers(VkDevice device, VkCommandPool command_pool, uint32_t image_count)  {
   VkCommandBufferAllocateInfo alloc_info = {
     .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@@ -777,6 +766,37 @@ VkCommandBuffer* create_command_buffers(VkDevice device, VkCommandPool command_p
   return command_buffers;
 }
 
+VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommandPool transfer_pool, FrameContext* frame) {
+  VkResult result;
+  // Fills *frame with its fences, semaphores, transfer command buffers (from transfer_pool) and transfer buffer (from allocator).
+  frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT);
+  for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
+    frame->transfer_ready[i] = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT);
+  }
+  frame->image = create_semaphore(device);
+  frame->render = create_semaphore(device);
+  frame->transfer = create_timeline_semaphore(device);
+  frame->frame = create_timeline_semaphore(device);
+
+  frame->frame_index = 0;
+  frame->transfer_index = 0;
+  // transfer_commands holds MAX_FRAMES_IN_FLIGHT handles, so allocate exactly that many rather than a literal 2.
+  VkCommandBufferAllocateInfo command_info = {
+    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+    .commandBufferCount = MAX_FRAMES_IN_FLIGHT,
+    .commandPool = transfer_pool,
+    .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+  };
+  VK_RESULT(vkAllocateCommandBuffers(device, &command_info, frame->transfer_commands));
+
+  // TODO: temp size values, add dynamic resizing
+  VK_RESULT(create_transfer_buffer(allocator, 1024, &frame->transfer_buffer, &frame->transfer_memory, &frame->transfer_mapped));
+  frame->transfer_count = 0;
+  frame->transfer_infos = malloc(sizeof(TransferInfo)*10);
+  // Report a failed allocation to the caller: draw_frame and main_thread index transfer_infos without a NULL check.
+  return (frame->transfer_infos == NULL) ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
+}
+
 VkResult create_depth_image(VkDevice device, VkFormat depth_format, VkExtent2D swapchain_extent, VmaAllocator allocator, VkCommandPool extra_graphics_pool, GPUQueue graphics_queue, VkImage* depth_image, VmaAllocation* depth_image_memory, VkImageView* depth_image_view) {
   
     VkExtent3D depth_extent = {
@@ -872,13 +892,13 @@ VkResult init_vulkan(GLFWwindow* window, RenderContext* context) {
 
   VkCommandPoolCreateInfo transfer_pool_info = {
     .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
-    .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
+    .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
     .queueFamilyIndex = context->transfer_queue.family,
   };
   VK_RESULT(vkCreateCommandPool(context->device, &transfer_pool_info, 0, &context->transfer_pool));
 
   for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
-    VK_RESULT(create_frame_context(context->device, &context->frame[i]));
+    VK_RESULT(create_frame_context(context->device, context->allocator, context->transfer_pool, &context->frame[i]));
   }
 
   VK_RESULT(get_swapchain_details(context->physical_device, context->surface, &context->swapchain_details));
@@ -1070,27 +1090,3 @@ VkResult command_transition_image_layout(VkDevice device, VkCommandPool transfer
 
   return command_end_single(device, command_buffer, transfer_pool, transfer_queue);
 }
-
-FrameSync increment_transfer(_Atomic FrameSync* frame) {
-  FrameSync loaded = {};
-  FrameSync set = {};
-  do {
-    loaded = atomic_load(frame);
-    set.frame = loaded.frame;
-    set.transfer = loaded.transfer + 1;
-  } while(atomic_compare_exchange_strong(frame, &loaded, set) == false);
-
-  return loaded;
-}
-
-FrameSync increment_frame(_Atomic FrameSync* frame) {
-  FrameSync loaded = {};
-  FrameSync set = {};
-  do {
-    loaded = atomic_load(frame);
-    set.frame = loaded.frame + 1;
-    set.transfer = loaded.transfer;
-  } while(atomic_compare_exchange_strong(frame, &loaded, set) == false);
-
-  return loaded;
-}
diff --git a/client/src/main.c b/client/src/main.c
index 7c3433f..dc9e93a 100644
--- a/client/src/main.c
+++ b/client/src/main.c
@@ -16,25 +16,6 @@ typedef struct ClientContextStruct {
   UIContext ui;
 } ClientContext;
 
-void record_ui_compute(VkCommandBuffer command_buffer, UIContext* ui, uint32_t frame) {
-  UIPushConstant push = {
-    .time = 0.0,
-    .layer = 0,
-  };
-
-  vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui->string_pipeline.pipeline);
-  for(uint32_t i = 0; i < ui->max_containers; i++) {
-    if(ui->containers[i].id != 0x00000000) {
-      for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) {
-        push.layer = ui->containers[i].layers[j].address[frame];
-        command_copy_buffer(command_buffer, ui->containers[i].layers[j].layer[frame], ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t));
-        vkCmdPushConstants(command_buffer, ui->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &push);
-        vkCmdDispatchIndirect(command_buffer, ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, dispatch_strings));
-      }
-    }
-  }
-}
-
 VkResult test_ui(RenderContext* gpu, UIContext* ui) {
   VkResult result;
 
@@ -173,39 +154,6 @@ VkResult test_ui(RenderContext* gpu, UIContext* ui) {
   return VK_SUCCESS;
 }
 
-// Threads:
-// 1. render
-//  - Submits the draw buffer to the GPU as soon as it can
-// 2. network
-//  - Handles packets to/from the network to/from the main thread
-// 3. main
-//  - updates the data in the GPU that's being drawn from
-//  - updates the data in the GPU from network requests
-//
-//  Data:
-//  Render thread reads Render and UI context 
-//  Main thread reads and writes UI context
-
-void* render_thread(void* data) {
-  ClientContext* context = (ClientContext*)data;
-
-  double last_frame_time = glfwGetTime();
-  while(glfwWindowShouldClose(context->window) == 0) {
-    double frame_time = glfwGetTime();
-    double delta_time = frame_time - last_frame_time;
-    (void)delta_time;
-
-    VkResult result = draw_frame(&context->render, &context->ui, frame_time);
-    if(result != VK_SUCCESS) {
-      fprintf(stderr, "draw_frame error: %s\n", string_VkResult(result));
-      glfwDestroyWindow(context->window);
-    }
-    last_frame_time = frame_time;
-  }
-
-  return NULL;
-}
-
 void* network_thread(void* data) {
   ClientContext* context = (ClientContext*)data;
   (void)context;
@@ -213,46 +161,82 @@ void* network_thread(void* data) {
   return NULL;
 }
 
-int main_thread(void* data) {
-  ClientContext* context = (ClientContext*)data;
+int main_thread(ClientContext* context) {
+  GPUString fps_string = {
+    .pos = {0, 32},
+    .size = 32,
+    .color = {1.0, 1.0, 1.0, 1.0},
+    .offset = 0,
+    .length = 4,
+    .font = 0,
+  };
+
+  LayerInput fps_layer = {
+    .num_strings = 1,
+    .strings = &fps_string,
+    .max_codes = 10,
+  };
+
+  ContainerInput fps_container = {
+    .id = 1,
+    .size = {200, 200},
+    .layer_count = 1,
+    .layers = &fps_layer,
+  };
+
+  create_container(&fps_container, &context->render, &context->ui);
 
-  int x = 0;
+  double last_draw = -1;
+  double draw_interval = 1;
+  double frame_count = 0;
   while(glfwWindowShouldClose(context->window) == 0) {
     glfwPollEvents();
-    if(x == 0 && glfwGetTime() > 0.0) {
-      x = 1;
-      test_ui(&context->render, &context->ui);
+    double frame_time = glfwGetTime();
+
+    //
+    if(frame_time - last_draw > draw_interval) {
+      context->render.frame[context->render.current_frame].transfer_count = 2;
+
+      TransferInfo* transfer_infos = context->render.frame[context->render.current_frame].transfer_infos;
+      transfer_infos[0].size = 10*sizeof(uint32_t);
+      transfer_infos[0].dst_offset = 0;
+      transfer_infos[0].buffers[0] = context->ui.containers[0].layers[0].codes[0];
+      transfer_infos[0].buffers[1] = context->ui.containers[0].layers[0].codes[1];
+
+      transfer_infos[1].size = sizeof(GPUString);
+      transfer_infos[1].dst_offset = 0;
+      transfer_infos[1].buffers[0] = context->ui.containers[0].layers[0].strings[0];
+      transfer_infos[1].buffers[1] = context->ui.containers[0].layers[0].strings[1];
+
+      void* mapped = context->render.frame[context->render.current_frame].transfer_mapped;
+      uint32_t* mapped_codes = (uint32_t*)mapped;
+      GPUString* mapped_string = (GPUString*)(mapped + 10*sizeof(uint32_t));
+      char str[11];
+      snprintf(str, 11, "%3.2f", frame_count/(frame_time-last_draw));
+      map_string(str, mapped_codes, 0, 0, &context->ui);
+      mapped_string->size = 32;
+      mapped_string->pos[0] = 0;
+      mapped_string->pos[1] = 32;
+      mapped_string->color[0] = 1.0;
+      mapped_string->color[1] = 1.0;
+      mapped_string->color[2] = 1.0;
+      mapped_string->color[3] = 1.0;
+      mapped_string->font = 0;
+      mapped_string->offset = 0;
+      mapped_string->length = strlen(str);
+
+      last_draw = frame_time;
+      frame_count = 0;
     }
+    //
 
-    for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
-      VkCommandBuffer command_buffer = command_begin_single(context->render.device, context->render.transfer_pool);
-      record_ui_compute(command_buffer, &context->ui, i);
-      vkEndCommandBuffer(command_buffer);
-      FrameSync id = increment_transfer(&context->render.frame[i].id);
-      VkSemaphore wait_semaphores[] = {context->render.frame[i].transfer, context->render.frame[i].frame};
-      uint64_t wait_values[] = {id.transfer, id.frame};
-      uint64_t signal_values[] = {id.transfer+1};
-      VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
-      VkTimelineSemaphoreSubmitInfo timeline_info = {
-        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
-        .pSignalSemaphoreValues = signal_values,
-        .signalSemaphoreValueCount = 1,
-        .pWaitSemaphoreValues = wait_values,
-        .waitSemaphoreValueCount = 2,
-      };
-      VkSubmitInfo submit_info = {
-        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-        .commandBufferCount = 1,
-        .pCommandBuffers = &command_buffer,
-        .signalSemaphoreCount = 1,
-        .pSignalSemaphores = &context->render.frame[i].transfer,
-        .waitSemaphoreCount = 2,
-        .pWaitSemaphores = wait_semaphores,
-        .pWaitDstStageMask = wait_stages,
-        .pNext = &timeline_info,
-      };
-      vkQueueSubmit(context->render.transfer_queue.handle, 1, &submit_info, VK_NULL_HANDLE);
+    VkResult result = draw_frame(&context->render, &context->ui, frame_time);
+    if(result != VK_SUCCESS) {
+      fprintf(stderr, "draw_frame error: %s\n", string_VkResult(result));
+      glfwDestroyWindow(context->window);
     }
+
+    frame_count += 1;
   }
 
   return 0;
@@ -327,15 +311,8 @@ int main() {
   // TODO: make # of fonts/textures/containers scaling, recreate GPU buffers as necessary
   VK_RESULT(create_ui_context(10, 10, 10, &context.render, &context.ui));
 
-  // Start threads
-  pthread_t render_thread_handle;
   pthread_t network_thread_handle;
 
-  error = pthread_create(&render_thread_handle, NULL, &render_thread, &context);
-  if(error != 0) {
-    return error;
-  }
-
   error = pthread_create(&network_thread_handle, NULL, &network_thread, &context);
   if(error != 0) {
     return error;
@@ -346,11 +323,6 @@ int main() {
     return error;
   }
 
-  error = pthread_join(render_thread_handle, NULL);
-  if(error != 0) {
-    return error;
-  }
-
   error = pthread_join(network_thread_handle, NULL);
   if(error != 0) {
     return error;
diff --git a/client/src/ui.c b/client/src/ui.c
index 96a4bc6..97168dc 100644
--- a/client/src/ui.c
+++ b/client/src/ui.c
@@ -333,58 +333,62 @@ VkResult create_layer(
     Container* container) {
   VkResult result;
 
+  uint32_t max_strings = (input->num_strings > input->max_strings) ? input->num_strings : input->max_strings;
+  uint32_t max_codes = (input->num_codes > input->max_codes) ? input->num_codes : input->max_codes;
+  uint32_t max_drawables = (input->num_drawables > input->max_drawables) ? input->num_drawables : input->max_drawables;
+
   for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
 
     VK_RESULT(create_storage_buffer(gpu->allocator, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, sizeof(GPULayer), &container->layers[index].layer[i], &container->layers[index].layer_memory[i]));
-    if(input->num_strings > 0) {
-      VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUString)*input->num_strings, &container->layers[index].strings[i], &container->layers[index].strings_memory[i]));
-      container->layers[index].strings_buffer = malloc(sizeof(GPUString)*input->num_strings);
+    if(max_strings > 0) {
+      VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUString)*max_strings, &container->layers[index].strings[i], &container->layers[index].strings_memory[i]));
+      container->layers[index].strings_buffer = malloc(sizeof(GPUString)*max_strings);
     }
-    if(input->num_codes + input->num_drawables > 0) {
-      VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUDrawable)*(input->num_drawables + input->num_codes), &container->layers[index].drawables[i], &container->layers[index].drawables_memory[i]));
-      container->layers[index].drawables_buffer = malloc(sizeof(GPUDrawable)*input->num_drawables);
+    if(max_codes + max_drawables > 0) {
+      VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUDrawable)*(max_drawables + max_codes), &container->layers[index].drawables[i], &container->layers[index].drawables_memory[i]));
+      container->layers[index].drawables_buffer = malloc(sizeof(GPUDrawable)*max_drawables);
     }
-    if(input->num_codes > 0) {
-      VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(uint32_t)*input->num_codes, &container->layers[index].codes[i], &container->layers[index].codes_memory[i]));
-      container->layers[index].codes_buffer = malloc(sizeof(uint32_t)*input->num_codes);
+    if(max_codes > 0) {
+      VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(uint32_t)*max_codes, &container->layers[index].codes[i], &container->layers[index].codes_memory[i]));
+      container->layers[index].codes_buffer = malloc(sizeof(uint32_t)*max_codes);
     }
 
-    VkBuffer transfer;
-    VmaAllocation transfer_memory;
-    void* mapped;
-    VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPULayer) + sizeof(GPUString) * input->num_strings + sizeof(GPUDrawable)*input->num_drawables + sizeof(uint32_t)*input->num_codes, &transfer, &transfer_memory, &mapped));
-
-    if(input->num_strings > 0) {
+    if(max_strings > 0) {
       container->layers[index].data.strings = buffer_address(gpu->device, container->layers[index].strings[i]);
     } else {
       container->layers[index].data.strings = 0x00000000;
     }
 
-    if(input->num_codes > 0) {
+    if(max_codes > 0) {
       container->layers[index].data.codes = buffer_address(gpu->device, container->layers[index].codes[i]);
     } else {
       container->layers[index].data.codes = 0x00000000;
     }
 
-    if(input->num_codes + input->num_drawables > 0) {
+    if(max_codes + max_drawables > 0) {
       container->layers[index].data.drawables = buffer_address(gpu->device, container->layers[index].drawables[i]);
     } else {
       container->layers[index].data.drawables = 0x00000000;
     }
 
+    VkBuffer transfer;
+    VmaAllocation transfer_memory;
+    void* mapped;
+    VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPULayer) + sizeof(GPUString) * input->num_strings + sizeof(GPUDrawable)*input->num_drawables + sizeof(uint32_t)*input->num_codes, &transfer, &transfer_memory, &mapped));
 
     container->layers[index].data.draw.first_vertex = 0;
     container->layers[index].data.draw.vertex_count = 6;
     container->layers[index].data.draw.first_instance = 0;
     container->layers[index].data.draw.instance_count = 0;
 
-    container->layers[index].data.dispatch_strings.x = input->num_strings;
+    container->layers[index].data.dispatch_strings.x = max_strings;
     container->layers[index].data.dispatch_strings.y = 1;
     container->layers[index].data.dispatch_strings.z = 1;
 
-    container->layers[index].data.max_drawables = input->num_drawables + input->num_codes;
-    container->layers[index].data.max_strings = input->num_strings;
-    container->layers[index].data.num_drawables = input->num_drawables;
+    container->layers[index].data.max_drawables = max_drawables + max_codes;
+    container->layers[index].data.max_strings = max_strings;
+    container->layers[index].data.max_codes = max_codes;
+    container->layers[index].data.num_drawables = max_drawables;
     container->layers[index].data.container = container->address;
     memcpy(mapped, &container->layers[index].data, sizeof(GPULayer));