Trying to figure out why the transfers aren't complete before the compute shader runs, even with memory barriers

main
noah metz 2024-10-28 22:07:13 -06:00
parent 152635f14f
commit 7fea23c6ff
7 changed files with 175 additions and 159 deletions

@ -23,12 +23,13 @@
#include <cglm/affine.h> #include <cglm/affine.h>
#include <cglm/quat.h> #include <cglm/quat.h>
#include <cglm/cam.h> #include <cglm/cam.h>
#include "stdatomic.h"
#define MAX_FRAMES_IN_FLIGHT 2 #define MAX_FRAMES_IN_FLIGHT 2
#define WINDOW_MIN_WIDTH 800 #define WINDOW_MIN_WIDTH 800
#define WINDOW_MIN_HEIGHT 600 #define WINDOW_MIN_HEIGHT 600
extern PFN_vkCmdPipelineBarrier2KHR pVkCmdPipelineBarrier2KHR;
typedef struct GPUBufferStruct { typedef struct GPUBufferStruct {
VkBuffer buffers[MAX_FRAMES_IN_FLIGHT]; VkBuffer buffers[MAX_FRAMES_IN_FLIGHT];
VmaAllocation memory[MAX_FRAMES_IN_FLIGHT]; VmaAllocation memory[MAX_FRAMES_IN_FLIGHT];
@ -70,12 +71,11 @@ typedef struct SwapchainDetailsStruct {
typedef struct TransferInfoStruct { typedef struct TransferInfoStruct {
VkDeviceSize offset; VkDeviceSize offset;
VkDeviceSize size; VkDeviceSize size;
VkBuffer buffers[MAX_FRAMES_IN_FLIGHT]; VkBuffer buffer;
} TransferInfo; } TransferInfo;
typedef struct FrameContextStruct { typedef struct FrameContextStruct {
VkFence ready; VkFence ready;
VkFence transfer_ready[MAX_FRAMES_IN_FLIGHT];
VkSemaphore image; VkSemaphore image;
VkSemaphore render; VkSemaphore render;
VkSemaphore transfer; VkSemaphore transfer;
@ -84,7 +84,7 @@ typedef struct FrameContextStruct {
uint64_t frame_index; uint64_t frame_index;
uint64_t transfer_index; uint64_t transfer_index;
VkCommandBuffer transfer_commands[MAX_FRAMES_IN_FLIGHT]; VkCommandBuffer transfer_commands;
VkBuffer transfer_buffer; VkBuffer transfer_buffer;
VmaAllocation transfer_memory; VmaAllocation transfer_memory;
void* transfer_mapped; void* transfer_mapped;
@ -168,6 +168,14 @@ VkDeviceAddress buffer_address(
VkBuffer buffer); VkBuffer buffer);
VkResult add_transfer( VkResult add_transfer(
void* data,
VkBuffer buffer,
VkDeviceSize offset,
VkDeviceSize size,
uint32_t frame_index,
RenderContext* gpu);
VkResult add_transfers(
void* data, void* data,
VkBuffer* buffers, VkBuffer* buffers,
VkDeviceSize offset, VkDeviceSize offset,

@ -41,7 +41,7 @@ typedef struct DispatchCommandStruct {
typedef struct UIPushConstantStruct { typedef struct UIPushConstantStruct {
VkDeviceAddress layer; VkDeviceAddress layer;
float time; float time;
float pad; uint32_t pad;
} UIPushConstant; } UIPushConstant;
typedef struct GPUFontStruct { typedef struct GPUFontStruct {
@ -164,16 +164,16 @@ typedef struct GPUContainerStruct {
} GPUContainer; } GPUContainer;
typedef struct ContainerStruct { typedef struct ContainerStruct {
VkBuffer container; VkBuffer container[MAX_FRAMES_IN_FLIGHT];
VmaAllocation container_memory; VmaAllocation container_memory[MAX_FRAMES_IN_FLIGHT];
VkDeviceAddress address; VkDeviceAddress address[MAX_FRAMES_IN_FLIGHT];
GPUContainer data; GPUContainer data;
uint32_t id; uint32_t id;
uint32_t _Atomic layer_count; uint32_t layer_count;
Layer* layers; Layer* layers;
} Container; } Container;

@ -106,4 +106,5 @@ layout(std430, buffer_reference) readonly buffer Layer {
layout(std430, push_constant) uniform PushConstant { layout(std430, push_constant) uniform PushConstant {
Layer layer; Layer layer;
float time; float time;
uint frame;
} pc; } pc;

@ -1,12 +1,12 @@
#include "draw.h" #include "draw.h"
#include "gpu.h" #include "gpu.h"
#include "stdatomic.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, double time, uint32_t frame) { void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, double time, uint32_t frame) {
UIPushConstant push = { UIPushConstant push = {
.time = (float)time, .time = (float)time,
.layer = 0, .layer = 0,
.pad = frame,
}; };
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.pipeline); vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.pipeline);
@ -15,9 +15,7 @@ void record_ui_draw(VkCommandBuffer command_buffer, UIContext* ui_context, doubl
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 2, 1, &ui_context->samplers, 0, NULL); vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 2, 1, &ui_context->samplers, 0, NULL);
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 3, 1, &ui_context->textures, 0, NULL); vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, ui_context->pipeline.layout, 3, 1, &ui_context->textures, 0, NULL);
for(uint32_t i = 0; i < ui_context->max_containers; i++) { for(uint32_t i = 0; i < ui_context->max_containers; i++) {
uint32_t layer_count = atomic_load(&ui_context->containers[i].layer_count); for(uint32_t j = 0; j < ui_context->containers[i].layer_count; j++) {
__sync_synchronize();
for(uint32_t j = 0; j < layer_count; j++) {
push.layer = ui_context->containers[i].layers[j].address[frame]; push.layer = ui_context->containers[i].layers[j].address[frame];
vkCmdPushConstants(command_buffer, ui_context->pipeline.layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &push); vkCmdPushConstants(command_buffer, ui_context->pipeline.layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &push);
vkCmdDrawIndirect(command_buffer, ui_context->containers[i].layers[j].layer[frame], offsetof(GPULayer, draw), 1, 0); vkCmdDrawIndirect(command_buffer, ui_context->containers[i].layers[j].layer[frame], offsetof(GPULayer, draw), 1, 0);
@ -29,19 +27,28 @@ void record_ui_compute(VkCommandBuffer command_buffer, UIContext* ui, uint32_t f
UIPushConstant push = { UIPushConstant push = {
.time = 0.0, .time = 0.0,
.layer = 0, .layer = 0,
.pad = frame,
}; };
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui->string_pipeline.pipeline); vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui->string_pipeline.pipeline);
for(uint32_t i = 0; i < ui->max_containers; i++) { for(uint32_t i = 0; i < ui->max_containers; i++) {
if(ui->containers[i].id != 0x00000000) { if(ui->containers[i].id != 0x00000000) {
for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) { for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) {
push.layer = ui->containers[i].layers[j].address[frame];
command_copy_buffer(command_buffer, ui->containers[i].layers[j].layer[frame], ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t)); command_copy_buffer(command_buffer, ui->containers[i].layers[j].layer[frame], ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, num_drawables), offsetof(GPULayer, draw) + offsetof(DrawCommand, instance_count), sizeof(uint32_t));
}
}
}
for(uint32_t i = 0; i < ui->max_containers; i++) {
if(ui->containers[i].id != 0x00000000) {
for(uint32_t j = 0; j < ui->containers[i].layer_count; j++) {
push.layer = ui->containers[i].layers[j].address[frame];
vkCmdPushConstants(command_buffer, ui->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &push); vkCmdPushConstants(command_buffer, ui->string_pipeline.layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, &push);
vkCmdDispatchIndirect(command_buffer, ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, dispatch_strings)); vkCmdDispatchIndirect(command_buffer, ui->containers[i].layers[j].layer[frame], offsetof(GPULayer, dispatch_strings));
} }
} }
} }
} }
VkResult draw_frame( VkResult draw_frame(
@ -59,34 +66,31 @@ VkResult draw_frame(
}; };
if(context->frame[context->current_frame].transfer_count > 0) { if(context->frame[context->current_frame].transfer_count > 0) {
VkFence fences[] = {context->frame[context->current_frame].transfer_ready[0], context->frame[context->current_frame].transfer_ready[1]}; VkCommandBuffer transfer_commands = context->frame[context->current_frame].transfer_commands;
VK_RESULT(vkWaitForFences(context->device, 2, fences, VK_TRUE, UINT64_MAX));
VK_RESULT(vkResetFences(context->device, 2, fences));
for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VkCommandBuffer transfer_commands = context->frame[context->current_frame].transfer_commands[i];
VK_RESULT(vkResetCommandBuffer(transfer_commands, 0)); VK_RESULT(vkResetCommandBuffer(transfer_commands, 0));
VK_RESULT(vkBeginCommandBuffer(transfer_commands, &begin_info)); VK_RESULT(vkBeginCommandBuffer(transfer_commands, &begin_info));
VkDeviceSize src_offset = 0; VkDeviceSize src_offset = 0;
for(uint32_t j = 0; j < context->frame[context->current_frame].transfer_count; j++) { for(uint32_t transfer_index = 0; transfer_index < context->frame[context->current_frame].transfer_count; transfer_index++) {
command_copy_buffer( command_copy_buffer(
transfer_commands, transfer_commands,
context->frame[context->current_frame].transfer_buffer, context->frame[context->current_frame].transfer_buffer,
context->frame[context->current_frame].transfer_infos[j].buffers[i], context->frame[context->current_frame].transfer_infos[transfer_index].buffer,
src_offset, src_offset,
context->frame[context->current_frame].transfer_infos[j].offset, context->frame[context->current_frame].transfer_infos[transfer_index].offset,
context->frame[context->current_frame].transfer_infos[j].size); context->frame[context->current_frame].transfer_infos[transfer_index].size);
src_offset += context->frame[context->current_frame].transfer_infos[j].size; src_offset += context->frame[context->current_frame].transfer_infos[transfer_index].size;
} }
record_ui_compute(transfer_commands, ui, i);
record_ui_compute(transfer_commands, ui, context->current_frame);
VK_RESULT(vkEndCommandBuffer(transfer_commands)); VK_RESULT(vkEndCommandBuffer(transfer_commands));
VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT}; VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT};
context->frame[i].transfer_index += 1; context->frame[context->current_frame].transfer_index += 1;
VkSemaphore transfer_signals[] = {context->frame[i].transfer}; VkSemaphore transfer_signals[] = {context->frame[context->current_frame].transfer};
uint64_t transfer_signal_values[] = {context->frame[i].transfer_index}; uint64_t transfer_signal_values[] = {context->frame[context->current_frame].transfer_index};
VkSemaphore transfer_waits[] = {context->frame[i].transfer, context->frame[i].frame}; VkSemaphore transfer_waits[] = {context->frame[context->current_frame].frame};
uint64_t transfer_wait_values[] = {context->frame[i].transfer_index-1, context->frame[i].frame_index}; uint64_t transfer_wait_values[] = {context->frame[context->current_frame].frame_index};
VkTimelineSemaphoreSubmitInfo timeline_info = { VkTimelineSemaphoreSubmitInfo timeline_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.signalSemaphoreValueCount = sizeof(transfer_signal_values)/sizeof(uint64_t), .signalSemaphoreValueCount = sizeof(transfer_signal_values)/sizeof(uint64_t),
@ -105,8 +109,7 @@ VkResult draw_frame(
.waitSemaphoreCount = sizeof(transfer_waits)/sizeof(VkSemaphore), .waitSemaphoreCount = sizeof(transfer_waits)/sizeof(VkSemaphore),
.pNext = &timeline_info, .pNext = &timeline_info,
}; };
VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &submit_info, context->frame[context->current_frame].transfer_ready[i])); VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &submit_info, VK_NULL_HANDLE));
}
context->frame[context->current_frame].transfer_count = 0; context->frame[context->current_frame].transfer_count = 0;
context->frame[context->current_frame].transfer_written = 0; context->frame[context->current_frame].transfer_written = 0;
} }

@ -5,6 +5,8 @@
#include "vk_mem_alloc.h" #include "vk_mem_alloc.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
PFN_vkCmdPipelineBarrier2KHR pVkCmdPipelineBarrier2KHR;
const char * validation_layers[] = { const char * validation_layers[] = {
"VK_LAYER_KHRONOS_validation", "VK_LAYER_KHRONOS_validation",
//"VK_LAYER_LUNARG_api_dump", //"VK_LAYER_LUNARG_api_dump",
@ -335,6 +337,11 @@ VkResult create_logical_device(VkPhysicalDevice physical_device, VkSurfaceKHR su
queue_create_info[1].pQueuePriorities = &default_queue_priority; queue_create_info[1].pQueuePriorities = &default_queue_priority;
} }
VkPhysicalDeviceSynchronization2FeaturesKHR sync2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
.synchronization2 = VK_TRUE,
};
VkPhysicalDeviceVulkan12Features features_12 = { VkPhysicalDeviceVulkan12Features features_12 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
.bufferDeviceAddress = VK_TRUE, .bufferDeviceAddress = VK_TRUE,
@ -346,6 +353,7 @@ VkResult create_logical_device(VkPhysicalDevice physical_device, VkSurfaceKHR su
.descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE, .descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE,
.descriptorBindingSampledImageUpdateAfterBind = VK_TRUE, .descriptorBindingSampledImageUpdateAfterBind = VK_TRUE,
.timelineSemaphore = VK_TRUE, .timelineSemaphore = VK_TRUE,
.pNext = &sync2,
}; };
VkPhysicalDeviceFeatures device_features = { VkPhysicalDeviceFeatures device_features = {
@ -770,9 +778,7 @@ VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommand
VkResult result; VkResult result;
frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT); frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT);
for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
frame->transfer_ready[i] = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT);
}
frame->image = create_semaphore(device); frame->image = create_semaphore(device);
frame->render = create_semaphore(device); frame->render = create_semaphore(device);
frame->transfer = create_timeline_semaphore(device); frame->transfer = create_timeline_semaphore(device);
@ -783,11 +789,11 @@ VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommand
VkCommandBufferAllocateInfo command_info = { VkCommandBufferAllocateInfo command_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandBufferCount = 2, .commandBufferCount = 1,
.commandPool = transfer_pool, .commandPool = transfer_pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
}; };
VK_RESULT(vkAllocateCommandBuffers(device, &command_info, frame->transfer_commands)); VK_RESULT(vkAllocateCommandBuffers(device, &command_info, &frame->transfer_commands));
// TODO: better defaults // TODO: better defaults
frame->transfer_max_size = 1; frame->transfer_max_size = 1;
@ -876,6 +882,8 @@ VkResult init_vulkan(GLFWwindow* window, RenderContext* context) {
VK_RESULT(create_logical_device(context->physical_device, context->surface, &context->graphics_queue, &context->present_queue, &context->transfer_queue, &context->device)); VK_RESULT(create_logical_device(context->physical_device, context->surface, &context->graphics_queue, &context->present_queue, &context->transfer_queue, &context->device));
pVkCmdPipelineBarrier2KHR = (PFN_vkCmdPipelineBarrier2KHR)vkGetDeviceProcAddr(context->device, "vkCmdPipelineBarrier2KHR");
VK_RESULT(create_memory_allocator(context->instance, context->physical_device, context->device, &context->allocator)); VK_RESULT(create_memory_allocator(context->instance, context->physical_device, context->device, &context->allocator));
VkCommandPoolCreateInfo extra_pool_info = { VkCommandPoolCreateInfo extra_pool_info = {
@ -1093,27 +1101,46 @@ VkResult command_transition_image_layout(VkDevice device, VkCommandPool transfer
return command_end_single(device, command_buffer, transfer_pool, transfer_queue); return command_end_single(device, command_buffer, transfer_pool, transfer_queue);
} }
VkResult add_transfer( VkResult add_transfers(
void* data, void* data,
VkBuffer* buffers, VkBuffer* buffers,
VkDeviceSize offset, VkDeviceSize offset,
VkDeviceSize size, VkDeviceSize size,
RenderContext* gpu) { RenderContext* gpu) {
VkResult result; VkResult result;
FrameContext* frame = &gpu->frame[gpu->current_frame]; for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VK_RESULT(add_transfer(data, buffers[i], offset, size, i, gpu));
}
return VK_SUCCESS;
}
while(frame->transfer_written + size >= frame->transfer_max_size) { VkResult add_transfer(
void* data,
VkBuffer buffer,
VkDeviceSize offset,
VkDeviceSize size,
uint32_t frame_index,
RenderContext* gpu) {
VkResult result;
FrameContext* frame = &gpu->frame[frame_index];
if(frame->transfer_written + size >= frame->transfer_max_size) {
uint32_t new_size = 2*frame->transfer_max_size;
while(frame->transfer_written + size >= new_size) {
new_size *= 2;
}
VkBuffer new_transfer; VkBuffer new_transfer;
VmaAllocation new_transfer_memory; VmaAllocation new_transfer_memory;
void* new_transfer_data; void* new_transfer_data;
VK_RESULT(create_transfer_buffer(gpu->allocator, 2*frame->transfer_max_size, &new_transfer, &new_transfer_memory, &new_transfer_data)); VK_RESULT(create_transfer_buffer(gpu->allocator, new_size, &new_transfer, &new_transfer_memory, &new_transfer_data));
memcpy(new_transfer_data, frame->transfer_mapped, frame->transfer_written); memcpy(new_transfer_data, frame->transfer_mapped, frame->transfer_written);
destroy_transfer_buffer(gpu->allocator, frame->transfer_buffer, frame->transfer_memory); destroy_transfer_buffer(gpu->allocator, frame->transfer_buffer, frame->transfer_memory);
frame->transfer_buffer = new_transfer; frame->transfer_buffer = new_transfer;
frame->transfer_memory = new_transfer_memory; frame->transfer_memory = new_transfer_memory;
frame->transfer_mapped = new_transfer_data; frame->transfer_mapped = new_transfer_data;
frame->transfer_max_size *= 2; frame->transfer_max_size = new_size;
} }
if(frame->transfer_count + 1 >= frame->transfer_max_count) { if(frame->transfer_count + 1 >= frame->transfer_max_count) {
@ -1130,9 +1157,7 @@ VkResult add_transfer(
memcpy(frame->transfer_mapped + frame->transfer_written, data, size); memcpy(frame->transfer_mapped + frame->transfer_written, data, size);
frame->transfer_infos[frame->transfer_count].size = size; frame->transfer_infos[frame->transfer_count].size = size;
for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { frame->transfer_infos[frame->transfer_count].buffer = buffer;
frame->transfer_infos[frame->transfer_count].buffers[i] = buffers[i];
}
frame->transfer_infos[frame->transfer_count].offset = offset; frame->transfer_infos[frame->transfer_count].offset = offset;
frame->transfer_written += size; frame->transfer_written += size;

@ -8,7 +8,6 @@
#include "vulkan/vk_enum_string_helper.h" #include "vulkan/vk_enum_string_helper.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
#include "pthread.h" #include "pthread.h"
#include "stdatomic.h"
typedef struct ClientContextStruct { typedef struct ClientContextStruct {
GLFWwindow* window; GLFWwindow* window;
@ -169,7 +168,7 @@ VkResult main_thread(ClientContext* context) {
.size = 32, .size = 32,
.color = {1.0, 1.0, 1.0, 1.0}, .color = {1.0, 1.0, 1.0, 1.0},
.offset = 0, .offset = 0,
.length = 4, .length = 0,
.font = 0, .font = 0,
}; };
@ -189,12 +188,13 @@ VkResult main_thread(ClientContext* context) {
create_container(&fps_container, &context->render, &context->ui); create_container(&fps_container, &context->render, &context->ui);
// //
double last_draw = -1; double last_draw = 0;
double draw_interval = 1; double draw_interval = 0.1;
double frame_count = 0; double frame_count = 0;
uint32_t* mapped_codes = context->ui.containers[0].layers[0].codes_buffer; uint32_t* mapped_codes = context->ui.containers[0].layers[0].codes_buffer;
GPUString* mapped_string = &context->ui.containers[0].layers[0].strings_buffer[0]; GPUString* mapped_string = context->ui.containers[0].layers[0].strings_buffer;
char str[11]; char str[11];
int test = 0;
// //
while(glfwWindowShouldClose(context->window) == 0) { while(glfwWindowShouldClose(context->window) == 0) {
@ -203,33 +203,29 @@ VkResult main_thread(ClientContext* context) {
// //
if(frame_time - last_draw > draw_interval) { if(frame_time - last_draw > draw_interval) {
test = (test + 1) % 2;
snprintf(str, 11, "%3.2f", frame_count/(frame_time-last_draw)); snprintf(str, 11, "%3.2f", frame_count/(frame_time-last_draw));
map_string(str, mapped_codes, 0, 0, &context->ui); map_string(str, mapped_codes, 0, 0, &context->ui);
mapped_string->size = 32; if(test) {
mapped_string->pos[0] = 0;
mapped_string->pos[1] = 32;
mapped_string->color[0] = 1.0;
mapped_string->color[1] = 1.0;
mapped_string->color[2] = 1.0;
mapped_string->color[3] = 1.0;
mapped_string->font = 0;
mapped_string->offset = 0;
mapped_string->length = strlen(str); mapped_string->length = strlen(str);
} else {
mapped_string->length = 0;
}
last_draw = frame_time; last_draw = frame_time;
frame_count = 0; frame_count = 0;
VK_RESULT(add_transfer( VK_RESULT(add_transfers(
context->ui.containers[0].layers[0].codes_buffer, context->ui.containers[0].layers[0].codes_buffer,
context->ui.containers[0].layers[0].codes, context->ui.containers[0].layers[0].codes,
0, 0,
10*sizeof(uint32_t), 10*sizeof(uint32_t),
&context->render)); &context->render));
VK_RESULT(add_transfer( VK_RESULT(add_transfers(
context->ui.containers[0].layers[0].strings_buffer, &context->ui.containers[0].layers[0].strings_buffer[0].length,
context->ui.containers[0].layers[0].strings, context->ui.containers[0].layers[0].strings,
0, offsetof(GPUString, length),
sizeof(GPUString), sizeof(uint32_t),
&context->render)); &context->render));
} }
// //

@ -9,7 +9,7 @@
#include "vk_mem_alloc.h" #include "vk_mem_alloc.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
#include "spng.h" #include "spng.h"
#include "stdatomic.h" #include <sys/param.h>
VkShaderModule load_shader_file(const char* path, VkDevice device) { VkShaderModule load_shader_file(const char* path, VkDevice device) {
FILE* file; FILE* file;
@ -292,13 +292,12 @@ VkResult create_container(
} }
VkResult result; VkResult result;
for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUContainer), &context->containers[index].container[i], &context->containers[index].container_memory[i]));
context->containers[index].address[i] = buffer_address(gpu->device, context->containers[index].container[i]);
}
VK_RESULT(create_storage_buffer(gpu->allocator, 0, sizeof(GPUContainer), &context->containers[index].container, &context->containers[index].container_memory)); fprintf(stderr, "Created container with storage buffers %p/%p\n", context->containers[index].container[0], context->containers[index].container[1]);
VkBuffer transfer;
VmaAllocation transfer_memory;
void* mapped;
VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPUContainer), &transfer, &transfer_memory, &mapped));
context->containers[index].data.offset[0] = container->offset[0]; context->containers[index].data.offset[0] = container->offset[0];
context->containers[index].data.offset[1] = container->offset[1]; context->containers[index].data.offset[1] = container->offset[1];
@ -306,22 +305,15 @@ VkResult create_container(
context->containers[index].data.size[1] = container->size[1]; context->containers[index].data.size[1] = container->size[1];
context->containers[index].data.anchor = container->anchor; context->containers[index].data.anchor = container->anchor;
context->containers[index].data.context = context->address; context->containers[index].data.context = context->address;
memcpy(mapped, &context->containers[index].data, sizeof(GPUContainer)); add_transfers(&context->containers[index].data, context->containers[index].container, 0, sizeof(GPUContainer), gpu);
VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool);
command_copy_buffer(command_buffer, transfer, context->containers[index].container, 0, 0, sizeof(GPUContainer));
VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue));
destroy_transfer_buffer(gpu->allocator, transfer, transfer_memory);
context->containers[index].address = buffer_address(gpu->device, context->containers[index].container);
context->containers[index].id = container->id; context->containers[index].id = container->id;
context->containers[index].layers = malloc(sizeof(Layer)*container->layer_count); context->containers[index].layers = malloc(sizeof(Layer)*container->layer_count);
for(uint32_t i = 0; i < container->layer_count; i++) { for(uint32_t i = 0; i < container->layer_count; i++) {
VK_RESULT(create_layer(i, &container->layers[i], gpu, &context->containers[index])); VK_RESULT(create_layer(i, &container->layers[i], gpu, &context->containers[index]));
} }
__sync_synchronize(); context->containers[index].layer_count = container->layer_count;
atomic_store(&context->containers[index].layer_count, container->layer_count);
return VK_SUCCESS; return VK_SUCCESS;
} }
@ -370,11 +362,21 @@ VkResult create_layer(
} else { } else {
container->layers[index].data.drawables = 0x00000000; container->layers[index].data.drawables = 0x00000000;
} }
container->layers[index].address[i] = buffer_address(gpu->device, container->layers[index].layer[i]);
}
fprintf(stderr, "Created layer with storage buffers %p/%p\n", container->layers[index].layer[0], container->layers[index].layer[1]);
fprintf(stderr, "String Buffers %p/%p\n", container->layers[index].strings[0], container->layers[index].strings[1]);
fprintf(stderr, "Code Buffers %p/%p\n", container->layers[index].codes[0], container->layers[index].codes[1]);
VkBuffer transfer; for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VmaAllocation transfer_memory; add_transfer(
void* mapped; &container->address[i],
VK_RESULT(create_transfer_buffer(gpu->allocator, sizeof(GPULayer) + sizeof(GPUString) * input->num_strings + sizeof(GPUDrawable)*input->num_drawables + sizeof(uint32_t)*input->num_codes, &transfer, &transfer_memory, &mapped)); container->layers[index].layer[i],
offsetof(GPULayer, container),
sizeof(VkDeviceAddress),
i,
gpu);
}
container->layers[index].data.draw.first_vertex = 0; container->layers[index].data.draw.first_vertex = 0;
container->layers[index].data.draw.vertex_count = 6; container->layers[index].data.draw.vertex_count = 6;
@ -387,42 +389,23 @@ VkResult create_layer(
container->layers[index].data.max_drawables = max_drawables + max_codes; container->layers[index].data.max_drawables = max_drawables + max_codes;
container->layers[index].data.max_strings = max_strings; container->layers[index].data.max_strings = max_strings;
container->layers[index].data.max_codes = max_codes;
container->layers[index].data.num_drawables = max_drawables; container->layers[index].data.num_drawables = max_drawables;
container->layers[index].data.container = container->address; add_transfers(&container->layers[index].data, container->layers[index].layer, 0, sizeof(GPULayer)-sizeof(VkDeviceAddress), gpu);
memcpy(mapped, &container->layers[index].data, sizeof(GPULayer));
VkCommandBuffer command_buffer = command_begin_single(gpu->device, gpu->transfer_pool);
command_copy_buffer(command_buffer, transfer, container->layers[index].layer[i], 0, 0, sizeof(GPULayer));
if(input->num_strings > 0) { if(input->num_strings > 0) {
GPUString* strings = (GPUString*)(mapped + sizeof(GPULayer)); memcpy(container->layers[index].strings_buffer, input->strings, sizeof(GPUString)*input->num_strings);
for(uint32_t i = 0; i < input->num_strings; i++) { add_transfers(container->layers[index].strings_buffer, container->layers[index].strings, 0, sizeof(GPUString)*input->num_strings, gpu);
memcpy(&strings[i], &input->strings[i], sizeof(GPUString));
memcpy(&container->layers[index].strings_buffer[i], &input->strings[i], sizeof(GPUString));
}
command_copy_buffer(command_buffer, transfer, container->layers[index].strings[i], sizeof(GPULayer), 0, sizeof(GPUString)*input->num_strings);
} }
if(input->num_drawables > 0) { if(input->num_drawables > 0) {
GPUDrawable* drawables = (GPUDrawable*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings); memcpy(container->layers[index].drawables_buffer, input->drawables, sizeof(GPUDrawable)*input->num_drawables);
for(uint32_t i = 0; i < input->num_drawables; i++) { add_transfers(container->layers[index].drawables_buffer, container->layers[index].drawables, 0, sizeof(GPUDrawable)*input->num_drawables, gpu);
memcpy(&drawables[i], &input->drawables[i], sizeof(GPUDrawable));
memcpy(&container->layers[index].drawables_buffer[i], &input->drawables[i], sizeof(GPUDrawable));
}
command_copy_buffer(command_buffer, transfer, container->layers[index].drawables[i], sizeof(GPULayer) + sizeof(GPUString)*input->num_strings, 0, sizeof(GPUDrawable)*input->num_drawables);
} }
if(input->num_codes > 0) { if(input->num_codes > 0) {
uint32_t* codes = (uint32_t*)(mapped + sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables); memcpy(container->layers[index].codes_buffer, input->codes, sizeof(uint32_t)*input->num_codes);
for(uint32_t i = 0; i < input->num_codes; i++) { add_transfers(container->layers[index].codes_buffer, container->layers[index].codes, 0, sizeof(uint32_t)*input->num_codes, gpu);
codes[i] = input->codes[i];
container->layers[index].codes_buffer[i] = input->codes[i];
}
command_copy_buffer(command_buffer, transfer, container->layers[index].codes[i], sizeof(GPULayer) + sizeof(GPUString)*input->num_strings + sizeof(GPUDrawable)*input->num_drawables, 0, sizeof(uint32_t)*input->num_codes);
}
VK_RESULT(command_end_single(gpu->device, command_buffer, gpu->transfer_pool, gpu->transfer_queue));
destroy_transfer_buffer(gpu->allocator, transfer, transfer_memory);
container->layers[index].address[i] = buffer_address(gpu->device, container->layers[index].layer[i]);
} }
return VK_SUCCESS; return VK_SUCCESS;