diff --git a/client/include/gpu.h b/client/include/gpu.h index 30402ff..dbbb3c8 100644 --- a/client/include/gpu.h +++ b/client/include/gpu.h @@ -79,11 +79,13 @@ typedef struct FrameContextStruct { VkSemaphore image; VkSemaphore render; VkSemaphore transfer; + VkSemaphore compute; VkSemaphore frame; uint64_t frame_index; - uint64_t transfer_index; + uint64_t compute_index; + VkCommandBuffer compute_commands; VkCommandBuffer transfer_commands; VkBuffer transfer_buffer; VmaAllocation transfer_memory; diff --git a/client/src/draw.c b/client/src/draw.c index bfcfb5b..eda24c9 100644 --- a/client/src/draw.c +++ b/client/src/draw.c @@ -71,38 +71,24 @@ VkResult draw_frame( VK_RESULT(vkBeginCommandBuffer(transfer_commands, &begin_info)); VkDeviceSize src_offset = 0; - for(uint32_t transfer_index = 0; transfer_index < context->frame[context->current_frame].transfer_count; transfer_index++) { + for(uint32_t tid = 0; tid < context->frame[context->current_frame].transfer_count; tid++) { command_copy_buffer( transfer_commands, context->frame[context->current_frame].transfer_buffer, - context->frame[context->current_frame].transfer_infos[transfer_index].buffer, + context->frame[context->current_frame].transfer_infos[tid].buffer, src_offset, - context->frame[context->current_frame].transfer_infos[transfer_index].offset, - context->frame[context->current_frame].transfer_infos[transfer_index].size); - src_offset += context->frame[context->current_frame].transfer_infos[transfer_index].size; - VkBufferMemoryBarrier barrier = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .buffer = context->frame[context->current_frame].transfer_infos[transfer_index].buffer, - .offset = context->frame[context->current_frame].transfer_infos[transfer_index].offset, - .size = context->frame[context->current_frame].transfer_infos[transfer_index].size, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - }; - vkCmdPipelineBarrier(transfer_commands, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 0, NULL, 1, &barrier, 0, NULL); + context->frame[context->current_frame].transfer_infos[tid].offset, + context->frame[context->current_frame].transfer_infos[tid].size); + src_offset += context->frame[context->current_frame].transfer_infos[tid].size; } - record_ui_compute(transfer_commands, ui, context->current_frame); VK_RESULT(vkEndCommandBuffer(transfer_commands)); - VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT}; - context->frame[context->current_frame].transfer_index += 1; + VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT}; VkSemaphore transfer_signals[] = {context->frame[context->current_frame].transfer}; - uint64_t transfer_signal_values[] = {context->frame[context->current_frame].transfer_index}; VkSemaphore transfer_waits[] = {context->frame[context->current_frame].frame}; uint64_t transfer_wait_values[] = {context->frame[context->current_frame].frame_index}; VkTimelineSemaphoreSubmitInfo timeline_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .signalSemaphoreValueCount = sizeof(transfer_signal_values)/sizeof(uint64_t), - .pSignalSemaphoreValues = transfer_signal_values, .waitSemaphoreValueCount = sizeof(transfer_wait_values)/sizeof(uint64_t), .pWaitSemaphoreValues = transfer_wait_values, }; @@ -120,6 +106,35 @@ VkResult draw_frame( VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &submit_info, VK_NULL_HANDLE)); context->frame[context->current_frame].transfer_count = 0; context->frame[context->current_frame].transfer_written = 0; + + + VkCommandBuffer compute_commands = context->frame[context->current_frame].compute_commands; + VK_RESULT(vkResetCommandBuffer(compute_commands, 0)); + VK_RESULT(vkBeginCommandBuffer(compute_commands, &begin_info)); + record_ui_compute(compute_commands, ui, context->current_frame); + VK_RESULT(vkEndCommandBuffer(compute_commands)); + VkPipelineStageFlags compute_wait_stages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT}; + context->frame[context->current_frame].compute_index += 1; + VkSemaphore compute_signals[] = {context->frame[context->current_frame].compute}; + uint64_t compute_signal_values[] = {context->frame[context->current_frame].compute_index}; + VkSemaphore compute_waits[] = {context->frame[context->current_frame].transfer}; + VkTimelineSemaphoreSubmitInfo compute_timeline = { + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .signalSemaphoreValueCount = sizeof(compute_signal_values)/sizeof(uint64_t), + .pSignalSemaphoreValues = compute_signal_values, + }; + VkSubmitInfo compute_submit = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &compute_commands, + .pSignalSemaphores = compute_signals, + .signalSemaphoreCount = sizeof(compute_signals)/sizeof(VkSemaphore), + .pWaitSemaphores = compute_waits, + .pWaitDstStageMask = compute_wait_stages, + .waitSemaphoreCount = sizeof(compute_waits)/sizeof(VkSemaphore), + .pNext = &compute_timeline, + }; + VK_RESULT(vkQueueSubmit(context->transfer_queue.handle, 1, &compute_submit, VK_NULL_HANDLE)); } uint32_t image_index; @@ -165,10 +180,10 @@ VkResult draw_frame( VK_RESULT(vkEndCommandBuffer(command_buffer)); VkPipelineStageFlags wait_stages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT}; - VkSemaphore wait_semaphores[] = {context->frame[context->current_frame].image, context->frame[context->current_frame].transfer}; + VkSemaphore wait_semaphores[] = {context->frame[context->current_frame].image, context->frame[context->current_frame].compute}; VkSemaphore signal_semaphores[] = {context->frame[context->current_frame].render, context->frame[context->current_frame].frame}; context->frame[context->current_frame].frame_index += 1; - uint64_t wait_values[] = {0, context->frame[context->current_frame].transfer_index}; + uint64_t wait_values[] = {0, context->frame[context->current_frame].compute_index}; uint64_t signal_values[] = {0, context->frame[context->current_frame].frame_index}; VkTimelineSemaphoreSubmitInfo timeline_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, diff --git a/client/src/gpu.c b/client/src/gpu.c index 511fed2..886df8c 100644 --- a/client/src/gpu.c +++ b/client/src/gpu.c @@ -781,11 +781,12 @@ VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommand frame->image = create_semaphore(device); frame->render = create_semaphore(device); - frame->transfer = create_timeline_semaphore(device); + frame->transfer = create_semaphore(device); frame->frame = create_timeline_semaphore(device); + frame->compute = create_timeline_semaphore(device); frame->frame_index = 0; - frame->transfer_index = 0; + frame->compute_index = 0; VkCommandBufferAllocateInfo command_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, @@ -794,6 +795,7 @@ VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommand .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, }; VK_RESULT(vkAllocateCommandBuffers(device, &command_info, &frame->transfer_commands)); + VK_RESULT(vkAllocateCommandBuffers(device, &command_info, &frame->compute_commands)); // TODO: better defaults frame->transfer_max_size = 1;