From 82166a8181858930630e7da3379b9b1de37074fe Mon Sep 17 00:00:00 2001
From: Noah Metz
Date: Fri, 12 Jan 2024 19:00:20 -0700
Subject: [PATCH] Moved to homebrew clang, added gpu memory management functions\n

---
 Makefile          |   4 +-
 include/gpu_mem.h |  75 +++++++++++
 src/gpu_mem.c     | 335 ++++++++++++++++++++++++++++++++++++++++++++++
 src/main.c        |  75 +++++++++--
 4 files changed, 473 insertions(+), 16 deletions(-)
 create mode 100644 include/gpu_mem.h
 create mode 100644 src/gpu_mem.c

diff --git a/Makefile b/Makefile
index 0356b9c..229cffc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 CFLAGS = -fsanitize=address -I $(ROOT_DIR)/include -I/usr/local/include -O0 -g -Wall -Wextra
-LDFLAGS = -L/usr/local/lib -lglfw -lvulkan -ldl -Xlinker -rpath -Xlinker /usr/local/lib
-CC = clang
+LDFLAGS = -L/opt/homebrew/opt/llvm/lib -L/usr/local/lib -lglfw -lvulkan -ldl -Xlinker -rpath -Xlinker /usr/local/lib -Xlinker -rpath -Xlinker /opt/homebrew/opt/llvm/lib -Xpreprocessor -fopenmp
+CC = /opt/homebrew/opt/llvm/bin/clang
 GDB = lldb
 
 SOURCES = $(wildcard src/*.c)
diff --git a/include/gpu_mem.h b/include/gpu_mem.h
new file mode 100644
index 0000000..dfe71ad
--- /dev/null
+++ b/include/gpu_mem.h
@@ -0,0 +1,75 @@
+#ifndef GPU_MEM_H
+#define GPU_MEM_H
+
+#include <stdio.h>
+#include <vulkan/vulkan_core.h>
+
+// GPUMemoryType.index value returned by pick_memory() when no memory type matches.
+#define GPU_MEMORY_TYPE_INVALID_INDEX 0xFFFFFFFFu
+
+// A Vulkan memory type: its index in the physical device's table plus its property flags.
+typedef struct GPUMemoryTypeStruct {
+  VkMemoryPropertyFlags flags;
+  uint32_t index;
+} GPUMemoryType;
+
+// The byte range [offset, offset + size) of a page; a node in a singly linked list.
+typedef struct GPUMemoryChunkStruct {
+  VkDeviceSize size;
+  VkDeviceSize offset;
+  struct GPUMemoryChunkStruct* next;
+} GPUMemoryChunk;
+
+// One VkDeviceMemory allocation that buffers and images are sub-allocated from.
+typedef struct GPUPageStruct {
+  VkDeviceMemory memory;
+  VkDeviceSize size;
+  GPUMemoryType type;
+
+  GPUMemoryChunk* free;      // unused ranges, kept sorted by ascending offset
+  GPUMemoryChunk* allocated; // ranges in use, in the order they were handed out
+} GPUPage;
+
+// A VkBuffer bound to the chunk `memory` of `page`.
+typedef struct GPUBufferStruct {
+  GPUPage* page;
+  GPUMemoryChunk* memory;
+  VkBuffer handle;
+} GPUBuffer;
+
+// A VkImage bound to the chunk `memory` of `page`.
+typedef struct GPUImageStruct {
+  GPUPage* page;
+  GPUMemoryChunk* memory;
+  VkImage handle;
+} GPUImage;
+
+// Returns the first memory type whose bit is set in `filter`, that has every `include` flag
+// and no `exclude` flag. On no match, .index is GPU_MEMORY_TYPE_INVALID_INDEX and .flags is 0.
+GPUMemoryType pick_memory(VkPhysicalDeviceMemoryProperties memories, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude);
+
+// Allocates `size` bytes of device memory of a type chosen by pick_memory() and stores the new
+// page in *handle. The caller owns the page and releases it with gpu_page_free().
+VkResult gpu_page_allocate(VkDevice device, VkPhysicalDeviceMemoryProperties memories, VkDeviceSize size, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude, GPUPage** handle);
+
+// Frees the page's device memory and all of its chunk bookkeeping. Buffers and images created
+// from the page are NOT destroyed here. NULL is ignored.
+void gpu_page_free(VkDevice device, GPUPage* page);
+
+// Creates a buffer/image and binds it to a chunk of `page` that satisfies the resource's
+// Vulkan memory requirements. On failure nothing is leaked and the output struct is untouched.
+VkResult gpu_buffer_malloc(VkDevice device, GPUPage* page, VkDeviceSize size, VkBufferUsageFlags usage, GPUBuffer* buffer);
+VkResult gpu_image_malloc(VkDevice device, GPUPage* page, VkImageCreateInfo* info, GPUImage* image);
+
+// Destroys the Vulkan handle and returns its chunk to the page's free list.
+void gpu_buffer_free(VkDevice device, GPUBuffer buffer);
+void gpu_image_free(VkDevice device, GPUImage image);
+
+// Moves `memory` from the allocated list of `page` back to its free list, merging it with
+// adjacent free ranges. Does nothing if `memory` is not in the allocated list.
+void gpu_free(GPUPage* page, GPUMemoryChunk* memory);
+
+// Writes the chunk list as "Chunks: {size@offset, ...}" followed by a newline.
+void fprintchunks(FILE* out, GPUMemoryChunk* start);
+
+#endif
diff --git a/src/gpu_mem.c b/src/gpu_mem.c
new file mode 100644
index 0000000..987a9a4
--- /dev/null
+++ b/src/gpu_mem.c
@@ -0,0 +1,335 @@
+#include <gpu_mem.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vulkan/vulkan_core.h>
+
+// See gpu_mem.h. Memory types are scanned in index order; the first acceptable one wins.
+GPUMemoryType pick_memory(VkPhysicalDeviceMemoryProperties memories, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude) {
+  for(uint32_t i = 0; i < memories.memoryTypeCount; i++){
+    VkMemoryPropertyFlags flags = memories.memoryTypes[i].propertyFlags;
+    // Unsigned 1: shifting a signed int into bit 31 is undefined behaviour.
+    if((filter & (UINT32_C(1) << i))
+        && ((include & flags) == include)
+        && ((exclude & flags) == 0)) {
+      GPUMemoryType ret = {
+        .flags = flags,
+        .index = i,
+      };
+      return ret;
+    }
+  }
+
+  GPUMemoryType err = {
+    .flags = 0,
+    .index = GPU_MEMORY_TYPE_INVALID_INDEX,
+  };
+  return err;
+}
+
+// Frees every node of a chunk list.
+static void gpu_chunk_list_free(GPUMemoryChunk* head) {
+  while(head != NULL) {
+    GPUMemoryChunk* next = head->next;
+    free(head);
+    head = next;
+  }
+}
+
+VkResult gpu_page_allocate(VkDevice device, VkPhysicalDeviceMemoryProperties memories, VkDeviceSize size, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude, GPUPage** handle) {
+  if(handle == NULL || size == 0) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  // Resolve the memory type before allocating anything so a miss needs no cleanup.
+  GPUMemoryType memory_type = pick_memory(memories, filter, include, exclude);
+  if(memory_type.index == GPU_MEMORY_TYPE_INVALID_INDEX) {
+    return VK_ERROR_UNKNOWN;
+  }
+
+  GPUPage* output = malloc(sizeof *output);
+  GPUMemoryChunk* initial_chunk = malloc(sizeof *initial_chunk);
+  if(output == NULL || initial_chunk == NULL) {
+    free(initial_chunk);
+    free(output);
+    return VK_ERROR_OUT_OF_HOST_MEMORY;
+  }
+
+  VkMemoryAllocateInfo allocate_info = {
+    .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+    .allocationSize = size,
+    .memoryTypeIndex = memory_type.index,
+    .pNext = NULL,
+  };
+
+  VkDeviceMemory memory = VK_NULL_HANDLE;
+  VkResult result = vkAllocateMemory(device, &allocate_info, NULL, &memory);
+  if(result != VK_SUCCESS) {
+    free(initial_chunk);
+    free(output);
+    return result;
+  }
+
+  // The whole page starts out as one free chunk.
+  initial_chunk->size = size;
+  initial_chunk->offset = 0;
+  initial_chunk->next = NULL;
+
+  output->memory = memory;
+  output->size = size;
+  output->type = memory_type;
+  output->free = initial_chunk;
+  output->allocated = NULL;
+
+  *handle = output;
+
+  return VK_SUCCESS;
+}
+
+void gpu_page_free(VkDevice device, GPUPage* page) {
+  if(page == NULL) {
+    return;
+  }
+
+  // Both lists own their nodes, so both have to be released.
+  gpu_chunk_list_free(page->free);
+  gpu_chunk_list_free(page->allocated);
+
+  vkFreeMemory(device, page->memory, NULL);
+  free(page);
+}
+
+// Carves `size` bytes aligned to `alignment` out of the first free chunk that can hold them,
+// appends the new chunk to the page's allocated list and stores it in *allocation.
+static VkResult gpu_chunk_reserve(GPUPage* page, VkDeviceSize size, VkDeviceSize alignment, GPUMemoryChunk** allocation) {
+  if(alignment == 0) {
+    alignment = 1;
+  }
+
+  // First fit: stop at the first free chunk with room for the alignment padding plus `size`.
+  GPUMemoryChunk* prev = NULL;
+  GPUMemoryChunk* cur = page->free;
+  VkDeviceSize padding = 0;
+  while(cur != NULL) {
+    VkDeviceSize misalignment = cur->offset % alignment;
+    padding = (misalignment == 0) ? 0 : (alignment - misalignment);
+    if(cur->size >= padding && (cur->size - padding) >= size) {
+      break;
+    }
+    prev = cur;
+    cur = cur->next;
+  }
+
+  if(cur == NULL) {
+    return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+  }
+
+  VkDeviceSize remainder = cur->size - padding - size;
+  bool split = (padding > 0 && remainder > 0);
+
+  // Allocate every node up front so the lists are only modified once nothing can fail.
+  GPUMemoryChunk* chunk = malloc(sizeof *chunk);
+  // Free space on both sides of the allocation needs a second free-list node.
+  GPUMemoryChunk* tail = split ? malloc(sizeof *tail) : NULL;
+  if(chunk == NULL || (split && tail == NULL)) {
+    free(tail);
+    free(chunk);
+    return VK_ERROR_OUT_OF_HOST_MEMORY;
+  }
+
+  chunk->offset = cur->offset + padding;
+  chunk->size = size;
+  chunk->next = NULL;
+
+  if(padding > 0) {
+    // `cur` shrinks to the padding in front of the allocation.
+    cur->size = padding;
+    if(split) {
+      tail->offset = chunk->offset + size;
+      tail->size = remainder;
+      tail->next = cur->next;
+      cur->next = tail;
+    }
+  } else if(remainder > 0) {
+    // The allocation is taken from the front of `cur`.
+    cur->offset += size;
+    cur->size = remainder;
+  } else {
+    // Exact fit: unlink `cur` while keeping the rest of the free list reachable.
+    if(prev == NULL) {
+      page->free = cur->next;
+    } else {
+      prev->next = cur->next;
+    }
+    free(cur);
+  }
+
+  // Append so the allocated list stays in allocation order.
+  GPUMemoryChunk** link = &page->allocated;
+  while(*link != NULL) {
+    link = &(*link)->next;
+  }
+  *link = chunk;
+
+  *allocation = chunk;
+  return VK_SUCCESS;
+}
+
+// Reserves a chunk of `page` that satisfies `requirements`. Fails with
+// VK_ERROR_VALIDATION_FAILED_EXT when the resource may not be bound to the page's memory type.
+static VkResult gpu_reserve_for(GPUPage* page, const VkMemoryRequirements* requirements, GPUMemoryChunk** allocation) {
+  if(page->type.index >= VK_MAX_MEMORY_TYPES
+      || (requirements->memoryTypeBits & (UINT32_C(1) << page->type.index)) == 0) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  return gpu_chunk_reserve(page, requirements->size, requirements->alignment, allocation);
+}
+
+VkResult gpu_image_malloc(VkDevice device, GPUPage* page, VkImageCreateInfo* info, GPUImage* image) {
+  if(image == NULL || info == NULL || page == NULL) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  VkImage handle = VK_NULL_HANDLE;
+  VkResult result = vkCreateImage(device, info, NULL, &handle);
+  if(result != VK_SUCCESS) {
+    return result;
+  }
+
+  VkMemoryRequirements requirements;
+  vkGetImageMemoryRequirements(device, handle, &requirements);
+
+  GPUMemoryChunk* allocation = NULL;
+  result = gpu_reserve_for(page, &requirements, &allocation);
+  if(result == VK_SUCCESS) {
+    result = vkBindImageMemory(device, handle, page->memory, allocation->offset);
+    if(result != VK_SUCCESS) {
+      gpu_free(page, allocation);
+    }
+  }
+  if(result != VK_SUCCESS) {
+    // Do not leak the image when no memory could be reserved or bound.
+    vkDestroyImage(device, handle, NULL);
+    return result;
+  }
+
+  image->page = page;
+  image->memory = allocation;
+  image->handle = handle;
+
+  return VK_SUCCESS;
+}
+
+VkResult gpu_buffer_malloc(VkDevice device, GPUPage* page, VkDeviceSize size, VkBufferUsageFlags usage, GPUBuffer* buffer) {
+  if(buffer == NULL || page == NULL) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  VkBufferCreateInfo buffer_info = {
+    .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+    .size = size,
+    .usage = usage,
+    .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+  };
+
+  VkBuffer handle = VK_NULL_HANDLE;
+  VkResult result = vkCreateBuffer(device, &buffer_info, NULL, &handle);
+  if(result != VK_SUCCESS) {
+    return result;
+  }
+
+  // The driver may need more bytes than `size` and a specific alignment, so ask it.
+  VkMemoryRequirements requirements;
+  vkGetBufferMemoryRequirements(device, handle, &requirements);
+
+  GPUMemoryChunk* allocation = NULL;
+  result = gpu_reserve_for(page, &requirements, &allocation);
+  if(result == VK_SUCCESS) {
+    result = vkBindBufferMemory(device, handle, page->memory, allocation->offset);
+    if(result != VK_SUCCESS) {
+      gpu_free(page, allocation);
+    }
+  }
+  if(result != VK_SUCCESS) {
+    // Do not leak the buffer when no memory could be reserved or bound.
+    vkDestroyBuffer(device, handle, NULL);
+    return result;
+  }
+
+  buffer->page = page;
+  buffer->memory = allocation;
+  buffer->handle = handle;
+
+  return VK_SUCCESS;
+}
+
+void gpu_buffer_free(VkDevice device, GPUBuffer buffer) {
+  vkDestroyBuffer(device, buffer.handle, NULL);
+  gpu_free(buffer.page, buffer.memory);
+}
+
+void gpu_image_free(VkDevice device, GPUImage image) {
+  vkDestroyImage(device, image.handle, NULL);
+  gpu_free(image.page, image.memory);
+}
+
+void gpu_free(GPUPage* page, GPUMemoryChunk* memory) {
+  if(page == NULL || memory == NULL) {
+    return;
+  }
+
+  // Unlink `memory` from the allocated list; bail out if it is not in there.
+  GPUMemoryChunk** link = &page->allocated;
+  while(*link != NULL && *link != memory) {
+    link = &(*link)->next;
+  }
+  if(*link == NULL) {
+    return;
+  }
+  *link = memory->next;
+  memory->next = NULL;
+
+  // Find the free chunks on either side of `memory` (the free list is sorted by offset).
+  GPUMemoryChunk* free_prev = NULL;
+  GPUMemoryChunk* free_cur = page->free;
+  while(free_cur != NULL && free_cur->offset <= memory->offset) {
+    free_prev = free_cur;
+    free_cur = free_cur->next;
+  }
+
+  bool left_cont = (free_prev != NULL) && ((free_prev->offset + free_prev->size) == memory->offset);
+  bool right_cont = (free_cur != NULL) && ((memory->offset + memory->size) == free_cur->offset);
+
+  if(left_cont && right_cont) {
+    // `memory` closes the gap between two free chunks: fold all three into the left one.
+    free_prev->size += memory->size + free_cur->size;
+    free_prev->next = free_cur->next;
+    free(free_cur);
+    free(memory);
+  } else if(left_cont) {
+    free_prev->size += memory->size;
+    free(memory);
+  } else if(right_cont) {
+    free_cur->offset = memory->offset;
+    free_cur->size += memory->size;
+    free(memory);
+  } else {
+    // No free neighbour: link `memory` in between free_prev and free_cur.
+    memory->next = free_cur;
+    if(free_prev == NULL) {
+      page->free = memory;
+    } else {
+      free_prev->next = memory;
+    }
+  }
+}
+
+void fprintchunks(FILE* out, GPUMemoryChunk* start) {
+  fprintf(out, "Chunks: {");
+  for(GPUMemoryChunk* cur = start; cur != NULL; cur = cur->next) {
+    // VkDeviceSize is a uint64_t; the casts keep %llu correct on every platform.
+    fprintf(out, "%llu@%llu%s", (unsigned long long)cur->size, (unsigned long long)cur->offset, (cur->next == NULL) ? "" : ", ");
+  }
+  fprintf(out, "}\n");
+}
diff --git a/src/main.c b/src/main.c
index 36345ed..101450b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,5 +1,7 @@
 #define VK_USE_PLATFORM_MACOS_MVK
 #include "vulkan/vulkan_core.h"
+#include "vulkan/vk_enum_string_helper.h"
+
 #define GLFW_INCLUDE_VULKAN
 #include <GLFW/glfw3.h>
 #define GLFW_EXPOSE_NATIVE_COCOA
@@ -19,6 +21,7 @@
 #include
 #include
+#include <gpu_mem.h>
 
 typedef struct AllocatedBufferStruct {
   VkDeviceMemory memory;
@@ -264,17 +267,6 @@ void glfw_error(int error, const char* description) {
   fprintf(stderr, "GLFW_ERR: 0x%02x - %s\n", error, description);
 }
 
-uint32_t pick_memory(VkPhysicalDeviceMemoryProperties properties, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude) {
-  for(uint32_t i = 0; i < properties.memoryTypeCount; i++){
-    if((filter & (1 << i))
-        && ((include & properties.memoryTypes[i].propertyFlags) == include)
-        && ((exclude & properties.memoryTypes[i].propertyFlags) == 0)) {
-      return i;
-    }
-  }
-  return 0xFFFFFFFF;
-}
-
 GLFWwindow* init_window(int width, int height) {
   glfwInit();
   glfwSetErrorCallback(glfw_error);
@@ -1035,7 +1027,7 @@ AllocatedImage allocate_image(VkPhysicalDeviceMemoryProperties memories, VkDevic
   VkMemoryAllocateInfo memory_info = {
     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     .allocationSize = memory_requirements.size,
-    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude),
+    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude).index,
   };
 
   result = vkAllocateMemory(device, &memory_info, 0, &allocated.memory);
@@ -1084,7 +1076,7 @@ AllocatedBuffer allocate_buffer(VkPhysicalDeviceMemoryProperties memories, VkDev
   VkMemoryAllocateInfo alloc_info = {
     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     .allocationSize = memory_requirements.size,
-    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude),
+    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude).index,
   };
 
   result = vkAllocateMemory(device, &alloc_info, 0, &ret.memory);
@@ -1602,7 +1594,7 @@ int create_depth_image(VulkanContext* context) {
   VkMemoryAllocateInfo depth_memory_info = {
     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     .allocationSize = depth_image_requirements.size,
-    .memoryTypeIndex = pick_memory(context->memories, depth_image_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, 0),
+    .memoryTypeIndex = pick_memory(context->memories, depth_image_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, 0).index,
   };
 
   VkDeviceMemory depth_image_memory;
@@ -3628,6 +3620,61 @@ int main() {
     return 2;
   }
 
+  // Exercise the sub-allocator: fill a small page, free in shuffled order, then refill.
+  GPUPage* page = NULL;
+  VkResult result = gpu_page_allocate(context->device, context->memories, 500, 0xFFFFFFFF, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, &page);
+  if(result != VK_SUCCESS) {
+    return -1;
+  }
+
+  GPUBuffer buffers[10] = {0};
+  for(int i = 0; i < 10; i++) {
+    result = gpu_buffer_malloc(context->device, page, 100, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, &buffers[i]);
+    if(result != VK_SUCCESS) {
+      fprintf(stderr, "gpu_malloc error: %s\n", string_VkResult(result));
+    } else {
+      fprintf(stderr, "gpu_malloc: %p@%llu\n", (void*)buffers[i].handle, (unsigned long long)buffers[i].memory->offset);
+      fprintchunks(stderr, page->allocated);
+      fprintchunks(stderr, page->free);
+    }
+  }
+
+  int test[] = {3, 0, 2, 4, 1};
+  for(size_t i = 0; i < (sizeof(test)/sizeof(int)); i++) {
+    int idx = test[i];
+    // Slots whose allocation failed never received a chunk.
+    if(buffers[idx].memory == NULL) {
+      continue;
+    }
+    fprintf(stderr, "freeing %llu@%llu\n", (unsigned long long)buffers[idx].memory->size, (unsigned long long)buffers[idx].memory->offset);
+    gpu_buffer_free(context->device, buffers[idx]);
+    buffers[idx] = (GPUBuffer){0};
+    fprintchunks(stderr, page->free);
+  }
+
+  for(int i = 0; i < 10; i++) {
+    if(buffers[i].memory != NULL) {
+      continue;
+    }
+    result = gpu_buffer_malloc(context->device, page, 100, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, &buffers[i]);
+    if(result != VK_SUCCESS) {
+      fprintf(stderr, "gpu_malloc error: %s\n", string_VkResult(result));
+    } else {
+      fprintf(stderr, "gpu_malloc: %p@%llu\n", (void*)buffers[i].handle, (unsigned long long)buffers[i].memory->offset);
+      fprintchunks(stderr, page->allocated);
+      fprintchunks(stderr, page->free);
+    }
+  }
+
+  // Destroy the buffers that are still alive before their backing page goes away.
+  for(int i = 0; i < 10; i++) {
+    if(buffers[i].memory != NULL) {
+      gpu_buffer_free(context->device, buffers[i]);
+    }
+  }
+
+  gpu_page_free(context->device, page);
+
   glfwSetKeyCallback(window, key_callback);
   main_loop(window, context);
 