From 82166a8181858930630e7da3379b9b1de37074fe Mon Sep 17 00:00:00 2001
From: Noah Metz <noah@metznet.ca>
Date: Fri, 12 Jan 2024 19:00:20 -0700
Subject: [PATCH] Moved to homebrew clang, added gpu memory management
 functions

---
 Makefile          |   4 +-
 include/gpu_mem.h |  54 ++++++++
 src/gpu_mem.c     | 320 ++++++++++++++++++++++++++++++++++++++++++++++
 src/main.c        |  59 +++++++--
 4 files changed, 421 insertions(+), 16 deletions(-)
 create mode 100644 include/gpu_mem.h
 create mode 100644 src/gpu_mem.c

diff --git a/Makefile b/Makefile
index 0356b9c..229cffc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 CFLAGS = -fsanitize=address -I $(ROOT_DIR)/include -I/usr/local/include -O0 -g -Wall -Wextra
-LDFLAGS = -L/usr/local/lib -lglfw -lvulkan -ldl -Xlinker -rpath -Xlinker /usr/local/lib
-CC = clang
+LDFLAGS = -L/opt/homebrew/opt/llvm/lib -L/usr/local/lib -lglfw -lvulkan -ldl -Xlinker -rpath -Xlinker /usr/local/lib -Xlinker -rpath -Xlinker /opt/homebrew/opt/llvm/lib -Xpreprocessor -fopenmp
+CC = /opt/homebrew/opt/llvm/bin/clang
 GDB = lldb
 
 SOURCES = $(wildcard src/*.c)
diff --git a/include/gpu_mem.h b/include/gpu_mem.h
new file mode 100644
index 0000000..dfe71ad
--- /dev/null
+++ b/include/gpu_mem.h
@@ -0,0 +1,54 @@
+#ifndef GPU_MEM_H
+#define GPU_MEM_H
+
+#include <vulkan/vulkan_core.h>
+#include <stdio.h>
+
+typedef struct GPUMemoryTypeStruct { // result of pick_memory: one entry of memoryTypes[]
+  VkMemoryPropertyFlags flags; // propertyFlags of the chosen memory type (0 when none matched)
+  uint32_t index;              // index into memoryTypes[]; 0xFFFFFFFF when none matched
+} GPUMemoryType;
+
+typedef struct GPUMemoryChunkStruct { // node of a page's free or allocated list
+  VkDeviceSize size;   // length of the range
+  VkDeviceSize offset; // start of the range; used as the memoryOffset when binding to the page's memory
+  struct GPUMemoryChunkStruct* next; // next chunk in the same list, NULL at the tail
+} GPUMemoryChunk;
+
+typedef struct GPUPageStruct { // one vkAllocateMemory block plus its sub-allocation bookkeeping
+  VkDeviceMemory  memory; // device memory backing the page
+  VkDeviceSize    size;   // allocationSize passed to vkAllocateMemory
+  GPUMemoryType   type;   // memory type the page was allocated from
+  // Singly linked lists of malloc'd chunks:
+  GPUMemoryChunk* free;      // unused ranges; starts as a single chunk covering the whole page
+  GPUMemoryChunk* allocated; // ranges handed out by gpu_buffer_malloc / gpu_image_malloc
+
+} GPUPage;
+
+typedef struct GPUBufferStruct { // a VkBuffer bound to a range of a GPUPage
+  GPUPage*        page;   // page whose memory the buffer is bound to
+  GPUMemoryChunk* memory; // chunk in page->allocated describing the bound range
+  VkBuffer        handle; // buffer created by gpu_buffer_malloc
+} GPUBuffer;
+
+typedef struct GPUImageStruct { // a VkImage bound to a range of a GPUPage
+  GPUPage*        page;   // page whose memory the image is bound to
+  GPUMemoryChunk* memory; // chunk in page->allocated describing the bound range
+  VkImage         handle; // image created by gpu_image_malloc
+} GPUImage;
+
+GPUMemoryType pick_memory(VkPhysicalDeviceMemoryProperties memories, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude); // first type allowed by `filter` with all `include` and no `exclude` flags; index 0xFFFFFFFF if none
+// Page lifetime, then buffer/image sub-allocation out of a page's memory.
+VkResult gpu_page_allocate(VkDevice device, VkPhysicalDeviceMemoryProperties memories, VkDeviceSize size, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude, GPUPage** handle);
+void gpu_page_free(VkDevice device, GPUPage* page);
+VkResult gpu_buffer_malloc(VkDevice device, GPUPage* page, VkDeviceSize size, VkBufferUsageFlags usage, GPUBuffer* buffer);
+VkResult gpu_image_malloc(VkDevice device, GPUPage* page, VkImageCreateInfo* info, GPUImage* image);
+void gpu_buffer_free(VkDevice device, GPUBuffer buffer);
+// Moves `memory` from page->allocated back to page->free.
+void gpu_free(GPUPage* page, GPUMemoryChunk* memory);
+
+// Debug helper: prints a chunk list to `out` as "Chunks: {size@offset, ...}".
+void fprintchunks(FILE* out, GPUMemoryChunk* start);
+
+
+#endif
diff --git a/src/gpu_mem.c b/src/gpu_mem.c
new file mode 100644
index 0000000..987a9a4
--- /dev/null
+++ b/src/gpu_mem.c
@@ -0,0 +1,320 @@
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <gpu_mem.h>
+
+// Returns the first memory type whose bit is set in `filter` (e.g.
+// VkMemoryRequirements.memoryTypeBits), that has every `include` flag and none
+// of the `exclude` flags. When nothing matches, the result has index
+// 0xFFFFFFFF and flags 0.
+GPUMemoryType pick_memory(VkPhysicalDeviceMemoryProperties memories, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude) {
+  for(uint32_t i = 0; i < memories.memoryTypeCount; i++){
+    VkMemoryPropertyFlags flags = memories.memoryTypes[i].propertyFlags;
+    // Unsigned literal: `1 << 31` on a signed int is undefined behaviour.
+    bool selectable = (filter & (1u << i)) != 0;
+    if(selectable && ((include & flags) == include) && ((exclude & flags) == 0)) {
+      GPUMemoryType ret = { .flags = flags, .index = i };
+      return ret;
+    }
+  }
+
+  // Sentinel result: callers compare index against 0xFFFFFFFF.
+  GPUMemoryType err = { .flags = 0, .index = 0xFFFFFFFF };
+  return err;
+}
+
+// Allocates a `size`-byte VkDeviceMemory from the first memory type accepted by
+// pick_memory() and wraps it in a heap-allocated GPUPage whose free list is a
+// single chunk covering the whole page. On success the page is stored in
+// *handle (release it with gpu_page_free). On failure *handle is untouched and
+// every intermediate allocation is released before the error is returned.
+VkResult gpu_page_allocate(VkDevice device, VkPhysicalDeviceMemoryProperties memories, VkDeviceSize size, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude, GPUPage** handle) {
+  if(handle == NULL) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  // Choose the memory type first: failing here leaves nothing to clean up.
+  GPUMemoryType memory_type = pick_memory(memories, filter, include, exclude);
+  if(memory_type.index == 0xFFFFFFFF) {
+    return VK_ERROR_UNKNOWN;
+  }
+
+  GPUPage* output = malloc(sizeof(GPUPage));
+  GPUMemoryChunk* initial_chunk = malloc(sizeof(GPUMemoryChunk));
+  if(output == NULL || initial_chunk == NULL) {
+    free(output); // free(NULL) is a no-op, so either may have failed
+    free(initial_chunk);
+    return VK_ERROR_OUT_OF_HOST_MEMORY;
+  }
+
+  VkMemoryAllocateInfo allocate_info = {
+    .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+    .allocationSize = size,
+    .memoryTypeIndex = memory_type.index,
+    .pNext = NULL,
+  };
+  VkResult result = vkAllocateMemory(device, &allocate_info, 0, &output->memory);
+  if(result != VK_SUCCESS) {
+    free(initial_chunk);
+    free(output);
+    return result;
+  }
+
+  initial_chunk->size = size;
+  initial_chunk->offset = 0;
+  initial_chunk->next = NULL;
+  output->size = size;
+  output->type = memory_type;
+  output->free = initial_chunk;
+  output->allocated = NULL;
+  *handle = output;
+  return VK_SUCCESS;
+}
+
+// Frees both chunk lists, the device memory and the page itself (NULL is a no-op).
+void gpu_page_free(VkDevice device, GPUPage* page) {
+  if(page == NULL) {
+    return;
+  }
+  GPUMemoryChunk* lists[2] = {page->free, page->allocated}; // allocated was leaked before
+  for(int i = 0; i < 2; i++) {
+    for(GPUMemoryChunk* next = NULL; lists[i] != NULL; lists[i] = next) {
+      next = lists[i]->next; // read the link before the node is freed
+      free(lists[i]);
+    }
+  }
+  vkFreeMemory(device, page->memory, 0);
+  free(page);
+}
+
+// Commits an allocation found by gpu_new_allocation: appends `allocation` to
+// the tail of page->allocated and takes `size` bytes off the front of the free
+// chunk `cur`. `prev` is the free chunk before `cur` (NULL if `cur` is the head).
+void gpu_add_allocation(GPUPage* page, GPUMemoryChunk* allocation, VkDeviceSize size, GPUMemoryChunk* prev, GPUMemoryChunk* cur) {
+  GPUMemoryChunk** tail = &page->allocated;
+  while(*tail != NULL) {
+    tail = &(*tail)->next;
+  }
+  *tail = allocation;
+  if(cur->size > size) {
+    cur->offset += size;
+    cur->size -= size;
+  } else if(cur->size == size) {
+    // Fully consumed: unlink it but keep the chunks after it. The head case
+    // used to set page->free = NULL, which dropped the rest of the free list.
+    if(prev == NULL) {
+      page->free = cur->next;
+    } else {
+      prev->next = cur->next;
+    }
+    free(cur);
+  }
+}
+
+// First-fit search of page->free for a chunk of at least `size` bytes.
+// Outputs: *cur is the chosen free chunk, *prev the free chunk before it (NULL
+// when *cur is the head), *allocation a new unlinked chunk of `size` bytes at
+// *cur's offset. No list is modified; gpu_add_allocation commits the result.
+// Returns VK_ERROR_VALIDATION_FAILED_EXT for a NULL output pointer,
+// VK_ERROR_OUT_OF_DEVICE_MEMORY when nothing fits, VK_ERROR_OUT_OF_HOST_MEMORY
+// when malloc fails, otherwise VK_SUCCESS.
+VkResult gpu_new_allocation(GPUPage* page, GPUMemoryChunk** prev, GPUMemoryChunk** cur, GPUMemoryChunk** allocation, VkDeviceSize size) {
+  if(prev == NULL || cur == NULL || allocation == NULL) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  // Walk the free list until a chunk is big enough, remembering its predecessor.
+  GPUMemoryChunk* before = NULL;
+  GPUMemoryChunk* candidate = page->free;
+  for(; candidate != NULL && candidate->size < size; candidate = candidate->next) {
+    before = candidate;
+  }
+  *prev = before;
+  *cur = candidate;
+  if(candidate == NULL) {
+    return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+  }
+
+  GPUMemoryChunk* fresh = malloc(sizeof(GPUMemoryChunk));
+  *allocation = fresh;
+  if(fresh == NULL) {
+    return VK_ERROR_OUT_OF_HOST_MEMORY;
+  }
+  fresh->next = NULL;
+  fresh->size = size;
+  fresh->offset = candidate->offset;
+  return VK_SUCCESS;
+}
+
+// Creates a VkImage from `info` and binds it to the first free chunk of `page`
+// that fits the image's memory requirements, filling in *image on success.
+// On failure the image is destroyed, image->handle is reset to VK_NULL_HANDLE
+// and the page is unchanged. requirements.alignment/memoryTypeBits are not checked.
+VkResult gpu_image_malloc(VkDevice device, GPUPage* page, VkImageCreateInfo* info, GPUImage* image) {
+  if(image == NULL || info == NULL || page == NULL) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  VkResult result = vkCreateImage(device, info, 0, &image->handle);
+  if(result != VK_SUCCESS) {
+    return result;
+  }
+  VkMemoryRequirements requirements;
+  vkGetImageMemoryRequirements(device, image->handle, &requirements);
+
+  GPUMemoryChunk* cur = NULL;
+  GPUMemoryChunk* prev = NULL;
+  GPUMemoryChunk* allocation = NULL;
+  result = gpu_new_allocation(page, &prev, &cur, &allocation, requirements.size);
+  if(result == VK_SUCCESS) {
+    result = vkBindImageMemory(device, image->handle, page->memory, cur->offset);
+  }
+  if(result != VK_SUCCESS) { // nothing is linked into the page yet, so this undoes everything
+    free(allocation);
+    vkDestroyImage(device, image->handle, 0);
+    image->handle = VK_NULL_HANDLE;
+    return result;
+  }
+  image->page = page;
+  image->memory = allocation;
+  gpu_add_allocation(page, allocation, requirements.size, prev, cur);
+  return VK_SUCCESS;
+}
+
+// Creates an exclusive VkBuffer (`size` bytes, `usage`), binds it to the first
+// free chunk of `page` that fits its VkMemoryRequirements.size and fills *buffer.
+// On failure the buffer is destroyed and the page unchanged. Alignment is ignored.
+VkResult gpu_buffer_malloc(VkDevice device, GPUPage* page, VkDeviceSize size, VkBufferUsageFlags usage, GPUBuffer* buffer) {
+  if(buffer == NULL || page == NULL) {
+    return VK_ERROR_VALIDATION_FAILED_EXT;
+  }
+
+  VkBufferCreateInfo buffer_info = {
+    .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+    .size = size,
+    .usage = usage,
+    .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+  };
+  VkResult result = vkCreateBuffer(device, &buffer_info, 0, &buffer->handle);
+  if(result != VK_SUCCESS) {
+    return result;
+  }
+  VkMemoryRequirements requirements; // the driver may need more than `size` bytes
+  vkGetBufferMemoryRequirements(device, buffer->handle, &requirements);
+  GPUMemoryChunk* cur = NULL;
+  GPUMemoryChunk* prev = NULL;
+  GPUMemoryChunk* allocation = NULL;
+  result = gpu_new_allocation(page, &prev, &cur, &allocation, requirements.size);
+  if(result == VK_SUCCESS) {
+    result = vkBindBufferMemory(device, buffer->handle, page->memory, cur->offset);
+  }
+  if(result != VK_SUCCESS) { // nothing is linked into the page yet, so this undoes everything
+    free(allocation);
+    vkDestroyBuffer(device, buffer->handle, 0);
+    buffer->handle = VK_NULL_HANDLE;
+    return result;
+  }
+  buffer->page = page;
+  buffer->memory = allocation;
+  gpu_add_allocation(page, allocation, requirements.size, prev, cur);
+  return VK_SUCCESS;
+}
+
+void gpu_buffer_free(VkDevice device, GPUBuffer buffer) { // destroys the VkBuffer, then releases its chunk
+  vkDestroyBuffer(device, buffer.handle, 0);
+  gpu_free(buffer.page, buffer.memory); // page and memory are forwarded to gpu_free unchecked
+}
+
+// Returns `memory`, a chunk currently linked into page->allocated, to the
+// page's free list. The free list is kept sorted by offset: the chunk is
+// merged with the free chunk directly before and/or after it when the ranges
+// touch, otherwise it is inserted between them. After the call `memory`
+// belongs to the page and may already have been freed, so the caller must not
+// use it again. Nothing happens when page or memory is NULL, or when memory
+// is not in page->allocated.
+void gpu_free(GPUPage* page, GPUMemoryChunk* memory) {
+  if(page == NULL || memory == NULL) {
+    return;
+  }
+
+  // Unlink from the allocated list. `link` points at the pointer referencing
+  // the current node, so an empty list or unknown chunk ends with *link == NULL.
+  GPUMemoryChunk** link = &page->allocated;
+  while(*link != NULL && *link != memory) {
+    link = &(*link)->next;
+  }
+  if(*link == NULL) {
+    return;
+  }
+  *link = memory->next;
+  memory->next = NULL;
+
+  // Locate the neighbours: free_prev is the last free chunk at or before
+  // memory->offset, free_cur the first one after it (either may be NULL).
+  GPUMemoryChunk* free_prev = NULL;
+  GPUMemoryChunk* free_cur = page->free;
+  while(free_cur != NULL && free_cur->offset <= memory->offset) {
+    free_prev = free_cur;
+    free_cur = free_cur->next;
+  }
+
+  bool left_cont = false;
+  if(free_prev != NULL) {
+    left_cont = ((free_prev->offset + free_prev->size) == memory->offset);
+  }
+  bool right_cont = false;
+  if(free_cur != NULL) {
+    right_cont = ((memory->offset + memory->size) == free_cur->offset);
+  }
+
+  // Debug trace, printed (as before) only when the free list is not empty.
+  if(free_prev != NULL || free_cur != NULL) {
+    fprintf(stderr, "l: %d, r: %d\n", left_cont, right_cont);
+  }
+
+  if(left_cont && right_cont) {
+    // The chunk bridges both neighbours: fold all three into free_prev.
+    free_prev->size += memory->size + free_cur->size;
+    free_prev->next = free_cur->next;
+    free(free_cur);
+    free(memory);
+  } else if(right_cont) {
+    // Grow the following chunk downwards to cover the freed range.
+    free_cur->offset = memory->offset;
+    free_cur->size += memory->size;
+    free(memory);
+  } else if(left_cont) {
+    // Grow the preceding chunk upwards to cover the freed range.
+    free_prev->size += memory->size;
+    free(memory);
+  } else {
+    // No neighbour touches: link the chunk between free_prev and free_cur.
+    // (Linking to free_cur->next here used to drop free_cur from the list.)
+    memory->next = free_cur;
+    if(free_prev == NULL) {
+      page->free = memory;
+    } else {
+      free_prev->next = memory;
+    }
+  }
+}
+
+// Debug helper: writes the chunk list starting at `start` to `out` in the
+// form "Chunks: {size@offset, size@offset}" followed by a newline; an empty
+// list prints "Chunks: {}".
+void fprintchunks(FILE* out, GPUMemoryChunk* start) {
+  fprintf(out, "Chunks: {");
+  for(GPUMemoryChunk* cur = start; cur != NULL; cur = cur->next) {
+    // VkDeviceSize is a uint64_t, which is not `unsigned long long` on every
+    // platform; the casts make the arguments match %llu everywhere.
+    unsigned long long size = (unsigned long long)cur->size;
+    unsigned long long offset = (unsigned long long)cur->offset;
+    if(cur->next == NULL) {
+      fprintf(out, "%llu@%llu", size, offset);
+    } else {
+      fprintf(out, "%llu@%llu, ", size, offset);
+    }
+  }
+  fprintf(out, "}\n");
+}
diff --git a/src/main.c b/src/main.c
index 36345ed..101450b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,5 +1,7 @@
 #define VK_USE_PLATFORM_MACOS_MVK
 #include "vulkan/vulkan_core.h"
+#include "vulkan/vk_enum_string_helper.h"
+
 #define GLFW_INCLUDE_VULKAN
 #include <GLFW/glfw3.h>
 #define GLFW_EXPOSE_NATIVE_COCOA
@@ -19,6 +21,7 @@
 
 #include <ply.h>
 #include <map.h>
+#include <gpu_mem.h>
 
 typedef struct AllocatedBufferStruct {
   VkDeviceMemory memory;
@@ -264,17 +267,6 @@ void glfw_error(int error, const char* description) {
   fprintf(stderr, "GLFW_ERR: 0x%02x - %s\n", error, description);
 }
 
-uint32_t pick_memory(VkPhysicalDeviceMemoryProperties properties, uint32_t filter, VkMemoryPropertyFlags include, VkMemoryPropertyFlags exclude) {
-  for(uint32_t i = 0; i < properties.memoryTypeCount; i++){
-    if((filter & (1 << i))
-        && ((include & properties.memoryTypes[i].propertyFlags) == include)
-        && ((exclude & properties.memoryTypes[i].propertyFlags) == 0)) {
-      return i;
-    }
-  }
-  return 0xFFFFFFFF;
-}
-
 GLFWwindow* init_window(int width, int height) {
   glfwInit();
   glfwSetErrorCallback(glfw_error);
@@ -1035,7 +1027,7 @@ AllocatedImage allocate_image(VkPhysicalDeviceMemoryProperties memories, VkDevic
   VkMemoryAllocateInfo memory_info = {
     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     .allocationSize = memory_requirements.size,
-    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude),
+    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude).index,
   };
 
   result = vkAllocateMemory(device, &memory_info, 0, &allocated.memory);
@@ -1084,7 +1076,7 @@ AllocatedBuffer allocate_buffer(VkPhysicalDeviceMemoryProperties memories, VkDev
   VkMemoryAllocateInfo alloc_info = {
     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     .allocationSize = memory_requirements.size,
-    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude),
+    .memoryTypeIndex = pick_memory(memories, memory_requirements.memoryTypeBits, include, exclude).index,
   };
 
   result = vkAllocateMemory(device, &alloc_info, 0, &ret.memory);
@@ -1602,7 +1594,7 @@ int create_depth_image(VulkanContext* context) {
   VkMemoryAllocateInfo depth_memory_info = {
     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
     .allocationSize = depth_image_requirements.size,
-    .memoryTypeIndex = pick_memory(context->memories, depth_image_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, 0),
+    .memoryTypeIndex = pick_memory(context->memories, depth_image_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, 0).index,
   };
 
   VkDeviceMemory depth_image_memory;
@@ -3628,6 +3620,45 @@ int main() {
     return 2;
   }
 
+  GPUPage* page = NULL;
+  VkResult result = gpu_page_allocate(context->device, context->memories, 500, 0xFFFFFFFF, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, &page);
+  if(result != VK_SUCCESS) {
+    return -1;
+  }
+
+  GPUBuffer buffers[10] = {0};
+  for(int i = 0; i < 10; i++) {
+    result = gpu_buffer_malloc(context->device, page, 100, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, &buffers[i]);
+    if(result != VK_SUCCESS) {
+      fprintf(stderr, "gpu_malloc error: %s\n", string_VkResult(result));
+    } else {
+      fprintf(stderr, "gpu_malloc: %p@%llu\n", buffers[i].handle, buffers[i].memory->offset);
+      fprintchunks(stderr, page->allocated);
+      fprintchunks(stderr, page->free);
+    }
+  }
+
+  int test[] = {3, 0, 2, 4, 1};
+  for(size_t i = 0; i < (sizeof(test)/sizeof(int)); i++) {
+    int idx = test[i];
+    fprintf(stderr, "freeing %llu@%llu\n", buffers[idx].memory->size, buffers[idx].memory->offset);
+    gpu_buffer_free(context->device, buffers[idx]);
+    fprintchunks(stderr, page->free);
+  }
+
+  for(int i = 0; i < 10; i++) {
+    result = gpu_buffer_malloc(context->device, page, 100, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, &buffers[i]);
+    if(result != VK_SUCCESS) {
+      fprintf(stderr, "gpu_malloc error: %s\n", string_VkResult(result));
+    } else {
+      fprintf(stderr, "gpu_malloc: %p@%llu\n", buffers[i].handle, buffers[i].memory->offset);
+      fprintchunks(stderr, page->allocated);
+      fprintchunks(stderr, page->free);
+    }
+  }
+
+  gpu_page_free(context->device, page);
+
   glfwSetKeyCallback(window, key_callback);
   main_loop(window, context);