#include "gpu.h"
#include "GLFW/glfw3.h"
#include "stdio.h"
#include "string.h"
#include "vk_mem_alloc.h"
#include "vulkan/vulkan_core.h"

// Layers that must be present; enabled on both the instance and the device.
const char * validation_layers[] = {
  "VK_LAYER_KHRONOS_validation",
  //"VK_LAYER_LUNARG_api_dump",
  "VK_LAYER_KHRONOS_synchronization2",
  "VK_LAYER_KHRONOS_shader_object",
};
uint32_t validation_layer_count = sizeof(validation_layers) / sizeof(const char *);

// Instance extensions requested in addition to the ones GLFW asks for.
const char * instance_extensions[] = {
  VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
  VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
  VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
#ifdef __APPLE__
  "VK_EXT_metal_surface",
#endif
  VK_KHR_SURFACE_EXTENSION_NAME,
};
uint32_t instance_extension_count = sizeof(instance_extensions) / sizeof(const char *);

// Device extensions enabled in create_logical_device.
const char * device_extensions[] = {
#ifdef __APPLE__
  "VK_KHR_portability_subset",
#endif
  VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
  VK_KHR_SWAPCHAIN_EXTENSION_NAME,
  VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME,
  VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME,
};
uint32_t device_extension_count = sizeof(device_extensions) / sizeof(const char *);

// Depth formats tried by find_depth_format, in order of preference.
VkFormat depth_formats[] = {
  VK_FORMAT_D32_SFLOAT,
  VK_FORMAT_D32_SFLOAT_S8_UINT,
  VK_FORMAT_D24_UNORM_S8_UINT
};
uint32_t depth_format_count = sizeof(depth_formats) / sizeof(VkFormat);

// GLFW error callback: prints the error code and description to stderr.
void glfw_error(int error, const char* description) {
  fprintf(stderr, "GLFW_ERR: 0x%02x - %s\n", error, description);
}

// Initializes GLFW and opens the resizable application window with no client
// API attached. Returns NULL when GLFW cannot be initialized or the window
// cannot be created.
GLFWwindow* init_window() {
  // Install the callback before glfwInit so initialization errors are reported.
  glfwSetErrorCallback(glfw_error);
  if(!glfwInit()) {
    return NULL;
  }

  glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
  // TODO: recreate the framebuffer on resize
  glfwWindowHint(GLFW_RESIZABLE, GLFW_TRUE);

  GLFWwindow* window = glfwCreateWindow(WINDOW_MIN_WIDTH, WINDOW_MIN_HEIGHT, "roleplay", 0, 0);
  if(window == NULL) {
    return NULL;
  }
  glfwSetWindowSizeLimits(window, WINDOW_MIN_WIDTH, WINDOW_MIN_HEIGHT, GLFW_DONT_CARE, GLFW_DONT_CARE);
  return window;
}

// Returns true when every name in layers[0..num_layers) is reported by
// vkEnumerateInstanceLayerProperties. The first missing layer is printed to
// stderr. Enumeration or allocation failure yields false.
bool check_validation_layers(const char ** layers, uint32_t num_layers) {
  uint32_t layer_count = 0;
  if(vkEnumerateInstanceLayerProperties(&layer_count, 0) != VK_SUCCESS) {
    return false;
  }

  VkLayerProperties* available_layers = NULL;
  if(layer_count > 0) {
    available_layers = malloc(sizeof(VkLayerProperties)*layer_count);
    if(available_layers == NULL) {
      return false;
    }
    // VK_INCOMPLETE still fills layer_count valid entries, so it is usable.
    VkResult result = vkEnumerateInstanceLayerProperties(&layer_count, available_layers);
    if(result != VK_SUCCESS && result != VK_INCOMPLETE) {
      free(available_layers);
      return false;
    }
  }

  bool all_found = true;
  for(uint32_t i = 0; i < num_layers && all_found; i++) {
    bool found = false;
    for(uint32_t j = 0; j < layer_count; j++) {
      if(strcmp(layers[i], available_layers[j].layerName) == 0) {
        found = true;
        break;
      }
    }
    if(found == false) {
      fprintf(stderr, "Failed to find layer %s\n", layers[i]);
      all_found = false;
    }
  }

  free(available_layers);
  return all_found;
}

// Creates the Vulkan instance with validation_layers enabled and the union of
// GLFW's required extensions and instance_extensions.
// Returns VK_ERROR_VALIDATION_FAILED_EXT when a requested layer is missing,
// VK_ERROR_OUT_OF_HOST_MEMORY when the extension list cannot be allocated,
// otherwise the result of vkCreateInstance.
VkResult create_instance(VkInstance* instance) {
  if(check_validation_layers(validation_layers, validation_layer_count) == false) {
    return VK_ERROR_VALIDATION_FAILED_EXT;
  }

  VkApplicationInfo app_info = {
    .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
    .pApplicationName = "roleplay",
    .applicationVersion = VK_MAKE_VERSION(0, 0, 1),
    .pEngineName = "roleplay",
    .engineVersion = VK_MAKE_VERSION(0, 0, 1),
    .apiVersion = VK_API_VERSION_1_2,
  };

  uint32_t glfw_extension_count = 0;
  const char** glfw_extensions = glfwGetRequiredInstanceExtensions(&glfw_extension_count);
  uint32_t extension_count = glfw_extension_count + instance_extension_count;

  const char** requested_extensions = malloc(sizeof(char*)*extension_count);
  if(requested_extensions == NULL) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }
  for(uint32_t i = 0; i < glfw_extension_count; i++) {
    requested_extensions[i] = glfw_extensions[i];
  }
  for(uint32_t i = 0; i < instance_extension_count; i++) {
    requested_extensions[glfw_extension_count + i] = instance_extensions[i];
  }

  VkInstanceCreateInfo instance_info = {
    .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
    .pApplicationInfo = &app_info,
    .enabledLayerCount = validation_layer_count,
    .ppEnabledLayerNames = validation_layers,
    .enabledExtensionCount = extension_count,
    .ppEnabledExtensionNames = requested_extensions,
    .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
  };

  // The name list is only needed for the call itself; release it on every path.
  VkResult result = vkCreateInstance(&instance_info, 0, instance);
  free(requested_extensions);
  return result;
}

// Debug-utils messenger callback: prints the message text to stderr and
// returns VK_FALSE.
static VKAPI_ATTR VkBool32 VKAPI_CALL debug_callback(
    VkDebugUtilsMessageSeverityFlagBitsEXT severity,
    VkDebugUtilsMessageTypeFlagsEXT type,
    const VkDebugUtilsMessengerCallbackDataEXT* callback_data,
    void* user_data) {
  (void)severity;
  (void)type;
  (void)user_data;

  fprintf(stderr, "Validation layer: %s\n", callback_data->pMessage);
  return VK_FALSE;
}

// Registers debug_callback as a debug-utils messenger on the instance.
// Returns VK_ERROR_EXTENSION_NOT_PRESENT when vkCreateDebugUtilsMessengerEXT
// cannot be resolved, otherwise the result of that call.
VkResult create_debug_messenger(VkInstance instance, VkDebugUtilsMessengerEXT* debug_messenger) {
  VkDebugUtilsMessengerCreateInfoEXT messenger_info = {
    .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
    // NOTE(review): only the VERBOSE severity bit is set; confirm that
    // warning/error severities are meant to be excluded.
    .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
    .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT,
    .pfnUserCallback = debug_callback,
    .pUserData = 0,
  };

  // Extension entry points are not exported statically; look the function up.
  PFN_vkCreateDebugUtilsMessengerEXT func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT");
  if(func == NULL) {
    return VK_ERROR_EXTENSION_NOT_PRESENT;
  }

  VkResult result;
  VK_RESULT(func(instance, &messenger_info, 0, debug_messenger));
  return VK_SUCCESS;
}

// Picks the highest-scoring physical device by device type (discrete 100,
// virtual 50, integrated 25, CPU 0; any other type is skipped). Ties keep the
// first device enumerated.
// Returns VK_ERROR_INITIALIZATION_FAILED when no device qualifies, so *device
// is always written on VK_SUCCESS.
VkResult get_best_physical_device(VkInstance instance, VkPhysicalDevice* device) {
  uint32_t device_count = 0;
  VkResult result;
  VK_RESULT(vkEnumeratePhysicalDevices(instance, &device_count, 0));
  if(device_count == 0) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }

  VkPhysicalDevice* devices = malloc(sizeof(VkPhysicalDevice)*device_count);
  if(devices == NULL) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }
  result = vkEnumeratePhysicalDevices(instance, &device_count, devices);
  if(result != VK_SUCCESS) {
    free(devices);
    return result;
  }

  int top_score = -1;
  for(uint32_t i = 0; i < device_count; i++) {
    VkPhysicalDeviceProperties properties;
    vkGetPhysicalDeviceProperties(devices[i], &properties);

    int score = 0;
    switch(properties.deviceType) {
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
      score = 100;
      break;
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
      score = 50;
      break;
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
      score = 25;
      break;
    case VK_PHYSICAL_DEVICE_TYPE_CPU:
      score = 0;
      break;
    default:
      continue;
    }

    if(score > top_score) {
      top_score = score;
      *device = devices[i];
    }
  }

  free(devices);
  return (top_score < 0) ? VK_ERROR_INITIALIZATION_FAILED : VK_SUCCESS;
}

// Chooses queue families, creates the logical device and fetches the queues.
//
// Graphics/present: the first family supporting both is used for both;
// otherwise the first graphics family and the first present family are used.
// Transfer: the last family with graphics and compute support that still has
// a queue free after graphics/present took index 0; it gets index 1 when it
// shares a family with graphics or present, index 0 otherwise.
//
// Fills family/index/handle of each GPUQueue and *device.
// Returns VK_ERROR_VALIDATION_FAILED_EXT on NULL output pointers,
// VK_ERROR_INITIALIZATION_FAILED when a required family is missing,
// VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure, otherwise the result of
// vkCreateDevice.
VkResult create_logical_device(VkPhysicalDevice physical_device, VkSurfaceKHR surface, GPUQueue* graphics_queue, GPUQueue* present_queue, GPUQueue* transfer_queue, VkDevice* device) {
  if(graphics_queue == NULL || present_queue == NULL || transfer_queue == NULL || device == NULL) {
    return VK_ERROR_VALIDATION_FAILED_EXT;
  }

  // Sentinel meaning "no family chosen yet".
  const uint32_t no_family = 0xFFFFFFFF;

  uint32_t queue_family_count = 0;
  vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_family_count, NULL);
  if(queue_family_count == 0) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }
  VkQueueFamilyProperties* queue_families = malloc(sizeof(VkQueueFamilyProperties)*queue_family_count);
  if(queue_families == NULL) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }
  vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_family_count, queue_families);

  graphics_queue->family = no_family;
  present_queue->family = no_family;
  for(uint32_t idx = 0; idx < queue_family_count; idx++) {
    VkBool32 present_support = VK_FALSE;
    vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, idx, surface, &present_support);
    bool graphics_support = (queue_families[idx].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;

    if(graphics_support && present_support) {
      // A family that can do both wins outright.
      graphics_queue->family = idx;
      graphics_queue->index = 0;
      present_queue->family = idx;
      present_queue->index = 0;
      break;
    } else if(graphics_support && (graphics_queue->family == no_family)) {
      graphics_queue->family = idx;
      graphics_queue->index = 0;
    } else if(present_support && (present_queue->family == no_family)) {
      // Record the present-only family on the present queue.
      present_queue->family = idx;
      present_queue->index = 0;
    }
  }

  transfer_queue->family = no_family;
  for(uint32_t idx = 0; idx < queue_family_count; idx++) {
    bool graphics_support = (queue_families[idx].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
    bool compute_support = (queue_families[idx].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0;
    bool shares_family = (graphics_queue->family == idx) || (present_queue->family == idx);

    // Index 0 of a shared family is already taken; a second queue is needed.
    if(shares_family && (queue_families[idx].queueCount == 1)) {
      continue;
    }

    if(graphics_support && compute_support) {
      transfer_queue->family = idx;
      transfer_queue->index = shares_family ? 1 : 0;
    }
  }

  // The family table is not needed past this point.
  free(queue_families);

  if(graphics_queue->family == no_family || present_queue->family == no_family || transfer_queue->family == no_family) {
    return VK_ERROR_INITIALIZATION_FAILED;
  }

  // Build one create-info per distinct family, sized to the highest queue
  // index requested from it. Vulkan requires unique family indices and one
  // priority per queue; no family here needs more than two queues.
  const float queue_priorities[2] = {1.0f, 1.0f};
  const GPUQueue* requested[3] = {graphics_queue, present_queue, transfer_queue};
  VkDeviceQueueCreateInfo queue_create_info[3];
  memset(queue_create_info, 0, sizeof(queue_create_info));
  uint32_t queue_count = 0;
  for(uint32_t q = 0; q < 3; q++) {
    uint32_t slot = 0;
    while(slot < queue_count && queue_create_info[slot].queueFamilyIndex != requested[q]->family) {
      slot++;
    }
    if(slot == queue_count) {
      queue_create_info[slot].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
      queue_create_info[slot].queueFamilyIndex = requested[q]->family;
      queue_create_info[slot].queueCount = 0;
      queue_create_info[slot].pQueuePriorities = queue_priorities;
      queue_count++;
    }
    if(queue_create_info[slot].queueCount < requested[q]->index + 1) {
      queue_create_info[slot].queueCount = requested[q]->index + 1;
    }
  }

  VkPhysicalDeviceSynchronization2FeaturesKHR sync2 = {
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
    .synchronization2 = VK_TRUE,
  };

  VkPhysicalDeviceVulkan12Features features_12 = {
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
    .bufferDeviceAddress = VK_TRUE,
    .runtimeDescriptorArray = VK_TRUE,
    .descriptorIndexing = VK_TRUE,
    .descriptorBindingPartiallyBound = VK_TRUE,
    .descriptorBindingVariableDescriptorCount = VK_TRUE,
    .descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE,
    .descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE,
    .descriptorBindingSampledImageUpdateAfterBind = VK_TRUE,
    .timelineSemaphore = VK_TRUE,
    .pNext = &sync2,
  };

  VkPhysicalDeviceFeatures device_features = {
    .samplerAnisotropy = VK_TRUE,
  };

  VkDeviceCreateInfo device_create_info = {
    .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
    .pQueueCreateInfos = queue_create_info,
    .queueCreateInfoCount = queue_count,
    .pEnabledFeatures = &device_features,
    .enabledExtensionCount = device_extension_count,
    .ppEnabledExtensionNames = device_extensions,
    .enabledLayerCount = validation_layer_count,
    .ppEnabledLayerNames = validation_layers,
    .pNext = &features_12,
  };

  VkResult result;
  VK_RESULT(vkCreateDevice(physical_device, &device_create_info, 0, device));

  vkGetDeviceQueue(*device, graphics_queue->family, graphics_queue->index, &graphics_queue->handle);
  vkGetDeviceQueue(*device, present_queue->family, present_queue->index, &present_queue->handle);
  vkGetDeviceQueue(*device, transfer_queue->family, transfer_queue->index, &transfer_queue->handle);

  return VK_SUCCESS;
}

// Creates a VMA allocator for the device (Vulkan 1.2, buffer device address
// flag set).
VkResult create_memory_allocator(VkInstance instance, VkPhysicalDevice physical_device, VkDevice device, VmaAllocator* allocator) {
  VmaAllocatorCreateInfo allocator_create_info = {
    .vulkanApiVersion = VK_API_VERSION_1_2,
    .instance = instance,
    .physicalDevice = physical_device,
    .device = device,
    .flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT,
  };

  VkResult result;
  VK_RESULT(vmaCreateAllocator(&allocator_create_info, allocator));
  return VK_SUCCESS;
}

// Queries surface capabilities, formats and present modes into *details.
// details->formats and details->present_modes are heap arrays owned by the
// caller on success; on failure both are freed and reset to NULL.
VkResult get_swapchain_details(VkPhysicalDevice physical_device, VkSurfaceKHR surface, SwapchainDetails* details) {
  details->formats = 0;
  details->present_modes = 0;

  VkResult result;
  VK_RESULT(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &details->capabilities));
  VK_RESULT(vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &details->formats_count, 0));

  details->formats = malloc(sizeof(VkSurfaceFormatKHR)*details->formats_count);
  if(details->formats == NULL && details->formats_count > 0) {
    result = VK_ERROR_OUT_OF_HOST_MEMORY;
    goto fail;
  }
  result = vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &details->formats_count, details->formats);
  if(result != VK_SUCCESS) {
    goto fail;
  }

  result = vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &details->present_modes_count, 0);
  if(result != VK_SUCCESS) {
    goto fail;
  }

  details->present_modes = malloc(sizeof(VkPresentModeKHR)*details->present_modes_count);
  if(details->present_modes == NULL && details->present_modes_count > 0) {
    result = VK_ERROR_OUT_OF_HOST_MEMORY;
    goto fail;
  }
  result = vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &details->present_modes_count, details->present_modes);
  if(result != VK_SUCCESS) {
    goto fail;
  }

  return VK_SUCCESS;

fail:
  free(details->formats);
  free(details->present_modes);
  details->formats = NULL;
  details->present_modes = NULL;
  return result;
}

// Returns the B8G8R8A8_SRGB / SRGB_NONLINEAR surface format when offered,
// otherwise the first format in the list. With an empty list a
// zero-initialized format (VK_FORMAT_UNDEFINED) is returned.
VkSurfaceFormatKHR choose_swapchain_format(SwapchainDetails swapchain_details) {
  if(swapchain_details.formats == NULL || swapchain_details.formats_count == 0) {
    VkSurfaceFormatKHR none;
    memset(&none, 0, sizeof(none));
    return none;
  }

  for(uint32_t i = 0; i < swapchain_details.formats_count; i++) {
    VkSurfaceFormatKHR format = swapchain_details.formats[i];
    if(format.format == VK_FORMAT_B8G8R8A8_SRGB && format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
      return format;
    }
  }
  return swapchain_details.formats[0];
}

// Returns MAILBOX when the surface offers it, otherwise FIFO.
VkPresentModeKHR choose_present_mode(SwapchainDetails swapchain_details) {
  for(uint32_t i = 0; i < swapchain_details.present_modes_count; i++) {
    if(swapchain_details.present_modes[i] == VK_PRESENT_MODE_MAILBOX_KHR) {
      return VK_PRESENT_MODE_MAILBOX_KHR;
    }
  }
  return VK_PRESENT_MODE_FIFO_KHR;
}

// Returns the surface's current extent unchanged.
// NOTE(review): the Vulkan spec lets currentExtent be 0xFFFFFFFF to mean
// "decided by the swapchain"; that case is not handled here - confirm whether
// the targeted platforms can report it.
VkExtent2D choose_swapchain_extent(SwapchainDetails swapchain_details) {
  return swapchain_details.capabilities.currentExtent;
}

// Creates a swapchain for the surface. *swapchain is read as oldSwapchain
// (pass VK_NULL_HANDLE for a first-time creation) and overwritten with the
// new handle. Requests minImageCount + 1 images, clamped to maxImageCount when
// that limit is non-zero. Images are shared concurrently when the graphics
// and present families differ, exclusive otherwise.
VkResult create_swapchain(VkDevice device, VkSurfaceFormatKHR format, VkPresentModeKHR present_mode, VkExtent2D extent, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR capabilities, uint32_t graphics_family_index, uint32_t present_family_index, VkSwapchainKHR* swapchain) {
  uint32_t image_count = capabilities.minImageCount + 1;
  uint32_t max_images = capabilities.maxImageCount;
  if((max_images > 0) && (image_count > max_images)) {
    image_count = max_images;
  }

  VkSwapchainCreateInfoKHR swapchain_info = {
    .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
    .surface = surface,
    .minImageCount = image_count,
    .imageFormat = format.format,
    .imageColorSpace = format.colorSpace,
    .imageExtent = extent,
    .imageArrayLayers = 1,
    .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
    .preTransform = capabilities.currentTransform,
    // NOTE(review): INHERIT is not checked against
    // capabilities.supportedCompositeAlpha - confirm it is always available.
    .compositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
    .presentMode = present_mode,
    .clipped = VK_TRUE,
    .oldSwapchain = *swapchain,
  };

  uint32_t queue_families[2] = {graphics_family_index, present_family_index};
  if(graphics_family_index != present_family_index) {
    swapchain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
    swapchain_info.queueFamilyIndexCount = 2;
    swapchain_info.pQueueFamilyIndices = queue_families;
  } else {
    swapchain_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
    swapchain_info.queueFamilyIndexCount = 0;
    swapchain_info.pQueueFamilyIndices = 0;
  }

  VkResult result;
  VK_RESULT(vkCreateSwapchainKHR(device, &swapchain_info, 0, swapchain));
  return VK_SUCCESS;
}

// Retrieves the swapchain's images into a heap array stored in *images (owned
// by the caller on success) and writes their number to *image_count.
// On failure the array is freed and *images is reset to NULL.
VkResult get_swapchain_images(VkDevice device, VkSwapchainKHR swapchain, VkImage** images, uint32_t* image_count) {
  VkResult result;
  VK_RESULT(vkGetSwapchainImagesKHR(device, swapchain, image_count, 0));

  *images = malloc(sizeof(VkImage)*(*image_count));
  if(*images == 0) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  result = vkGetSwapchainImagesKHR(device, swapchain, image_count, *images);
  if(result != VK_SUCCESS) {
    free(*images);
    *images = NULL;
    return result;
  }
  return VK_SUCCESS;
}

// Creates one 2D color image view per image into a heap array stored in
// *image_views (owned by the caller on success). On failure every view
// created so far is destroyed, the array is freed and *image_views is NULL.
VkResult create_image_views(VkDevice device, uint32_t image_count, VkImage* images, VkSurfaceFormatKHR format, VkImageView** image_views) {
  *image_views = malloc(sizeof(VkImageView)*image_count);
  if(*image_views == 0) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  VkImageView* views = *image_views;
  for(uint32_t i = 0; i < image_count; i++) {
    VkImageViewCreateInfo view_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = images[i],
      .viewType = VK_IMAGE_VIEW_TYPE_2D,
      .format = format.format,
      .components = {
        .r = VK_COMPONENT_SWIZZLE_IDENTITY,
        .g = VK_COMPONENT_SWIZZLE_IDENTITY,
        .b = VK_COMPONENT_SWIZZLE_IDENTITY,
        .a = VK_COMPONENT_SWIZZLE_IDENTITY,
      },
      .subresourceRange = {
        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
      },
    };

    VkResult result = vkCreateImageView(device, &view_info, 0, &views[i]);
    if(result != VK_SUCCESS) {
      // Roll back the views that were already created.
      for(uint32_t j = 0; j < i; j++) {
        vkDestroyImageView(device, views[j], 0);
      }
      free(views);
      *image_views = NULL;
      return result;
    }
  }
  return VK_SUCCESS;
}

// Writes to *format the first entry of depth_formats whose features for the
// given tiling include every bit of `features`.
// Returns VK_ERROR_VALIDATION_FAILED_EXT when none matches.
VkResult find_depth_format(VkPhysicalDevice physical_device, VkImageTiling tiling, VkFormatFeatureFlags features, VkFormat* format) {
  for(uint32_t i = 0; i < depth_format_count; i++) {
    VkFormatProperties properties;
    vkGetPhysicalDeviceFormatProperties(physical_device, depth_formats[i], &properties);

    bool linear_ok = (tiling == VK_IMAGE_TILING_LINEAR) && ((properties.linearTilingFeatures & features) == features);
    bool optimal_ok = (tiling == VK_IMAGE_TILING_OPTIMAL) && ((properties.optimalTilingFeatures & features) == features);
    if(linear_ok || optimal_ok) {
      *format = depth_formats[i];
      return VK_SUCCESS;
    }
  }
  return VK_ERROR_VALIDATION_FAILED_EXT;
}

// Creates the render pass: attachment 0 is the swapchain color image (cleared,
// stored, ends in PRESENT_SRC), attachment 1 is the depth image (cleared, not
// stored). Subpass 0 uses color + depth, subpass 1 uses color only.
VkResult create_render_pass(VkDevice device, VkSurfaceFormatKHR format, VkFormat depth_format, VkRenderPass* render_pass) {
  VkAttachmentDescription attachments[] = {
    {
      .format = format.format,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
      .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    },
    {
      .format = depth_format,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
      .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
      .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
      .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
      .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
    },
  };

  VkAttachmentReference color_attachment_refs[] = {
    {
      .attachment = 0,
      .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    },
  };

  VkAttachmentReference depth_attachment_ref = {
    .attachment = 1,
    .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
  };

  // Subpass 0 renders with color and depth; subpass 1 renders color only.
  VkSubpassDescription subpasses[] = {
    {
      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .colorAttachmentCount = sizeof(color_attachment_refs)/sizeof(VkAttachmentReference),
      .pColorAttachments = color_attachment_refs,
      .pDepthStencilAttachment = &depth_attachment_ref,
    },
    {
      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .colorAttachmentCount = sizeof(color_attachment_refs)/sizeof(VkAttachmentReference),
      .pColorAttachments = color_attachment_refs,
    },
  };

  // This basically says "make sure nothing else is writing to the
  // depth/stencil or the color attachment during the pipeline".
  VkSubpassDependency dependencies[] = {
    {
      // External transfer writes must be visible to indirect-draw reads in subpass 1.
      .srcSubpass = VK_SUBPASS_EXTERNAL,
      .dstSubpass = 1,
      .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
    },
    {
      // External color writes -> subpass 0 color writes.
      .srcSubpass = VK_SUBPASS_EXTERNAL,
      .dstSubpass = 0,
      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
      .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    },
    {
      // External depth/stencil writes -> subpass 0 depth/stencil writes.
      .srcSubpass = VK_SUBPASS_EXTERNAL,
      .dstSubpass = 0,
      .srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
      .srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
      .dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
    },
    {
      // Subpass 0 color writes -> subpass 1 color writes.
      .srcSubpass = 0,
      .dstSubpass = 1,
      .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
      .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    },
  };

  VkRenderPassCreateInfo render_info = {
    .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
    .attachmentCount = sizeof(attachments)/sizeof(VkAttachmentDescription),
    .pAttachments = attachments,
    .subpassCount = sizeof(subpasses)/sizeof(VkSubpassDescription),
    .pSubpasses = subpasses,
    .dependencyCount = sizeof(dependencies)/sizeof(VkSubpassDependency),
    .pDependencies = dependencies,
  };

  VkResult result;
  VK_RESULT(vkCreateRenderPass(device, &render_info, 0, render_pass));
  return VK_SUCCESS;
}

// Creates one framebuffer per swapchain image view, each paired with the
// shared depth view, into a heap array stored in *framebuffers (owned by the
// caller on success). Returns VK_ERROR_OUT_OF_HOST_MEMORY when the array
// cannot be allocated. On a creation failure the framebuffers made so far are
// destroyed, the array is freed and *framebuffers is NULL.
VkResult create_swapchain_framebuffers(VkDevice device, uint32_t image_count, VkImageView* image_views, VkImageView depth_image_view, VkRenderPass render_pass, VkExtent2D extent, VkFramebuffer** framebuffers) {
  *framebuffers = malloc(sizeof(VkFramebuffer)*image_count);
  if(*framebuffers == 0) {
    return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  VkFramebuffer* framebuffer_ptr = *framebuffers;
  for(uint32_t i = 0; i < image_count; i++) {
    VkImageView attachments[] = {
      image_views[i],
      depth_image_view,
    };

    VkFramebufferCreateInfo framebuffer_info = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
      .renderPass = render_pass,
      .attachmentCount = 2,
      .pAttachments = attachments,
      .width = extent.width,
      .height = extent.height,
      .layers = 1,
    };

    VkResult result = vkCreateFramebuffer(device, &framebuffer_info, 0, &framebuffer_ptr[i]);
    if(result != VK_SUCCESS) {
      // Roll back the framebuffers that were already created.
      for(uint32_t j = 0; j < i; j++) {
        vkDestroyFramebuffer(device, framebuffer_ptr[j], 0);
      }
      free(framebuffer_ptr);
      *framebuffers = NULL;
      return result;
    }
  }
  return VK_SUCCESS;
}

// Creates a timeline semaphore with initial value 0.
// Returns VK_NULL_HANDLE on failure.
VkSemaphore create_timeline_semaphore(VkDevice device) {
  VkSemaphoreTypeCreateInfo semaphore_type = {
    .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
    .initialValue = 0,
    .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
  };

  VkSemaphoreCreateInfo semaphore_info = {
    .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
    .pNext = &semaphore_type,
  };

  VkSemaphore semaphore = VK_NULL_HANDLE;
  if(vkCreateSemaphore(device, &semaphore_info, 0, &semaphore) != VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return semaphore;
}

// Creates a binary semaphore. Returns VK_NULL_HANDLE on failure.
VkSemaphore create_semaphore(VkDevice device) {
  VkSemaphoreCreateInfo semaphore_info = {
    .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  };

  VkSemaphore semaphore = VK_NULL_HANDLE;
  if(vkCreateSemaphore(device, &semaphore_info, 0, &semaphore) != VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return semaphore;
}

// Creates a fence with the given creation flags.
// Returns VK_NULL_HANDLE on failure.
VkFence create_fence(VkDevice device, VkFenceCreateFlags flags) {
  VkFenceCreateInfo fence_info = {
    .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    .flags = flags,
  };

  VkFence fence = VK_NULL_HANDLE;
  if(vkCreateFence(device, &fence_info, 0, &fence) != VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return fence;
}

VkCommandBuffer* create_command_buffers(VkDevice device, VkCommandPool command_pool, uint32_t image_count) { VkCommandBufferAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = command_pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = image_count, }; VkCommandBuffer* command_buffers = malloc(sizeof(VkCommandBuffer)*image_count); if(command_buffers == 0) { return 0; } VkResult result = vkAllocateCommandBuffers(device, &alloc_info, command_buffers); if(result != VK_SUCCESS) { return VK_NULL_HANDLE; } return command_buffers; } VkResult create_frame_context(VkDevice device, VmaAllocator allocator, VkCommandPool transfer_pool, FrameContext* frame) { VkResult result; frame->ready = create_fence(device, VK_FENCE_CREATE_SIGNALED_BIT); frame->image = create_semaphore(device); frame->render = create_semaphore(device); frame->transfer = create_timeline_semaphore(device); frame->frame = create_timeline_semaphore(device); frame->compute = create_timeline_semaphore(device); frame->frame_index = 0; frame->transfer_index = 0; frame->compute_index = 0; VkCommandBufferAllocateInfo command_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandBufferCount = 1, .commandPool = transfer_pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, }; VK_RESULT(vkAllocateCommandBuffers(device, &command_info, &frame->transfer_commands)); VK_RESULT(vkAllocateCommandBuffers(device, &command_info, &frame->compute_commands)); // TODO: better defaults frame->transfer_max_size = 1; frame->transfer_max_count = 1; VK_RESULT(create_transfer_buffer(allocator, frame->transfer_max_size, &frame->transfer_buffer, &frame->transfer_memory, &frame->transfer_mapped)); frame->transfer_count = 0; frame->transfer_infos = malloc(sizeof(TransferInfo)*frame->transfer_max_count); return VK_SUCCESS; } VkResult create_depth_image(VkDevice device, VkFormat depth_format, VkExtent2D swapchain_extent, VmaAllocator allocator, VkCommandPool 
extra_graphics_pool, GPUQueue graphics_queue, VkImage* depth_image, VmaAllocation* depth_image_memory, VkImageView* depth_image_view) { VkExtent3D depth_extent = { .width = swapchain_extent.width, .height = swapchain_extent.height, .depth = 1, }; VkImageCreateInfo depth_image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .extent = depth_extent, .mipLevels = 1, .arrayLayers = 1, .format = depth_format, .tiling = VK_IMAGE_TILING_OPTIMAL, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .samples = VK_SAMPLE_COUNT_1_BIT, .flags = 0, }; VmaAllocationCreateInfo allocation_info = { .usage = VMA_MEMORY_USAGE_AUTO, }; VkResult result; VK_RESULT(vmaCreateImage(allocator, &depth_image_info, &allocation_info, depth_image, depth_image_memory, NULL)); VkImageViewCreateInfo depth_view_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = *depth_image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = depth_format, .components = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, .b = VK_COMPONENT_SWIZZLE_IDENTITY, .a = VK_COMPONENT_SWIZZLE_IDENTITY, }, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1, }, }; VK_RESULT(vkCreateImageView(device, &depth_view_info, 0, depth_image_view)); VK_RESULT(command_transition_image_layout(device, extra_graphics_pool, graphics_queue, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL, *depth_image, 0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, VK_IMAGE_ASPECT_DEPTH_BIT)); return VK_SUCCESS; } VkResult init_vulkan(GLFWwindow* window, RenderContext* context) { VkResult 
result; if(context == NULL) { return VK_ERROR_VALIDATION_FAILED_EXT; } int monitor_count; GLFWmonitor** monitors = glfwGetMonitors(&monitor_count); glfwGetMonitorContentScale(monitors[0], &context->window_scale[0], &context->window_scale[1]); VK_RESULT(create_instance(&context->instance)); VK_RESULT(create_debug_messenger(context->instance, &context->debug_messenger)); VK_RESULT(get_best_physical_device(context->instance, &context->physical_device)); VK_RESULT(glfwCreateWindowSurface(context->instance, window, 0, &context->surface)); VK_RESULT(create_logical_device(context->physical_device, context->surface, &context->graphics_queue, &context->present_queue, &context->transfer_queue, &context->device)); VK_RESULT(create_memory_allocator(context->instance, context->physical_device, context->device, &context->allocator)); VkCommandPoolCreateInfo extra_pool_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = context->graphics_queue.family, .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, }; VK_RESULT(vkCreateCommandPool(context->device, &extra_pool_info, 0, &context->extra_graphics_pool)); VkCommandPoolCreateInfo graphics_pool_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = context->graphics_queue.family, }; VK_RESULT(vkCreateCommandPool(context->device, &graphics_pool_info, 0, &context->graphics_pool)); VkCommandPoolCreateInfo transfer_pool_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = context->transfer_queue.family, }; VK_RESULT(vkCreateCommandPool(context->device, &transfer_pool_info, 0, &context->transfer_pool)); for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { VK_RESULT(create_frame_context(context->device, context->allocator, context->transfer_pool, &context->frame[i])); } 
VK_RESULT(get_swapchain_details(context->physical_device, context->surface, &context->swapchain_details)); context->swapchain_format = choose_swapchain_format(context->swapchain_details); context->swapchain_present_mode = choose_present_mode(context->swapchain_details); context->swapchain_extent = choose_swapchain_extent(context->swapchain_details); context->swapchain = VK_NULL_HANDLE; VK_RESULT(create_swapchain(context->device, context->swapchain_format, context->swapchain_present_mode, context->swapchain_extent, context->surface, context->swapchain_details.capabilities, context->graphics_queue.family, context->present_queue.family, &context->swapchain)); VK_RESULT(get_swapchain_images(context->device, context->swapchain, &context->swapchain_images, &context->swapchain_image_count)); context->swapchain_command_buffers = create_command_buffers(context->device, context->graphics_pool, context->swapchain_image_count); if(context->swapchain_command_buffers == NULL) { return VK_ERROR_VALIDATION_FAILED_EXT; } VK_RESULT(create_image_views(context->device, context->swapchain_image_count, context->swapchain_images, context->swapchain_format, &context->swapchain_image_views)); VK_RESULT(find_depth_format(context->physical_device, VK_IMAGE_TILING_OPTIMAL, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, &context->depth_format)); VK_RESULT(create_render_pass(context->device, context->swapchain_format, context->depth_format, &context->render_pass)); VK_RESULT(create_depth_image(context->device, context->depth_format, context->swapchain_extent, context->allocator, context->extra_graphics_pool, context->graphics_queue, &context->depth_image, &context->depth_image_memory, &context->depth_image_view)); VK_RESULT(create_swapchain_framebuffers(context->device, context->swapchain_image_count, context->swapchain_image_views, context->depth_image_view, context->render_pass, context->swapchain_extent, &context->swapchain_framebuffers)); context->current_frame = 0; return VK_SUCCESS; } 
VkResult create_transfer_buffer( VmaAllocator allocator, VkDeviceSize size, VkBuffer* buffer, VmaAllocation* memory, void** mapped) { VkBufferCreateInfo buffer_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .size = size, }; VmaAllocationCreateInfo memory_info = { .usage = VMA_MEMORY_USAGE_CPU_TO_GPU, }; VkResult result; VK_RESULT(vmaCreateBuffer(allocator, &buffer_info, &memory_info, buffer, memory, NULL)); result = vmaMapMemory(allocator, *memory, mapped); if(result != VK_SUCCESS) { vmaDestroyBuffer(allocator, *buffer, *memory); return result; } return VK_SUCCESS; } VkResult create_storage_buffer( VmaAllocator allocator, VkBufferUsageFlags usage, VkDeviceSize size, VkBuffer* buffer, VmaAllocation* memory) { VkBufferCreateInfo buffer_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = size, .usage = usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VmaAllocationCreateInfo memory_info = { .usage = VMA_MEMORY_USAGE_GPU_ONLY, }; return vmaCreateBuffer(allocator, &buffer_info, &memory_info, buffer, memory, NULL); }; void destroy_transfer_buffer( VmaAllocator allocator, VkBuffer buffer, VmaAllocation memory) { vmaUnmapMemory(allocator, memory); vmaDestroyBuffer(allocator, buffer, memory); } VkDeviceAddress buffer_address(VkDevice device, VkBuffer buffer) { VkBufferDeviceAddressInfo info = { .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, .buffer = buffer, }; return vkGetBufferDeviceAddress(device, &info); } VkCommandBuffer command_begin_single(VkDevice device, VkCommandPool transfer_pool) { VkCommandBufferAllocateInfo command_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandPool = transfer_pool, .commandBufferCount = 1, }; VkCommandBuffer command_buffer; VkResult result = 
vkAllocateCommandBuffers(device, &command_info, &command_buffer); if(result != VK_SUCCESS) { return VK_NULL_HANDLE; } VkCommandBufferBeginInfo begin_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; result = vkBeginCommandBuffer(command_buffer, &begin_info); if(result != VK_SUCCESS) { vkFreeCommandBuffers(device, transfer_pool, 1, &command_buffer); return VK_NULL_HANDLE; } return command_buffer; } VkResult command_end_single(VkDevice device, VkCommandBuffer command_buffer, VkCommandPool transfer_pool, GPUQueue transfer_queue) { VkResult result = vkEndCommandBuffer(command_buffer); if(result != VK_SUCCESS) { vkFreeCommandBuffers(device, transfer_pool, 1, &command_buffer); return result; } VkSubmitInfo submit_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &command_buffer, }; VkFenceCreateInfo fence_info = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, }; VkFence submit_fence; result = vkCreateFence(device, &fence_info, NULL, &submit_fence); if(result != VK_SUCCESS) { vkFreeCommandBuffers(device, transfer_pool, 1, &command_buffer); return result; } result = vkQueueSubmit(transfer_queue.handle, 1, &submit_info, submit_fence); if(result != VK_SUCCESS) { vkFreeCommandBuffers(device, transfer_pool, 1, &command_buffer); return result; } result = vkWaitForFences(device, 1, &submit_fence, VK_TRUE, UINT64_MAX); vkFreeCommandBuffers(device, transfer_pool, 1, &command_buffer); return result; } void command_copy_buffer(VkCommandBuffer command_buffer, VkBuffer src, VkBuffer dst, VkDeviceSize src_offset, VkDeviceSize dst_offset, VkDeviceSize size) { VkBufferCopy copy = { .srcOffset = src_offset, .dstOffset = dst_offset, .size = size, }; vkCmdCopyBuffer(command_buffer, src, dst, 1, ©); } VkResult command_transition_image_layout(VkDevice device, VkCommandPool transfer_pool, GPUQueue transfer_queue, VkImageLayout old_layout, VkImageLayout new_layout, VkImage image, 
VkAccessFlags src_mask, VkAccessFlags dst_mask, VkPipelineStageFlags source, VkPipelineStageFlags dest, uint32_t source_family, uint32_t dest_family, VkImageAspectFlags aspect_flags) { VkCommandBuffer command_buffer = command_begin_single(device, transfer_pool); VkImageMemoryBarrier barrier = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .oldLayout = old_layout, .newLayout = new_layout, .srcQueueFamilyIndex = source_family, .dstQueueFamilyIndex = dest_family, .image = image, .subresourceRange = { .aspectMask = aspect_flags, .levelCount = 1, .layerCount = 1, .baseMipLevel = 0, .baseArrayLayer = 0, }, .srcAccessMask = src_mask, .dstAccessMask = dst_mask, }; vkCmdPipelineBarrier(command_buffer, source, dest, 0, 0, 0, 0, 0, 1, &barrier); return command_end_single(device, command_buffer, transfer_pool, transfer_queue); } VkResult add_transfers( void* data, VkBuffer* buffers, VkDeviceSize offset, VkDeviceSize size, RenderContext* gpu) { VkResult result; for(uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { VK_RESULT(add_transfer(data, buffers[i], offset, size, i, gpu)); } return VK_SUCCESS; } VkResult add_transfer( void* data, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, uint32_t frame_index, RenderContext* gpu) { VkResult result; FrameContext* frame = &gpu->frame[frame_index]; if(frame->transfer_written + size >= frame->transfer_max_size) { uint32_t new_size = 2*frame->transfer_max_size; while(frame->transfer_written + size >= new_size) { new_size *= 2; } VkBuffer new_transfer; VmaAllocation new_transfer_memory; void* new_transfer_data; VK_RESULT(create_transfer_buffer(gpu->allocator, new_size, &new_transfer, &new_transfer_memory, &new_transfer_data)); memcpy(new_transfer_data, frame->transfer_mapped, frame->transfer_written); destroy_transfer_buffer(gpu->allocator, frame->transfer_buffer, frame->transfer_memory); frame->transfer_buffer = new_transfer; frame->transfer_memory = new_transfer_memory; frame->transfer_mapped = new_transfer_data; 
frame->transfer_max_size = new_size; } if(frame->transfer_count + 1 >= frame->transfer_max_count) { void* new_infos = malloc(sizeof(TransferInfo)*2*frame->transfer_max_count); if(new_infos == NULL) { return VK_ERROR_OUT_OF_HOST_MEMORY; } memcpy(new_infos, frame->transfer_infos, sizeof(TransferInfo)*frame->transfer_count); free(frame->transfer_infos); frame->transfer_infos = new_infos; frame->transfer_max_count *= 2; } memcpy(frame->transfer_mapped + frame->transfer_written, data, size); frame->transfer_infos[frame->transfer_count].size = size; frame->transfer_infos[frame->transfer_count].buffer = buffer; frame->transfer_infos[frame->transfer_count].offset = offset; frame->transfer_written += size; frame->transfer_count += 1; return VK_SUCCESS; }