Use a staging buffer to optimize vertex buffer access while drawing.

parent 1cab7f52
......@@ -1407,18 +1407,19 @@ class HelloTriangleApplication {
throw std::runtime_error("Failed to find suitable memory type!");
}
void createVertexBuffer() {
void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& bufferMemory) {
// Creates a buffer of `size` bytes with the given `usage` flags, allocates memory of a type matching `properties`, and binds that memory to the buffer.
// The resulting handles are written to the `buffer` and `bufferMemory` out-parameters.
// Throws std::runtime_error if creating the buffer or allocating its memory fails.
// Buffers in Vulkan are regions of memory storing arbitrary data to be read by the GPU. Here, they store vertex data.
// Buffers DO NOT automatically allocate memory for themselves!
// This creates many different kinds of buffers.
VkBufferCreateInfo bufferInfo = {};
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = sizeof(vertices[0]) * vertices.size(); //The buffer will be the size of one vertex, multiplied by the # of vertices.
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; //The purpose for the data in the buffer. Can specify multiple purposes.
bufferInfo.size = size; //Size of the buffer in bytes, as requested by the caller.
bufferInfo.usage = usage; //The purpose for the data in the buffer. Can specify multiple purposes.
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; //Can be owned by a specific queue family, or shared. Owned = better performance.
// Ours will only be used by the graphics queue - so we'll let that own this buffer for better performance.
if (vkCreateBuffer(device, &bufferInfo, nullptr, &vertexBuffer) != VK_SUCCESS) {
if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
throw std::runtime_error("Failed to create vertex buffer!");
}
......@@ -1426,39 +1427,106 @@ class HelloTriangleApplication {
// \/ Struct Fields: size (bytes of required memory), alignment (offset in bytes to where the buffer begins), memoryTypeBits (bit field of memory types suitable for the buffer).
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(device, vertexBuffer, &memRequirements); //We need to query the buffer's memory requirements.
vkGetBufferMemoryRequirements(device, buffer, &memRequirements); //We need to query the buffer's memory requirements.
//Time to actually allocate the memory!
VkMemoryAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size; //bytes of required memory.
allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
// Use a memory type that lets us write to it from the CPU (HOST_VISIBLE_BIT), and also use COHERENT_BIT.
allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties);
// The caller picks the memory properties; e.g. HOST_VISIBLE_BIT | HOST_COHERENT_BIT selects a memory type the CPU can write to.
if (vkAllocateMemory(device, &allocInfo, nullptr, &vertexBufferMemory) != VK_SUCCESS) {
if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) != VK_SUCCESS) {
throw std::runtime_error("Failed to allocate vertex buffer memory!");
}
//Now, we can associate the newly allocated memory with the buffer.
// NOTE(review): the return value of vkBindBufferMemory is not checked here.
vkBindBufferMemory(device, vertexBuffer, vertexBufferMemory, 0); //4th param is the offset within this region of memory.
vkBindBufferMemory(device, buffer, bufferMemory, 0); //4th param is the offset within this region of memory.
//Since we've allocated this memory specifically for this buffer, the offset is 0. If non-zero, must be divisible by memRequirements.alignment.
}
void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) {
    // Copies the first `size` bytes of srcBuffer into dstBuffer (both starting at offset 0)
    // by recording a one-time command buffer, submitting it to the graphics queue, and
    // blocking until that queue is idle.
    // Throws std::runtime_error if any Vulkan call involved in the copy fails.
    //
    // Memory transfer ops are executed using command buffers, just like draw commands.
    // So, we must allocate a temporary command buffer - using a special command pool for these would optimize the program.
    VkCommandBufferAllocateInfo allocInfo = {};
    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; //A primary command buffer.
    allocInfo.commandPool = commandPool; //Just use the normal command pool for now.
    allocInfo.commandBufferCount = 1;

    VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
    if (vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer) != VK_SUCCESS) {
        throw std::runtime_error("Failed to allocate command buffer for buffer copy!");
    }

    VkCommandBufferBeginInfo beginInfo = {};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; //We're only gonna be using it once. The driver can optimize.

    // From here on, `ok` remembers whether every step so far succeeded. Later steps are skipped
    // after a failure, so the temporary command buffer is freed on every path before we throw.
    //Start recording the copy op.
    bool ok = vkBeginCommandBuffer(commandBuffer, &beginInfo) == VK_SUCCESS;
    if (ok) {
        VkBufferCopy copyRegion = {};
        copyRegion.srcOffset = 0;
        copyRegion.dstOffset = 0;
        copyRegion.size = size; //Size of the region to copy.
        // This lets us copy from the src buffer (e.g. a CPU-writable staging buffer) to the dst buffer (e.g. a device-local vertex buffer).
        vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, &copyRegion);
        ok = vkEndCommandBuffer(commandBuffer) == VK_SUCCESS;
    }

    //Now let's submit the command buffer for execution.
    if (ok) {
        VkSubmitInfo submitInfo = {};
        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
        submitInfo.commandBufferCount = 1;
        submitInfo.pCommandBuffers = &commandBuffer;
        ok = vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) == VK_SUCCESS; //Submit to the graphics queue for execution.
    }
    if (ok) {
        ok = vkQueueWaitIdle(graphicsQueue) == VK_SUCCESS; //Wait for the graphics queue to become idle, i.e. for the copy to finish.
        // We could also use fences with vkWaitForFences, to let us schedule multiple transfers simultaneously, then wait for them all to complete,
        // giving the driver more optimization opportunities.
    }

    vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer); //Kill the temp command buffer.
    if (!ok) {
        throw std::runtime_error("Failed to copy buffer!");
    }
}
void createVertexBuffer() {
// Uploads `vertices` into `vertexBuffer`: the data is first written into a temporary staging buffer,
// then copied into the device-local vertex buffer with copyBuffer, after which the staging buffer and its memory are destroyed.
// We're gonna use a host-visible (CPU-visible) buffer as a temporary buffer, then use a device-local one as the actual vertex buffer.
VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size();
VkBuffer stagingBuffer; //The temp buffer we write to from CPU.
VkDeviceMemory stagingBufferMemory; //The memory for the staging buffer.
//Create the staging buffer
createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory);
//HOST_COHERENT_BIT means the memory is kept in sync with the host (host = CPU).
//We need to fill the staging buffer, by mapping the buffer memory into CPU-accessible memory using vkMapMemory.
void* data;
vkMapMemory(device, vertexBufferMemory, 0, bufferInfo.size, 0, &data);
vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data);
// This allows us to access a region of specified memory defined by an offset and a size (0 and bufferSize).
// One can also specify VK_WHOLE_SIZE to map all of the memory.
// Second to last param can be used for flags, but there aren't any yet.
memcpy(data, vertices.data(), (size_t) bufferInfo.size); //memcpy copies into the "data" pointer, from "vertices.data()" pointer, using bufferInfo.size bytes).
vkUnmapMemory(device, vertexBufferMemory); //Unmap once CPU --> GPU memory copy is complete.
memcpy(data, vertices.data(), (size_t) bufferSize); //memcpy copies bufferSize bytes from the "vertices.data()" pointer into the "data" pointer.
vkUnmapMemory(device, stagingBufferMemory); //Unmap once the CPU write into the staging memory is complete.
//Unfortunately the driver might not immediately copy the data, for example b/c of caching - buffer writes might not be visible in mapped memory yet. Two ways to prevent:
// 1) Use a host-coherent memory heap. Ensures the mapped memory always matches the contents of the allocated memory. Worse performance, but doesn't matter here.
//    Do this by specifying VK_MEMORY_PROPERTY_HOST_COHERENT_BIT when creating the buffer, as a memory property flag (this is what we do above).
// 2) Call vkFlushMappedMemoryRanges after writing to mapped memory, then call vkInvalidateMappedMemoryRanges before reading from mapped memory. More work!
createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertexBuffer, vertexBufferMemory);
// TRANSFER_DST_BIT: Buffer can be used as a destination in a memory transfer op (TRANSFER_SRC_BIT, used for the staging buffer, marks the source of a transfer).
// DEVICE_LOCAL_BIT: the vertex buffer's memory is allocated from a device-local memory type.
// Memory requested with only DEVICE_LOCAL_BIT is not guaranteed to be host-visible, so we don't vkMapMemory it. We can, however, use the copyBuffer function.
copyBuffer(stagingBuffer, vertexBuffer, bufferSize); //Do the buffer copy. The GPU gets to read from optimized local memory, the CPU gets to actually write memory at all!
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingBufferMemory, nullptr); //Kill the temp staging buffer and its memory.
}
void initVulkan() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment