// Copyright 2016 Dolphin Emulator Project
// Copyright 2020 DuckStation Emulator Project
// Licensed under GPLv2+
// Refer to the LICENSE file included.

#include "stream_buffer.h"
#include "../align.h"
#include "../assert.h"
#include "../log.h"
#include "context.h"
#include "util.h"
Log_SetChannel(Vulkan::StreamBuffer);

namespace Vulkan {

StreamBuffer::StreamBuffer() = default;

StreamBuffer::StreamBuffer(StreamBuffer&& move)
  : m_usage(move.m_usage), m_size(move.m_size), m_current_offset(move.m_current_offset),
    m_current_space(move.m_current_space), m_current_gpu_position(move.m_current_gpu_position), m_buffer(move.m_buffer),
    m_memory(move.m_memory), m_host_pointer(move.m_host_pointer), m_tracked_fences(std::move(move.m_tracked_fences)),
    m_coherent_mapping(move.m_coherent_mapping)
{
}

StreamBuffer::~StreamBuffer()
{
  if (IsValid())
    Destroy(true);
}

StreamBuffer& StreamBuffer::operator=(StreamBuffer&& move)
{
  if (IsValid())
    Destroy(true);

  std::swap(m_usage, move.m_usage);
  std::swap(m_size, move.m_size);
  std::swap(m_current_offset, move.m_current_offset);
  std::swap(m_current_space, move.m_current_space);
  std::swap(m_current_gpu_position, move.m_current_gpu_position);
  std::swap(m_buffer, move.m_buffer);
  std::swap(m_memory, move.m_memory);
  std::swap(m_host_pointer, move.m_host_pointer);
  std::swap(m_tracked_fences, move.m_tracked_fences);
  std::swap(m_coherent_mapping, move.m_coherent_mapping);

  return *this;
}

bool StreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
{
  // Create the buffer descriptor
  VkBufferCreateInfo buffer_create_info = {
    VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType        sType
    nullptr,                              // const void*            pNext
    0,                                    // VkBufferCreateFlags    flags
    static_cast<VkDeviceSize>(size),    // VkDeviceSize           size
    usage,                                // VkBufferUsageFlags     usage
    VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode          sharingMode
    0,                                    // uint32_t               queueFamilyIndexCount
    nullptr                               // const uint32_t*        pQueueFamilyIndices
  };

  VkBuffer buffer = VK_NULL_HANDLE;
  VkResult res = vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: ");
    return false;
  }

  // Get memory requirements (types etc) for this buffer
  VkMemoryRequirements memory_requirements;
  vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);

  // Aim for a coherent mapping if possible.
  u32 memory_type_index =
    g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits, &m_coherent_mapping);

  // Allocate memory for backing this buffer
  VkMemoryAllocateInfo memory_allocate_info = {
    VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType    sType
    nullptr,                                // const void*        pNext
    memory_requirements.size,               // VkDeviceSize       allocationSize
    memory_type_index                       // uint32_t           memoryTypeIndex
  };
  VkDeviceMemory memory = VK_NULL_HANDLE;
  res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: ");
    vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
    return false;
  }

  // Bind memory to buffer
  res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: ");
    vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
    vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
    return false;
  }

  // Map this buffer into user-space
  void* mapped_ptr = nullptr;
  res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, size, 0, &mapped_ptr);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vkMapMemory failed: ");
    vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
    vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
    return false;
  }

  // Unmap current host pointer (if there was a previous buffer)
  if (m_host_pointer)
    vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);

  if (IsValid())
    Destroy(true);

  // Replace with the new buffer
  m_usage = usage;
  m_size = size;
  m_buffer = buffer;
  m_memory = memory;
  m_host_pointer = reinterpret_cast<u8*>(mapped_ptr);
  m_current_offset = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
  return true;
}

void StreamBuffer::Destroy(bool defer)
{
  if (m_host_pointer)
  {
    vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
    m_host_pointer = nullptr;
  }

  if (m_buffer != VK_NULL_HANDLE)
  {
    if (defer)
      g_vulkan_context->DeferBufferDestruction(m_buffer);
    else
      vkDestroyBuffer(g_vulkan_context->GetDevice(), m_buffer, nullptr);
    m_buffer = VK_NULL_HANDLE;
  }
  if (m_memory != VK_NULL_HANDLE)
  {
    if (defer)
      g_vulkan_context->DeferDeviceMemoryDestruction(m_memory);
    else
      vkFreeMemory(g_vulkan_context->GetDevice(), m_memory, nullptr);
    m_memory = VK_NULL_HANDLE;
  }
}

bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
  const u32 required_bytes = num_bytes + alignment;

  // Check for sane allocations
  if (required_bytes > m_size)
  {
    Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast<u32>(num_bytes),
                    static_cast<u32>(m_size));
    Panic("Stream buffer overflow");
    return false;
  }

  // Is the GPU behind or up to date with our current offset?
  UpdateCurrentFencePosition();
  if (m_current_offset >= m_current_gpu_position)
  {
    const u32 remaining_bytes = m_size - m_current_offset;
    if (required_bytes <= remaining_bytes)
    {
      // Place at the current position, after the GPU position.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Check for space at the start of the buffer
    // We use < here because we don't want to have the case of m_current_offset ==
    // m_current_gpu_position. That would mean the code above would assume the
    // GPU has caught up to us, which it hasn't.
    if (required_bytes < m_current_gpu_position)
    {
      // Reset offset to zero, since we're allocating behind the gpu now
      m_current_offset = 0;
      m_current_space = m_current_gpu_position;
      return true;
    }
  }

  // Is the GPU ahead of our current offset?
  if (m_current_offset < m_current_gpu_position)
  {
    // We have from m_current_offset..m_current_gpu_position space to use.
    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
    if (required_bytes < remaining_bytes)
    {
      // Place at the current position, since this is still behind the GPU.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset;
      return true;
    }
  }

  // Can we find a fence to wait on that will give us enough memory?
  if (WaitForClearSpace(required_bytes))
  {
    const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
    m_current_offset += align_diff;
    m_current_space -= align_diff;
    return true;
  }

  // We tried everything we could, and still couldn't get anything. This means that too much space
  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
  // only option is to execute it, and wait until it's done.
  return false;
}

void StreamBuffer::CommitMemory(u32 final_num_bytes)
{
  Assert((m_current_offset + final_num_bytes) <= m_size);
  Assert(final_num_bytes <= m_current_space);

  // For non-coherent mappings, flush the memory range
  if (!m_coherent_mapping)
  {
    VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, m_current_offset,
                                 final_num_bytes};
    vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range);
  }

  m_current_offset += final_num_bytes;
  m_current_space -= final_num_bytes;
}

void StreamBuffer::UpdateCurrentFencePosition()
{
  // Don't create a tracking entry if the GPU is caught up with the buffer.
  if (m_current_offset == m_current_gpu_position)
    return;

  // Has the offset changed since the last fence?
  const u64 counter = g_vulkan_context->GetCurrentFenceCounter();
  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
    return;
  }

  // New buffer, so update the GPU position while we're at it.
  UpdateGPUPosition();
  m_tracked_fences.emplace_back(counter, m_current_offset);
}

void StreamBuffer::UpdateGPUPosition()
{
  auto start = m_tracked_fences.begin();
  auto end = start;

  const u64 completed_counter = g_vulkan_context->GetCompletedFenceCounter();
  while (end != m_tracked_fences.end() && completed_counter >= end->first)
  {
    m_current_gpu_position = end->second;
    ++end;
  }

  if (start != end)
    m_tracked_fences.erase(start, end);
}

bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  u32 new_offset = 0;
  u32 new_space = 0;
  u32 new_gpu_position = 0;

  auto iter = m_tracked_fences.begin();
  for (; iter != m_tracked_fences.end(); ++iter)
  {
    // Would this fence bring us in line with the GPU?
    // This is the "last resort" case, where a command buffer execution has been forced
    // after no additional data has been written to it, so we can assume that after the
    // fence has been signaled the entire buffer is now consumed.
    u32 gpu_position = iter->second;
    if (m_current_offset == gpu_position)
    {
      new_offset = 0;
      new_space = m_size;
      new_gpu_position = 0;
      break;
    }

    // Assuming that we wait for this fence, are we allocating in front of the GPU?
    if (m_current_offset > gpu_position)
    {
      // This would suggest the GPU has now followed us and wrapped around, so we have from
      // m_current_position..m_size free, as well as and 0..gpu_position.
      const u32 remaining_space_after_offset = m_size - m_current_offset;
      if (remaining_space_after_offset >= num_bytes)
      {
        // Switch to allocating in front of the GPU, using the remainder of the buffer.
        new_offset = m_current_offset;
        new_space = m_size - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }

      // We can wrap around to the start, behind the GPU, if there is enough space.
      // We use > here because otherwise we'd end up lining up with the GPU, and then the
      // allocator would assume that the GPU has consumed what we just wrote.
      if (gpu_position > num_bytes)
      {
        new_offset = 0;
        new_space = gpu_position;
        new_gpu_position = gpu_position;
        break;
      }
    }
    else
    {
      // We're currently allocating behind the GPU. This would give us between the current
      // offset and the GPU position worth of space to work with. Again, > because we can't
      // align the GPU position with the buffer offset.
      u32 available_space_inbetween = gpu_position - m_current_offset;
      if (available_space_inbetween > num_bytes)
      {
        // Leave the offset as-is, but update the GPU position.
        new_offset = m_current_offset;
        new_space = gpu_position - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }
    }
  }

  // Did any fences satisfy this condition?
  // Has the command buffer been executed yet? If not, the caller should execute it.
  if (iter == m_tracked_fences.end() || iter->first == g_vulkan_context->GetCurrentFenceCounter())
    return false;

  // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
  g_vulkan_context->WaitForFenceCounter(iter->first);
  m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
  m_current_offset = new_offset;
  m_current_space = new_space;
  m_current_gpu_position = new_gpu_position;
  return true;
}

} // namespace Vulkan