From 14e7f8fd13127d15986f934daf644459fe516e62 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 10 Jul 2021 21:37:08 +1000 Subject: [PATCH] GPU: Add D3D12 renderer --- src/common/CMakeLists.txt | 14 + src/common/common.vcxproj | 14 + src/common/common.vcxproj.filters | 45 + src/common/d3d12/context.cpp | 453 +++++++ src/common/d3d12/context.h | 142 +++ src/common/d3d12/descriptor_heap_manager.cpp | 101 ++ src/common/d3d12/descriptor_heap_manager.h | 70 ++ src/common/d3d12/shader_cache.cpp | 481 ++++++++ src/common/d3d12/shader_cache.h | 118 ++ src/common/d3d12/staging_texture.cpp | 234 ++++ src/common/d3d12/staging_texture.h | 62 + src/common/d3d12/stream_buffer.cpp | 263 ++++ src/common/d3d12/stream_buffer.h | 58 + src/common/d3d12/texture.cpp | 390 ++++++ src/common/d3d12/texture.h | 80 ++ src/common/d3d12/util.cpp | 343 ++++++ src/common/d3d12/util.h | 152 +++ src/core/CMakeLists.txt | 2 + src/core/core.vcxproj | 2 + src/core/core.vcxproj.filters | 4 +- src/core/gpu.h | 3 + src/core/gpu_hw_d3d12.cpp | 1097 +++++++++++++++++ src/core/gpu_hw_d3d12.h | 102 ++ src/core/host_display.h | 1 + src/core/settings.cpp | 2 + src/core/shadergen.cpp | 3 +- src/core/system.cpp | 3 + src/core/types.h | 1 + .../nogui_host_interface.cpp | 5 + src/duckstation-qt/qthostinterface.cpp | 5 + src/frontend-common/CMakeLists.txt | 4 + src/frontend-common/d3d12_host_display.cpp | 871 +++++++++++++ src/frontend-common/d3d12_host_display.h | 127 ++ src/frontend-common/frontend-common.vcxproj | 4 + .../frontend-common.vcxproj.filters | 4 + src/frontend-common/imgui_impl_dx12.cpp | 533 ++++++++ src/frontend-common/imgui_impl_dx12.h | 47 + 37 files changed, 5838 insertions(+), 2 deletions(-) create mode 100644 src/common/d3d12/context.cpp create mode 100644 src/common/d3d12/context.h create mode 100644 src/common/d3d12/descriptor_heap_manager.cpp create mode 100644 src/common/d3d12/descriptor_heap_manager.h create mode 100644 src/common/d3d12/shader_cache.cpp create mode 100644 
src/common/d3d12/shader_cache.h create mode 100644 src/common/d3d12/staging_texture.cpp create mode 100644 src/common/d3d12/staging_texture.h create mode 100644 src/common/d3d12/stream_buffer.cpp create mode 100644 src/common/d3d12/stream_buffer.h create mode 100644 src/common/d3d12/texture.cpp create mode 100644 src/common/d3d12/texture.h create mode 100644 src/common/d3d12/util.cpp create mode 100644 src/common/d3d12/util.h create mode 100644 src/core/gpu_hw_d3d12.cpp create mode 100644 src/core/gpu_hw_d3d12.h create mode 100644 src/frontend-common/d3d12_host_display.cpp create mode 100644 src/frontend-common/d3d12_host_display.h create mode 100644 src/frontend-common/imgui_impl_dx12.cpp create mode 100644 src/frontend-common/imgui_impl_dx12.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 3bd9b9755..b558f9532 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -121,6 +121,20 @@ target_link_libraries(common PRIVATE glad stb Threads::Threads libchdr glslang v if(WIN32) target_sources(common PRIVATE + d3d12/context.cpp + d3d12/context.h + d3d12/descriptor_heap_manager.cpp + d3d12/descriptor_heap_manager.h + d3d12/shader_cache.cpp + d3d12/shader_cache.h + d3d12/staging_texture.cpp + d3d12/staging_texture.h + d3d12/stream_buffer.cpp + d3d12/stream_buffer.h + d3d12/texture.cpp + d3d12/texture.h + d3d12/util.cpp + d3d12/util.h d3d11/shader_cache.cpp d3d11/shader_cache.h d3d11/shader_compiler.cpp diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj index c49885fb0..797d14ed2 100644 --- a/src/common/common.vcxproj +++ b/src/common/common.vcxproj @@ -18,6 +18,13 @@ + + + + + + + @@ -98,6 +105,13 @@ + + + + + + + diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters index 169ad7bf8..1b68fbbbb 100644 --- a/src/common/common.vcxproj.filters +++ b/src/common/common.vcxproj.filters @@ -113,6 +113,27 @@ + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + @@ 
-221,6 +242,27 @@ + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + + + d3d12 + @@ -238,5 +280,8 @@ {fd4150b0-6f82-4251-ab23-34c25fbc5b5e} + + {358e11c4-34af-4169-9a66-ec66342a6a2f} + diff --git a/src/common/d3d12/context.cpp b/src/common/d3d12/context.cpp new file mode 100644 index 000000000..43e49370a --- /dev/null +++ b/src/common/d3d12/context.cpp @@ -0,0 +1,453 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "context.h" +#include "../assert.h" +#include "../log.h" +#include "../scope_guard.h" +#include +#include +#include +#include +#include +Log_SetChannel(D3D12::Context); + +std::unique_ptr g_d3d12_context; + +namespace D3D12 { + +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + +// Private D3D12 state +static HMODULE s_d3d12_library; +static PFN_D3D12_CREATE_DEVICE s_d3d12_create_device; +static PFN_D3D12_GET_DEBUG_INTERFACE s_d3d12_get_debug_interface; +static PFN_D3D12_SERIALIZE_ROOT_SIGNATURE s_d3d12_serialize_root_signature; + +static bool LoadD3D12Library() +{ + if (!(s_d3d12_library = LoadLibrary("d3d12.dll")) || + !(s_d3d12_create_device = + reinterpret_cast(GetProcAddress(s_d3d12_library, "D3D12CreateDevice"))) || + !(s_d3d12_get_debug_interface = + reinterpret_cast(GetProcAddress(s_d3d12_library, "D3D12GetDebugInterface"))) || + !(s_d3d12_serialize_root_signature = reinterpret_cast( + GetProcAddress(s_d3d12_library, "D3D12SerializeRootSignature")))) + { + Log_ErrorPrintf("d3d12.dll could not be loaded."); + s_d3d12_create_device = nullptr; + s_d3d12_get_debug_interface = nullptr; + s_d3d12_serialize_root_signature = nullptr; + if (s_d3d12_library) + FreeLibrary(s_d3d12_library); + s_d3d12_library = nullptr; + return false; + } + + return true; +} + +static void UnloadD3D12Library() +{ + s_d3d12_serialize_root_signature = nullptr; + s_d3d12_get_debug_interface = nullptr; + s_d3d12_create_device = nullptr; + if (s_d3d12_library) + { + 
FreeLibrary(s_d3d12_library); + s_d3d12_library = nullptr; + } +} + +#else + +static const PFN_D3D12_CREATE_DEVICE s_d3d12_create_device = D3D12CreateDevice; +static const PFN_D3D12_GET_DEBUG_INTERFACE s_d3d12_get_debug_interface = D3D12GetDebugInterface; +static const PFN_D3D12_SERIALIZE_ROOT_SIGNATURE s_d3d12_serialize_root_signature = D3D12SerializeRootSignature; + +static bool LoadD3D12Library() +{ + return true; +} + +static void UnloadD3D12Library() {} + +#endif + +Context::Context() = default; + +Context::~Context() +{ + DestroyResources(); +} + +Context::ComPtr Context::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc) +{ + ComPtr blob; + ComPtr error_blob; + const HRESULT hr = s_d3d12_serialize_root_signature(desc, D3D_ROOT_SIGNATURE_VERSION_1, blob.GetAddressOf(), + error_blob.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("D3D12SerializeRootSignature() failed: %08X", hr); + if (error_blob) + Log_ErrorPrintf("%s", error_blob->GetBufferPointer()); + + return {}; + } + + return blob; +} + +D3D12::Context::ComPtr Context::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc) +{ + ComPtr blob = SerializeRootSignature(desc); + if (!blob) + return {}; + + ComPtr rs; + const HRESULT hr = + m_device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(rs.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateRootSignature() failed: %08X", hr); + return {}; + } + + return rs; +} + +bool Context::SupportsTextureFormat(DXGI_FORMAT format) +{ + constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {format}; + return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))) && + (support.Support1 & required) == required; +} + +bool Context::Create(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer) +{ + Assert(!g_d3d12_context); + + if 
(!LoadD3D12Library()) + return false; + + g_d3d12_context.reset(new Context()); + if (!g_d3d12_context->CreateDevice(dxgi_factory, adapter_index, enable_debug_layer) || + !g_d3d12_context->CreateCommandQueue() || !g_d3d12_context->CreateFence() || + !g_d3d12_context->CreateDescriptorHeaps() || !g_d3d12_context->CreateCommandLists() || + !g_d3d12_context->CreateTextureStreamBuffer()) + { + Destroy(); + return false; + } + + return true; +} + +void Context::Destroy() +{ + if (g_d3d12_context) + g_d3d12_context.reset(); + + UnloadD3D12Library(); +} + +bool Context::CreateDevice(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer) +{ + ComPtr adapter; + HRESULT hr = dxgi_factory->EnumAdapters(adapter_index, &adapter); + if (FAILED(hr)) + { + Log_ErrorPrintf("Adapter %u not found, using default", adapter_index); + adapter = nullptr; + } + else + { + DXGI_ADAPTER_DESC adapter_desc; + if (SUCCEEDED(adapter->GetDesc(&adapter_desc))) + { + char adapter_name_buffer[128]; + const int name_length = WideCharToMultiByte(CP_UTF8, 0, adapter_desc.Description, + static_cast(std::wcslen(adapter_desc.Description)), + adapter_name_buffer, countof(adapter_name_buffer), 0, nullptr); + if (name_length >= 0) + { + adapter_name_buffer[name_length] = 0; + Log_InfoPrintf("D3D Adapter: %s", adapter_name_buffer); + } + } + } + + // Enabling the debug layer will fail if the Graphics Tools feature is not installed. + if (enable_debug_layer) + { + hr = s_d3d12_get_debug_interface(IID_PPV_ARGS(&m_debug_interface)); + if (SUCCEEDED(hr)) + { + m_debug_interface->EnableDebugLayer(); + } + else + { + Log_ErrorPrintf("Debug layer requested but not available."); + enable_debug_layer = false; + } + } + + // Create the actual device. 
+ hr = s_d3d12_create_device(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); + AssertMsg(SUCCEEDED(hr), "Create D3D12 device"); + if (FAILED(hr)) + return false; + + if (enable_debug_layer) + { + ComPtr info_queue; + if (SUCCEEDED(m_device.As(&info_queue))) + { + info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); + info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); + + D3D12_INFO_QUEUE_FILTER filter = {}; + std::array id_list{ + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, + D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, + D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, + }; + filter.DenyList.NumIDs = static_cast(id_list.size()); + filter.DenyList.pIDList = id_list.data(); + info_queue->PushStorageFilter(&filter); + } + } + + return true; +} + +bool Context::CreateCommandQueue() +{ + const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, + D3D12_COMMAND_QUEUE_FLAG_NONE}; + HRESULT hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue)); + AssertMsg(SUCCEEDED(hr), "Create command queue"); + return SUCCEEDED(hr); +} + +bool Context::CreateFence() +{ + HRESULT hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)); + AssertMsg(SUCCEEDED(hr), "Create fence"); + if (FAILED(hr)) + return false; + + m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); + AssertMsg(m_fence_event != NULL, "Create fence event"); + if (!m_fence_event) + return false; + + return true; +} + +bool Context::CreateDescriptorHeaps() +{ + static constexpr size_t MAX_SRVS = 16384; + static constexpr size_t MAX_RTVS = 8192; + static constexpr size_t MAX_DSVS = 128; + static constexpr size_t MAX_SAMPLERS = 128; + + if 
(!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MAX_SRVS, true) || + !m_rtv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_RTVS, false) || + !m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_DSVS, false) || + !m_sampler_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_SAMPLERS, true)) + { + return false; + } + + m_gpu_descriptor_heaps[0] = m_descriptor_heap_manager.GetDescriptorHeap(); + m_gpu_descriptor_heaps[1] = m_sampler_heap_manager.GetDescriptorHeap(); + + // Allocate null SRV descriptor for unbound textures. + constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + + if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor)) + { + Panic("Failed to allocate null descriptor"); + return false; + } + + m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle); + return true; +} + +bool Context::CreateCommandLists() +{ + for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) + { + CommandListResources& res = m_command_lists[i]; + HRESULT hr = m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(res.command_allocator.GetAddressOf())); + AssertMsg(SUCCEEDED(hr), "Create command allocator"); + if (FAILED(hr)) + return false; + + hr = m_device->CreateCommandList(1, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocator.Get(), nullptr, + IID_PPV_ARGS(res.command_list.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create command list: %08X", hr); + return false; + } + + // Close the command list, since the first thing we do is reset them. 
+ hr = res.command_list->Close(); + AssertMsg(SUCCEEDED(hr), "Closing new command list failed"); + if (FAILED(hr)) + return false; + } + + MoveToNextCommandList(); + return true; +} + +bool Context::CreateTextureStreamBuffer() +{ + return m_texture_stream_buffer.Create(TEXTURE_UPLOAD_BUFFER_SIZE); +} + +void Context::MoveToNextCommandList() +{ + m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + m_current_fence_value++; + + // We may have to wait if this command list hasn't finished on the GPU. + CommandListResources& res = m_command_lists[m_current_command_list]; + WaitForFence(res.ready_fence_value); + + // Begin command list. + res.command_allocator->Reset(); + res.command_list->Reset(res.command_allocator.Get(), nullptr); + res.command_list->SetDescriptorHeaps(static_cast(m_gpu_descriptor_heaps.size()), m_gpu_descriptor_heaps.data()); + res.ready_fence_value = m_current_fence_value; +} + +void Context::ExecuteCommandList(bool wait_for_completion) +{ + CommandListResources& res = m_command_lists[m_current_command_list]; + + // Close and queue command list. + HRESULT hr = res.command_list->Close(); + AssertMsg(SUCCEEDED(hr), "Close command list"); + const std::array execute_lists{res.command_list.Get()}; + m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); + + // Update fence when GPU has completed. 
+ hr = m_command_queue->Signal(m_fence.Get(), m_current_fence_value); + AssertMsg(SUCCEEDED(hr), "Signal fence"); + + MoveToNextCommandList(); + if (wait_for_completion) + WaitForFence(res.ready_fence_value); +} + +void Context::DeferResourceDestruction(ID3D12Resource* resource) +{ + if (!resource) + return; + + resource->AddRef(); + m_command_lists[m_current_command_list].pending_resources.push_back(resource); +} + +void Context::DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index) +{ + m_command_lists[m_current_command_list].pending_descriptors.emplace_back(manager, index); +} + +void Context::DeferDescriptorDestruction(DescriptorHeapManager& manager, DescriptorHandle* handle) +{ + if (handle->index == DescriptorHandle::INVALID_INDEX) + return; + + m_command_lists[m_current_command_list].pending_descriptors.emplace_back(manager, handle->index); + handle->Clear(); +} + +void Context::DestroyPendingResources(CommandListResources& cmdlist) +{ + for (const auto& dd : cmdlist.pending_descriptors) + dd.first.Free(dd.second); + cmdlist.pending_descriptors.clear(); + + for (ID3D12Resource* res : cmdlist.pending_resources) + res->Release(); + cmdlist.pending_resources.clear(); +} + +void Context::DestroyResources() +{ + ExecuteCommandList(true); + + m_texture_stream_buffer.Destroy(false); + m_descriptor_heap_manager.Free(&m_null_srv_descriptor); + m_sampler_heap_manager.Destroy(); + m_dsv_heap_manager.Destroy(); + m_rtv_heap_manager.Destroy(); + m_descriptor_heap_manager.Destroy(); + m_command_lists = {}; + m_current_command_list = 0; + m_completed_fence_value = 0; + m_current_fence_value = 0; + if (m_fence_event) + { + CloseHandle(m_fence_event); + m_fence_event = {}; + } + + m_command_queue.Reset(); + m_debug_interface.Reset(); + m_device.Reset(); +} + +void Context::WaitForFence(u64 fence) +{ + if (m_completed_fence_value >= fence) + return; + + // Try non-blocking check. 
+ m_completed_fence_value = m_fence->GetCompletedValue(); + if (m_completed_fence_value < fence) + { + // Fall back to event. + HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event); + AssertMsg(SUCCEEDED(hr), "Set fence event on completion"); + WaitForSingleObject(m_fence_event, INFINITE); + m_completed_fence_value = m_fence->GetCompletedValue(); + } + + // Release resources for as many command lists which have completed. + u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) + { + CommandListResources& res = m_command_lists[index]; + if (m_completed_fence_value < res.ready_fence_value) + break; + + DestroyPendingResources(res); + index = (index + 1) % NUM_COMMAND_LISTS; + } +} + +void Context::WaitForGPUIdle() +{ + u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++) + { + WaitForFence(m_command_lists[index].ready_fence_value); + index = (index + 1) % NUM_COMMAND_LISTS; + } +} +} // namespace D3D12 diff --git a/src/common/d3d12/context.h b/src/common/d3d12/context.h new file mode 100644 index 000000000..474eaeb3d --- /dev/null +++ b/src/common/d3d12/context.h @@ -0,0 +1,142 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "../types.h" +#include "../windows_headers.h" +#include "descriptor_heap_manager.h" +#include "stream_buffer.h" +#include +#include +#include +#include +#include + +struct IDXGIFactory; + +namespace D3D12 { + +class Context +{ +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + enum : u32 + { + // Number of command lists. One is being built while the other(s) are executed. + NUM_COMMAND_LISTS = 3, + + // Textures that don't fit into this buffer will be uploaded with a staging buffer. + TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024, + }; + + ~Context(); + + // Creates new device and context. 
+ static bool Create(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer); + + // Destroys active context. + static void Destroy(); + + ID3D12Device* GetDevice() const { return m_device.Get(); } + ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.Get(); } + + // Returns the current command list, commands can be recorded directly. + ID3D12GraphicsCommandList* GetCommandList() const + { + return m_command_lists[m_current_command_list].command_list.Get(); + } + + // Descriptor manager access. + DescriptorHeapManager& GetDescriptorHeapManager() { return m_descriptor_heap_manager; } + DescriptorHeapManager& GetRTVHeapManager() { return m_rtv_heap_manager; } + DescriptorHeapManager& GetDSVHeapManager() { return m_dsv_heap_manager; } + DescriptorHeapManager& GetSamplerHeapManager() { return m_sampler_heap_manager; } + ID3D12DescriptorHeap* const* GetGPUDescriptorHeaps() const { return m_gpu_descriptor_heaps.data(); } + u32 GetGPUDescriptorHeapCount() const { return static_cast(m_gpu_descriptor_heaps.size()); } + const DescriptorHandle& GetNullSRVDescriptor() const { return m_null_srv_descriptor; } + StreamBuffer& GetTextureStreamBuffer() { return m_texture_stream_buffer; } + + // Root signature access. + ComPtr SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc); + ComPtr CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc); + + // Fence value for current command list. + u64 GetCurrentFenceValue() const { return m_current_fence_value; } + + // Last "completed" fence. + u64 GetCompletedFenceValue() const { return m_completed_fence_value; } + + // Feature level to use when compiling shaders. + D3D_FEATURE_LEVEL GetFeatureLevel() const { return m_feature_level; } + + // Test for support for the specified texture format. + bool SupportsTextureFormat(DXGI_FORMAT format); + + // Executes the current command list. + void ExecuteCommandList(bool wait_for_completion); + + // Waits for a specific fence. 
+ void WaitForFence(u64 fence); + + // Waits for any in-flight command buffers to complete. + void WaitForGPUIdle(); + + // Defers destruction of a D3D resource (associates it with the current list). + void DeferResourceDestruction(ID3D12Resource* resource); + + // Defers destruction of a descriptor handle (associates it with the current list). + void DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index); + void DeferDescriptorDestruction(DescriptorHeapManager& manager, DescriptorHandle* handle); + +private: + struct CommandListResources + { + ComPtr command_allocator; + ComPtr command_list; + std::vector pending_resources; + std::vector> pending_descriptors; + u64 ready_fence_value = 0; + }; + + Context(); + + bool CreateDevice(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer); + bool CreateCommandQueue(); + bool CreateFence(); + bool CreateDescriptorHeaps(); + bool CreateCommandLists(); + bool CreateTextureStreamBuffer(); + void MoveToNextCommandList(); + void DestroyPendingResources(CommandListResources& cmdlist); + void DestroyResources(); + + ComPtr m_debug_interface; + ComPtr m_device; + ComPtr m_command_queue; + + ComPtr m_fence = nullptr; + HANDLE m_fence_event = {}; + u32 m_current_fence_value = 0; + u64 m_completed_fence_value = 0; + + std::array m_command_lists; + u32 m_current_command_list = NUM_COMMAND_LISTS - 1; + + DescriptorHeapManager m_descriptor_heap_manager; + DescriptorHeapManager m_rtv_heap_manager; + DescriptorHeapManager m_dsv_heap_manager; + DescriptorHeapManager m_sampler_heap_manager; + std::array m_gpu_descriptor_heaps = {}; + DescriptorHandle m_null_srv_descriptor; + StreamBuffer m_texture_stream_buffer; + + D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; +}; + +} // namespace D3D12 + +extern std::unique_ptr g_d3d12_context; diff --git a/src/common/d3d12/descriptor_heap_manager.cpp b/src/common/d3d12/descriptor_heap_manager.cpp new file mode 100644 index 000000000..573e47514 --- 
/dev/null +++ b/src/common/d3d12/descriptor_heap_manager.cpp @@ -0,0 +1,101 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "descriptor_heap_manager.h" +#include "../assert.h" +#include "../log.h" +#include "context.h" +Log_SetChannel(DescriptorHeapManager); + +namespace D3D12 { +DescriptorHeapManager::DescriptorHeapManager() = default; +DescriptorHeapManager::~DescriptorHeapManager() = default; + +bool DescriptorHeapManager::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors, + bool shader_visible) +{ + D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast(num_descriptors), + shader_visible ? D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : + D3D12_DESCRIPTOR_HEAP_FLAG_NONE}; + + HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_descriptor_heap)); + AssertMsg(SUCCEEDED(hr), "Create descriptor heap"); + if (FAILED(hr)) + return false; + + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); + m_num_descriptors = num_descriptors; + m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type); + + // Set all slots to unallocated (1) + const u32 bitset_count = num_descriptors / BITSET_SIZE + (((num_descriptors % BITSET_SIZE) != 0) ? 1 : 0); + m_free_slots.resize(bitset_count); + for (BitSetType& bs : m_free_slots) + bs.flip(); + + return true; +} + +void DescriptorHeapManager::Destroy() +{ + for (BitSetType& bs : m_free_slots) + Assert(bs.all()); + + m_num_descriptors = 0; + m_descriptor_increment_size = 0; + m_heap_base_cpu = {}; + m_heap_base_gpu = {}; + m_descriptor_heap.Reset(); + m_free_slots.clear(); +} + +bool DescriptorHeapManager::Allocate(DescriptorHandle* handle) +{ + // Start past the temporary slots, no point in searching those. 
+ for (u32 group = 0; group < m_free_slots.size(); group++) + { + BitSetType& bs = m_free_slots[group]; + if (bs.none()) + continue; + + u32 bit = 0; + for (; bit < BITSET_SIZE; bit++) + { + if (bs[bit]) + break; + } + + u32 index = group * BITSET_SIZE + bit; + bs[bit] = false; + + handle->index = index; + handle->cpu_handle.ptr = m_heap_base_cpu.ptr + index * m_descriptor_increment_size; + handle->gpu_handle.ptr = m_heap_base_gpu.ptr + index * m_descriptor_increment_size; + return true; + } + + Panic("Out of fixed descriptors"); + return false; +} + +void DescriptorHeapManager::Free(u32 index) +{ + Assert(index < m_num_descriptors); + + u32 group = index / BITSET_SIZE; + u32 bit = index % BITSET_SIZE; + m_free_slots[group][bit] = true; +} + +void DescriptorHeapManager::Free(DescriptorHandle* handle) +{ + if (handle->index == DescriptorHandle::INVALID_INDEX) + return; + + Free(handle->index); + handle->Clear(); +} + +} // namespace D3D12 diff --git a/src/common/d3d12/descriptor_heap_manager.h b/src/common/d3d12/descriptor_heap_manager.h new file mode 100644 index 000000000..4be5144ed --- /dev/null +++ b/src/common/d3d12/descriptor_heap_manager.h @@ -0,0 +1,70 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "../types.h" +#include "../windows_headers.h" +#include +#include +#include +#include +#include + +namespace D3D12 { +// This class provides an abstraction for D3D12 descriptor heaps. 
+struct DescriptorHandle final +{ + enum : u32 + { + INVALID_INDEX = 0xFFFFFFFF + }; + + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle{}; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle{}; + u32 index = INVALID_INDEX; + + ALWAYS_INLINE operator bool() const { return index != INVALID_INDEX; } + + ALWAYS_INLINE operator D3D12_CPU_DESCRIPTOR_HANDLE() const { return cpu_handle; } + ALWAYS_INLINE operator D3D12_GPU_DESCRIPTOR_HANDLE() const { return gpu_handle; } + + ALWAYS_INLINE void Clear() + { + cpu_handle = {}; + gpu_handle = {}; + index = INVALID_INDEX; + } +}; + +class DescriptorHeapManager final +{ +public: + DescriptorHeapManager(); + ~DescriptorHeapManager(); + + ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } + u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; } + + bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors, bool shader_visible); + void Destroy(); + + bool Allocate(DescriptorHandle* handle); + void Free(DescriptorHandle* handle); + void Free(u32 index); + +private: + Microsoft::WRL::ComPtr m_descriptor_heap; + u32 m_num_descriptors = 0; + u32 m_descriptor_increment_size = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {}; + + static constexpr u32 BITSET_SIZE = 1024; + using BitSetType = std::bitset; + std::vector m_free_slots = {}; +}; + +} // namespace D3D12 diff --git a/src/common/d3d12/shader_cache.cpp b/src/common/d3d12/shader_cache.cpp new file mode 100644 index 000000000..4baa09266 --- /dev/null +++ b/src/common/d3d12/shader_cache.cpp @@ -0,0 +1,481 @@ +#include "shader_cache.h" +#include "../d3d11/shader_compiler.h" +#include "../file_system.h" +#include "../log.h" +#include "../md5_digest.h" +#include +Log_SetChannel(D3D12::ShaderCache); + +#ifdef _UWP +#include +#endif + +namespace D3D12 { + +#pragma pack(push, 1) +struct CacheIndexEntry +{ + u64 source_hash_low; + u64 source_hash_high; + u32 
source_length; + u32 shader_type; + u32 file_offset; + u32 blob_size; +}; +#pragma pack(pop) + +static bool CanUsePipelineCache() +{ +#ifdef _UWP + // GetCachedBlob crashes on XBox UWP for some reason... + const auto version_info = winrt::Windows::System::Profile::AnalyticsInfo::VersionInfo(); + const auto device_family = version_info.DeviceFamily(); + return (device_family != L"Windows.Xbox"); +#else + return true; +#endif +} + +ShaderCache::ShaderCache() : m_use_pipeline_cache(CanUsePipelineCache()) {} + +ShaderCache::~ShaderCache() +{ + if (m_pipeline_index_file) + std::fclose(m_pipeline_index_file); + if (m_pipeline_blob_file) + std::fclose(m_pipeline_blob_file); + if (m_shader_index_file) + std::fclose(m_shader_index_file); + if (m_shader_blob_file) + std::fclose(m_shader_blob_file); +} + +bool ShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const +{ + return (source_hash_low == key.source_hash_low && source_hash_high == key.source_hash_high && + source_length == key.source_length && type == key.type); +} + +bool ShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const +{ + return (source_hash_low != key.source_hash_low || source_hash_high != key.source_hash_high || + source_length != key.source_length || type != key.type); +} + +void ShaderCache::Open(std::string_view base_path, D3D_FEATURE_LEVEL feature_level, bool debug) +{ + m_base_path = base_path; + m_feature_level = feature_level; + m_debug = debug; + + if (!base_path.empty()) + { + const std::string base_shader_filename = GetCacheBaseFileName(base_path, "shaders", feature_level, debug); + const std::string shader_index_filename = base_shader_filename + ".idx"; + const std::string shader_blob_filename = base_shader_filename + ".bin"; + + if (!ReadExisting(shader_index_filename, shader_blob_filename, m_shader_index_file, m_shader_blob_file, + m_shader_index)) + { + CreateNew(shader_index_filename, shader_blob_filename, m_shader_index_file, m_shader_blob_file); + } + + if 
(m_use_pipeline_cache) + { + const std::string base_pipelines_filename = GetCacheBaseFileName(base_path, "pipelines", feature_level, debug); + const std::string pipelines_index_filename = base_pipelines_filename + ".idx"; + const std::string pipelines_blob_filename = base_pipelines_filename + ".bin"; + + if (!ReadExisting(pipelines_index_filename, pipelines_blob_filename, m_pipeline_index_file, m_pipeline_blob_file, + m_pipeline_index)) + { + CreateNew(pipelines_index_filename, pipelines_blob_filename, m_pipeline_index_file, m_pipeline_blob_file); + } + } + } +} + +void ShaderCache::InvalidatePipelineCache() +{ + m_pipeline_index.clear(); + if (m_pipeline_blob_file) + { + std::fclose(m_pipeline_blob_file); + m_pipeline_blob_file = nullptr; + } + + if (m_pipeline_index_file) + { + std::fclose(m_pipeline_index_file); + m_pipeline_index_file = nullptr; + } + + if (m_use_pipeline_cache) + { + const std::string base_pipelines_filename = + GetCacheBaseFileName(m_base_path, "pipelines", m_feature_level, m_debug); + const std::string pipelines_index_filename = base_pipelines_filename + ".idx"; + const std::string pipelines_blob_filename = base_pipelines_filename + ".bin"; + CreateNew(pipelines_index_filename, pipelines_blob_filename, m_pipeline_index_file, m_pipeline_blob_file); + } +} + +bool ShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename, std::FILE*& index_file, + std::FILE*& blob_file) +{ + if (FileSystem::FileExists(index_filename.c_str())) + { + Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); + FileSystem::DeleteFile(index_filename.c_str()); + } + if (FileSystem::FileExists(blob_filename.c_str())) + { + Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); + FileSystem::DeleteFile(blob_filename.c_str()); + } + + index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); + if (!index_file) + { + Log_ErrorPrintf("Failed to open index file '%s' for writing", 
index_filename.c_str());
+    return false;
+  }
+
+  // The index file starts with FILE_VERSION; ReadExisting() rejects any file whose first u32 differs.
+  const u32 index_version = FILE_VERSION;
+  if (std::fwrite(&index_version, sizeof(index_version), 1, index_file) != 1)
+  {
+    Log_ErrorPrintf("Failed to write version to index file '%s'", index_filename.c_str());
+    std::fclose(index_file);
+    index_file = nullptr;
+    FileSystem::DeleteFile(index_filename.c_str());
+    return false;
+  }
+
+  blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b");
+  if (!blob_file)
+  {
+    Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str());
+
+    // blob_file is null on this path, so it must not be handed to fclose(). The handle that is
+    // still open is the index file created above: close it and clear the caller's pointer
+    // before removing the file, so no open handle is left on a file that was just deleted.
+    std::fclose(index_file);
+    index_file = nullptr;
+    FileSystem::DeleteFile(index_filename.c_str());
+    return false;
+  }
+
+  return true;
+}
+
+bool ShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename,
+                               std::FILE*& index_file, std::FILE*& blob_file, CacheIndex& index)
+{
+  index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b");
+  if (!index_file)
+    return false;
+
+  u32 file_version;
+  if (std::fread(&file_version, sizeof(file_version), 1, index_file) != 1 || file_version != FILE_VERSION)
+  {
+    Log_ErrorPrintf("Bad file version in '%s'", index_filename.c_str());
+    std::fclose(index_file);
+    index_file = nullptr;
+    return false;
+  }
+
+  blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b");
+  if (!blob_file)
+  {
+    Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str());
+    std::fclose(index_file);
+    index_file = nullptr;
+    return false;
+  }
+
+  std::fseek(blob_file, 0, SEEK_END);
+  const u32 blob_file_size = static_cast(std::ftell(blob_file));
+
+  for (;;)
+  {
+    CacheIndexEntry entry;
+    if (std::fread(&entry, sizeof(entry), 1, index_file) != 1 || (entry.file_offset + entry.blob_size) > blob_file_size)
+    {
+      if (std::feof(index_file))
+        break;
+
+      Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str());
+      index.clear();
+      std::fclose(blob_file);
+      blob_file = nullptr;
+      std::fclose(index_file);
+      index_file =
nullptr; + return false; + } + + const CacheIndexKey key{entry.source_hash_low, entry.source_hash_high, entry.source_length, + static_cast(entry.shader_type)}; + const CacheIndexData data{entry.file_offset, entry.blob_size}; + index.emplace(key, data); + } + + // ensure we don't write before seeking + std::fseek(index_file, 0, SEEK_END); + + Log_InfoPrintf("Read %zu entries from '%s'", index.size(), index_filename.c_str()); + return true; +} + +std::string ShaderCache::GetCacheBaseFileName(const std::string_view& base_path, const std::string_view& type, + D3D_FEATURE_LEVEL feature_level, bool debug) +{ + std::string base_filename(base_path); + base_filename += "d3d12_"; + base_filename += type; + base_filename += "_"; + + switch (feature_level) + { + case D3D_FEATURE_LEVEL_10_0: + base_filename += "sm40"; + break; + case D3D_FEATURE_LEVEL_10_1: + base_filename += "sm41"; + break; + case D3D_FEATURE_LEVEL_11_0: + base_filename += "sm50"; + break; + default: + base_filename += "unk"; + break; + } + + if (debug) + base_filename += "_debug"; + + return base_filename; +} + +union MD5Hash +{ + struct + { + u64 low; + u64 high; + }; + u8 hash[16]; +}; + +ShaderCache::CacheIndexKey ShaderCache::GetShaderCacheKey(EntryType type, const std::string_view& shader_code) +{ + MD5Hash h; + MD5Digest digest; + digest.Update(shader_code.data(), static_cast(shader_code.length())); + digest.Final(h.hash); + + return CacheIndexKey{h.low, h.high, static_cast(shader_code.length()), type}; +} + +ShaderCache::CacheIndexKey ShaderCache::GetPipelineCacheKey(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc) +{ + MD5Digest digest; + u32 length = sizeof(D3D12_GRAPHICS_PIPELINE_STATE_DESC); + + if (gpdesc.VS.BytecodeLength > 0) + { + digest.Update(gpdesc.VS.pShaderBytecode, static_cast(gpdesc.VS.BytecodeLength)); + length += static_cast(gpdesc.VS.BytecodeLength); + } + if (gpdesc.GS.BytecodeLength > 0) + { + digest.Update(gpdesc.GS.pShaderBytecode, static_cast(gpdesc.GS.BytecodeLength)); + 
length += static_cast(gpdesc.GS.BytecodeLength); + } + if (gpdesc.PS.BytecodeLength > 0) + { + digest.Update(gpdesc.PS.pShaderBytecode, static_cast(gpdesc.PS.BytecodeLength)); + length += static_cast(gpdesc.PS.BytecodeLength); + } + + digest.Update(&gpdesc.BlendState, sizeof(gpdesc.BlendState)); + digest.Update(&gpdesc.SampleMask, sizeof(gpdesc.SampleMask)); + digest.Update(&gpdesc.RasterizerState, sizeof(gpdesc.RasterizerState)); + digest.Update(&gpdesc.DepthStencilState, sizeof(gpdesc.DepthStencilState)); + + for (u32 i = 0; i < gpdesc.InputLayout.NumElements; i++) + { + const D3D12_INPUT_ELEMENT_DESC& ie = gpdesc.InputLayout.pInputElementDescs[i]; + digest.Update(ie.SemanticName, static_cast(std::strlen(ie.SemanticName))); + digest.Update(&ie.SemanticIndex, sizeof(ie.SemanticIndex)); + digest.Update(&ie.Format, sizeof(ie.Format)); + digest.Update(&ie.InputSlot, sizeof(ie.InputSlot)); + digest.Update(&ie.AlignedByteOffset, sizeof(ie.AlignedByteOffset)); + digest.Update(&ie.InputSlotClass, sizeof(ie.InputSlotClass)); + digest.Update(&ie.InstanceDataStepRate, sizeof(ie.InstanceDataStepRate)); + length += sizeof(D3D12_INPUT_ELEMENT_DESC); + } + + digest.Update(&gpdesc.IBStripCutValue, sizeof(gpdesc.IBStripCutValue)); + digest.Update(&gpdesc.PrimitiveTopologyType, sizeof(gpdesc.PrimitiveTopologyType)); + digest.Update(&gpdesc.NumRenderTargets, sizeof(gpdesc.NumRenderTargets)); + digest.Update(gpdesc.RTVFormats, sizeof(gpdesc.RTVFormats)); + digest.Update(&gpdesc.DSVFormat, sizeof(gpdesc.DSVFormat)); + digest.Update(&gpdesc.SampleDesc, sizeof(gpdesc.SampleDesc)); + digest.Update(&gpdesc.Flags, sizeof(gpdesc.Flags)); + + MD5Hash h; + digest.Final(h.hash); + + return CacheIndexKey{h.low, h.high, length, EntryType::GraphicsPipeline}; +} + +ShaderCache::ComPtr ShaderCache::GetShaderBlob(EntryType type, std::string_view shader_code) +{ + const auto key = GetShaderCacheKey(type, shader_code); + auto iter = m_shader_index.find(key); + if (iter == m_shader_index.end()) + 
return CompileAndAddShaderBlob(key, shader_code); + + ComPtr blob; + HRESULT hr = D3DCreateBlob(iter->second.blob_size, blob.GetAddressOf()); + if (FAILED(hr) || std::fseek(m_shader_blob_file, iter->second.file_offset, SEEK_SET) != 0 || + std::fread(blob->GetBufferPointer(), 1, iter->second.blob_size, m_shader_blob_file) != iter->second.blob_size) + { + Log_ErrorPrintf("Read blob from file failed"); + return {}; + } + + return blob; +} + +ShaderCache::ComPtr ShaderCache::GetPipelineState(ID3D12Device* device, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC& desc) +{ + const auto key = GetPipelineCacheKey(desc); + + auto iter = m_pipeline_index.find(key); + if (iter == m_pipeline_index.end()) + return CompileAndAddPipeline(device, key, desc); + + ComPtr blob; + HRESULT hr = D3DCreateBlob(iter->second.blob_size, blob.GetAddressOf()); + if (FAILED(hr) || std::fseek(m_pipeline_blob_file, iter->second.file_offset, SEEK_SET) != 0 || + std::fread(blob->GetBufferPointer(), 1, iter->second.blob_size, m_pipeline_blob_file) != iter->second.blob_size) + { + Log_ErrorPrintf("Read blob from file failed"); + return {}; + } + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc_with_blob(desc); + desc_with_blob.CachedPSO.pCachedBlob = blob->GetBufferPointer(); + desc_with_blob.CachedPSO.CachedBlobSizeInBytes = blob->GetBufferSize(); + + ComPtr pso; + hr = device->CreateGraphicsPipelineState(&desc_with_blob, IID_PPV_ARGS(pso.GetAddressOf())); + if (FAILED(hr)) + { + Log_WarningPrintf("Creating cached PSO failed: %08X. 
Invalidating cache.", hr); + InvalidatePipelineCache(); + pso = CompileAndAddPipeline(device, key, desc); + } + + return pso; +} + +ShaderCache::ComPtr ShaderCache::CompileAndAddShaderBlob(const CacheIndexKey& key, + std::string_view shader_code) +{ + ComPtr blob; + + switch (key.type) + { + case EntryType::VertexShader: + blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Vertex, m_feature_level, shader_code, + m_debug); + break; + case EntryType::GeometryShader: + blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Geometry, m_feature_level, shader_code, + m_debug); + break; + case EntryType::PixelShader: + blob = + D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Pixel, m_feature_level, shader_code, m_debug); + break; + default: + break; + } + + if (!blob) + return {}; + + if (!m_shader_blob_file || std::fseek(m_shader_blob_file, 0, SEEK_END) != 0) + return blob; + + CacheIndexData data; + data.file_offset = static_cast(std::ftell(m_shader_blob_file)); + data.blob_size = static_cast(blob->GetBufferSize()); + + CacheIndexEntry entry = {}; + entry.source_hash_low = key.source_hash_low; + entry.source_hash_high = key.source_hash_high; + entry.source_length = key.source_length; + entry.shader_type = static_cast(key.type); + entry.blob_size = data.blob_size; + entry.file_offset = data.file_offset; + + if (std::fwrite(blob->GetBufferPointer(), 1, entry.blob_size, m_shader_blob_file) != entry.blob_size || + std::fflush(m_shader_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_shader_index_file) != 1 || + std::fflush(m_shader_index_file) != 0) + { + Log_ErrorPrintf("Failed to write shader blob to file"); + return blob; + } + + m_shader_index.emplace(key, data); + return blob; +} + +ShaderCache::ComPtr +ShaderCache::CompileAndAddPipeline(ID3D12Device* device, const CacheIndexKey& key, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc) +{ + ComPtr pso; + HRESULT hr = 
device->CreateGraphicsPipelineState(&gpdesc, IID_PPV_ARGS(pso.GetAddressOf()));
+  if (FAILED(hr))
+  {
+    Log_ErrorPrintf("Creating cached PSO failed: %08X", hr);
+    return {};
+  }
+
+  // No pipeline blob file is open, or seeking to its end failed: hand back the PSO without
+  // persisting it.
+  if (!m_pipeline_blob_file || std::fseek(m_pipeline_blob_file, 0, SEEK_END) != 0)
+    return pso;
+
+  ComPtr<ID3DBlob> blob;
+  hr = pso->GetCachedBlob(blob.GetAddressOf());
+  if (FAILED(hr))
+  {
+    Log_WarningPrintf("Failed to get cached PSO data: %08X", hr);
+    return pso;
+  }
+
+  // The blob is appended at the position the fseek() above moved to.
+  CacheIndexData data;
+  data.file_offset = static_cast<u32>(std::ftell(m_pipeline_blob_file));
+  data.blob_size = static_cast<u32>(blob->GetBufferSize());
+
+  CacheIndexEntry entry = {};
+  entry.source_hash_low = key.source_hash_low;
+  entry.source_hash_high = key.source_hash_high;
+  entry.source_length = key.source_length;
+  entry.shader_type = static_cast<u32>(key.type);
+  entry.blob_size = data.blob_size;
+  entry.file_offset = data.file_offset;
+
+  // Write and flush the blob first, then the index entry that points at it. A failure at any
+  // step still returns the usable PSO; the entry is simply not recorded in memory.
+  if (std::fwrite(blob->GetBufferPointer(), 1, entry.blob_size, m_pipeline_blob_file) != entry.blob_size ||
+      std::fflush(m_pipeline_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_pipeline_index_file) != 1 ||
+      std::fflush(m_pipeline_index_file) != 0)
+  {
+    Log_ErrorPrintf("Failed to write pipeline blob to file");
+    return pso;
+  }
+
+  // Register the entry in the pipeline index, which is the map GetPipelineState() searches.
+  // Inserting into m_shader_index instead meant the pipeline was never found again in this
+  // session and was recompiled and re-appended to the blob file on every request.
+  m_pipeline_index.emplace(key, data);
+  return pso;
+}
+
+} // namespace D3D12
diff --git a/src/common/d3d12/shader_cache.h b/src/common/d3d12/shader_cache.h
new file mode 100644
index 000000000..a6840305a
--- /dev/null
+++ b/src/common/d3d12/shader_cache.h
@@ -0,0 +1,118 @@
+#pragma once
+#include "../hash_combine.h"
+#include "../types.h"
+#include "../windows_headers.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace D3D12 {
+
+class ShaderCache
+{
+public:
+  template
+  using ComPtr = Microsoft::WRL::ComPtr;
+
+  enum class EntryType
+  {
+    VertexShader,
+    GeometryShader,
+    PixelShader,
+    ComputeShader,
+    GraphicsPipeline,
+  };
+
+  ShaderCache();
+  ~ShaderCache();
+
+  void Open(std::string_view base_path, D3D_FEATURE_LEVEL
feature_level, bool debug); + + ALWAYS_INLINE ComPtr GetVertexShader(std::string_view shader_code) + { + return GetShaderBlob(EntryType::VertexShader, shader_code); + } + ALWAYS_INLINE ComPtr GetGeometryShader(std::string_view shader_code) + { + return GetShaderBlob(EntryType::GeometryShader, shader_code); + } + ALWAYS_INLINE ComPtr GetPixelShader(std::string_view shader_code) + { + return GetShaderBlob(EntryType::PixelShader, shader_code); + } + ALWAYS_INLINE ComPtr GetComputeShader(std::string_view shader_code) + { + return GetShaderBlob(EntryType::ComputeShader, shader_code); + } + + ComPtr GetShaderBlob(EntryType type, std::string_view shader_code); + + ComPtr GetPipelineState(ID3D12Device* device, const D3D12_GRAPHICS_PIPELINE_STATE_DESC& desc); + +private: + static constexpr u32 FILE_VERSION = 1; + + struct CacheIndexKey + { + u64 source_hash_low; + u64 source_hash_high; + u32 source_length; + EntryType type; + + bool operator==(const CacheIndexKey& key) const; + bool operator!=(const CacheIndexKey& key) const; + }; + + struct CacheIndexEntryHasher + { + std::size_t operator()(const CacheIndexKey& e) const noexcept + { + std::size_t h = 0; + hash_combine(h, e.source_hash_low, e.source_hash_high, e.source_length, e.type); + return h; + } + }; + + struct CacheIndexData + { + u32 file_offset; + u32 blob_size; + }; + + using CacheIndex = std::unordered_map; + + static std::string GetCacheBaseFileName(const std::string_view& base_path, const std::string_view& type, + D3D_FEATURE_LEVEL feature_level, bool debug); + static CacheIndexKey GetShaderCacheKey(EntryType type, const std::string_view& shader_code); + static CacheIndexKey GetPipelineCacheKey(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc); + + bool CreateNew(const std::string& index_filename, const std::string& blob_filename, std::FILE*& index_file, + std::FILE*& blob_file); + bool ReadExisting(const std::string& index_filename, const std::string& blob_filename, std::FILE*& index_file, + std::FILE*& 
blob_file, CacheIndex& index); + void InvalidatePipelineCache(); + void Close(); + + ComPtr CompileAndAddShaderBlob(const CacheIndexKey& key, std::string_view shader_code); + ComPtr CompileAndAddPipeline(ID3D12Device* device, const CacheIndexKey& key, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc); + + std::string m_base_path; + + std::FILE* m_shader_index_file = nullptr; + std::FILE* m_shader_blob_file = nullptr; + CacheIndex m_shader_index; + + std::FILE* m_pipeline_index_file = nullptr; + std::FILE* m_pipeline_blob_file = nullptr; + CacheIndex m_pipeline_index; + + D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; + bool m_use_pipeline_cache = false; + bool m_debug = false; +}; + +} // namespace D3D12 diff --git a/src/common/d3d12/staging_texture.cpp b/src/common/d3d12/staging_texture.cpp new file mode 100644 index 000000000..ba434d9cb --- /dev/null +++ b/src/common/d3d12/staging_texture.cpp @@ -0,0 +1,234 @@ +#include "staging_texture.h" +#include "../align.h" +#include "../assert.h" +#include "../log.h" +#include "context.h" +#include "util.h" +Log_SetChannel(D3D12); + +namespace D3D12 { + +StagingTexture::StagingTexture() : m_width(0), m_height(0) {} + +StagingTexture::~StagingTexture() +{ + Destroy(); +} + +bool StagingTexture::Create(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading) +{ + const u32 texel_size = GetTexelSize(format); + const u32 row_pitch = Common::AlignUpPow2(width * texel_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 buffer_size = height * row_pitch; + + const D3D12_HEAP_PROPERTIES heap_properties = {for_uploading ? 
D3D12_HEAP_TYPE_UPLOAD : D3D12_HEAP_TYPE_READBACK};
+
+  D3D12_RESOURCE_DESC desc = {};
+  desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+  desc.Width = buffer_size;
+  desc.Height = 1;
+  desc.DepthOrArraySize = 1;
+  desc.MipLevels = 1;
+  desc.Format = DXGI_FORMAT_UNKNOWN;
+  desc.SampleDesc.Count = 1;
+  desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+  desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+  D3D12_RESOURCE_STATES state = for_uploading ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
+
+  ComPtr<ID3D12Resource> resource;
+  HRESULT hr = g_d3d12_context->GetDevice()->CreateCommittedResource(
+    &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, state, nullptr, IID_PPV_ARGS(resource.GetAddressOf()));
+  if (FAILED(hr))
+  {
+    Log_ErrorPrintf("Create buffer failed: 0x%08X", hr);
+    return false;
+  }
+
+  // Retire any buffer this object already holds before adopting the new one. Destroy() unmaps
+  // it if it is mapped (so m_mapped_pointer cannot keep pointing into the old resource) and
+  // queues it for deferred destruction instead of releasing it immediately. This is done only
+  // after the new buffer was created, so a failed Create() leaves the old buffer intact, and it
+  // mirrors the Destroy(true) call in StreamBuffer::Create().
+  Destroy(true);
+
+  m_resource = std::move(resource);
+  m_width = width;
+  m_height = height;
+  m_format = format;
+  m_buffer_size = buffer_size;
+  m_row_pitch = row_pitch;
+  m_texel_size = texel_size;
+  return true;
+}
+
+void StagingTexture::Destroy(bool defer)
+{
+  if (IsMapped())
+    Unmap();
+
+  if (m_resource && defer)
+    g_d3d12_context->DeferResourceDestruction(m_resource.Get());
+  m_resource.Reset();
+  m_width = 0;
+  m_height = 0;
+  m_format = DXGI_FORMAT_UNKNOWN;
+  m_buffer_size = 0;
+  m_row_pitch = 0;
+  m_texel_size = 0;
+}
+
+bool StagingTexture::Map(bool writing)
+{
+  D3D12_RANGE range{0u, m_buffer_size};
+
+  Assert(!IsMapped());
+  const HRESULT hr = m_resource->Map(0, writing ? nullptr : &range, &m_mapped_pointer);
+  if (FAILED(hr))
+  {
+    Log_ErrorPrintf("Map staging buffer failed: 0x%08X", hr);
+    return false;
+  }
+
+  m_mapped_for_write = writing;
+  return true;
+}
+
+void StagingTexture::Unmap()
+{
+  Assert(IsMapped());
+
+  D3D12_RANGE range{0u, m_buffer_size};
+  m_resource->Unmap(0, m_mapped_for_write ?
&range : nullptr);
+  m_mapped_pointer = nullptr;
+  m_mapped_for_write = false;
+}
+
+void StagingTexture::Flush()
+{
+  if (!m_needs_flush)
+    return;
+
+  m_needs_flush = false;
+
+  // If the completed fence is the same as the current command buffer fence, we need to execute
+  // the current list and wait for it to complete. This is the slowest path. Otherwise, if the
+  // command list with the copy has been submitted, we only need to wait for the fence.
+  if (m_completed_fence == g_d3d12_context->GetCurrentFenceValue())
+    g_d3d12_context->ExecuteCommandList(true);
+  else
+    g_d3d12_context->WaitForFence(m_completed_fence);
+}
+
+// Records, on the context's current command list, a copy of the width x height region starting
+// at (src_x, src_y) in this staging buffer into subresource dst_subresource of dst_texture at
+// (dst_x, dst_y). The source region must lie within the staging buffer's dimensions.
+void StagingTexture::CopyToTexture(u32 src_x, u32 src_y, ID3D12Resource* dst_texture, u32 dst_subresource, u32 dst_x,
+                                   u32 dst_y, u32 width, u32 height)
+{
+  DebugAssert((src_x + width) <= m_width && (src_y + height) <= m_height);
+
+  D3D12_TEXTURE_COPY_LOCATION dst;
+  dst.pResource = dst_texture;
+  // Honour the caller's subresource instead of always targeting subresource 0.
+  dst.SubresourceIndex = dst_subresource;
+  dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+
+  D3D12_TEXTURE_COPY_LOCATION src;
+  src.pResource = m_resource.Get();
+  src.SubresourceIndex = 0;
+  src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+  src.PlacedFootprint.Offset = 0;
+  src.PlacedFootprint.Footprint.Width = m_width;
+  src.PlacedFootprint.Footprint.Height = m_height;
+  src.PlacedFootprint.Footprint.Depth = 1;
+  src.PlacedFootprint.Footprint.Format = m_format;
+  src.PlacedFootprint.Footprint.RowPitch = m_row_pitch;
+
+  const D3D12_BOX src_box{src_x, src_y, 0u, src_x + width, src_y + height, 1u};
+  g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, dst_x, dst_y, 0, &src, &src_box);
+}
+
+void StagingTexture::CopyFromTexture(ID3D12Resource* src_texture, u32 src_subresource, u32 src_x, u32 src_y, u32 dst_x,
+                                     u32 dst_y, u32 width, u32 height)
+{
+  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
+
+  D3D12_TEXTURE_COPY_LOCATION src;
+  src.pResource = src_texture;
+  // Honour the caller's subresource instead of always reading subresource 0.
+  src.SubresourceIndex = src_subresource;
+  src.Type =
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + + D3D12_TEXTURE_COPY_LOCATION dst; + dst.pResource = m_resource.Get(); + dst.SubresourceIndex = 0; + dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst.PlacedFootprint.Offset = 0; + dst.PlacedFootprint.Footprint.Width = m_width; + dst.PlacedFootprint.Footprint.Height = m_height; + dst.PlacedFootprint.Footprint.Depth = 1; + dst.PlacedFootprint.Footprint.Format = m_format; + dst.PlacedFootprint.Footprint.RowPitch = m_row_pitch; + + const D3D12_BOX src_box{src_x, src_y, 0u, src_x + width, src_y + height, 1u}; + g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, dst_x, dst_y, 0, &src, &src_box); + m_completed_fence = g_d3d12_context->GetCurrentFenceValue(); + m_needs_flush = true; +} + +bool StagingTexture::ReadPixels(u32 x, u32 y, u32 width, u32 height, void* data, u32 row_pitch) +{ + if (m_needs_flush) + Flush(); + + const bool was_mapped = IsMapped(); + if (!was_mapped && !Map(false)) + return false; + + const u8* src_ptr = static_cast(m_mapped_pointer) + (y * m_row_pitch) + (x * m_texel_size); + u8* dst_ptr = reinterpret_cast(data); + if (m_row_pitch != row_pitch || width != m_width || x != 0) + { + const u32 copy_size = m_texel_size * width; + for (u32 row = 0; row < height; row++) + { + std::memcpy(dst_ptr, src_ptr, copy_size); + src_ptr += m_row_pitch; + dst_ptr += row_pitch; + } + } + else + { + std::memcpy(dst_ptr, src_ptr, row_pitch * height); + } + + return true; +} + +bool StagingTexture::WritePixels(u32 x, u32 y, u32 width, u32 height, const void* data, u32 row_pitch) +{ + const bool was_mapped = IsMapped(); + if (!was_mapped && !Map(true)) + return false; + + const u8* src_ptr = reinterpret_cast(data); + u8* dst_ptr = static_cast(m_mapped_pointer) + (y * m_row_pitch) + (x * m_texel_size); + if (m_row_pitch != row_pitch || width != m_width || x != 0) + { + const u32 copy_size = m_texel_size * width; + for (u32 row = 0; row < height; row++) + { + std::memcpy(dst_ptr, src_ptr, copy_size); + src_ptr 
+= row_pitch; + dst_ptr += m_row_pitch; + } + } + else + { + std::memcpy(dst_ptr, src_ptr, m_row_pitch * height); + } + + if (!was_mapped) + Unmap(); + + return true; +} + +bool StagingTexture::EnsureSize(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading) +{ + if (m_resource && m_width >= width && m_height >= height && m_format == format) + return true; + + return Create(width, height, format, for_uploading); +} + +} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/staging_texture.h b/src/common/d3d12/staging_texture.h new file mode 100644 index 000000000..22bc0ba47 --- /dev/null +++ b/src/common/d3d12/staging_texture.h @@ -0,0 +1,62 @@ +#pragma once +#include "../types.h" +#include "../windows_headers.h" +#include +#include +#include + +namespace D3D12 { +class StagingTexture +{ +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + StagingTexture(); + ~StagingTexture(); + + ALWAYS_INLINE ID3D12Resource* GetD3DResource() const { return m_resource.Get(); } + + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } + ALWAYS_INLINE DXGI_FORMAT GetFormat() const { return m_format; } + ALWAYS_INLINE bool IsMapped() const { return m_mapped_pointer != nullptr; } + ALWAYS_INLINE const void* GetMapPointer() const { return m_mapped_pointer; } + + ALWAYS_INLINE operator bool() const { return static_cast(m_resource); } + + bool Create(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading); + void Destroy(bool defer = true); + + bool Map(bool writing); + void Unmap(); + void Flush(); + + void CopyToTexture(u32 src_x, u32 src_y, ID3D12Resource* dst_texture, u32 dst_subresource, u32 dst_x, u32 dst_y, + u32 width, u32 height); + void CopyFromTexture(ID3D12Resource* src_texture, u32 src_subresource, u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, + u32 width, u32 height); + + + bool ReadPixels(u32 x, u32 y, u32 width, u32 height, void* data, u32 row_pitch); + + bool 
WritePixels(u32 x, u32 y, u32 width, u32 height, const void* data, u32 row_pitch); + + bool EnsureSize(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading); + +protected: + ComPtr m_resource; + u32 m_width; + u32 m_height; + DXGI_FORMAT m_format; + u32 m_texel_size; + u32 m_row_pitch; + u32 m_buffer_size; + + void* m_mapped_pointer = nullptr; + u64 m_completed_fence = 0; + bool m_mapped_for_write = false; + bool m_needs_flush = false; +}; + +} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/stream_buffer.cpp b/src/common/d3d12/stream_buffer.cpp new file mode 100644 index 000000000..5a86d5cbd --- /dev/null +++ b/src/common/d3d12/stream_buffer.cpp @@ -0,0 +1,263 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "stream_buffer.h" +#include "../align.h" +#include "../assert.h" +#include "../log.h" +#include "context.h" +#include +#include +Log_SetChannel(D3D12::StreamBuffer); + +namespace D3D12 { +StreamBuffer::StreamBuffer() = default; + +StreamBuffer::~StreamBuffer() +{ + Destroy(); +} + +bool StreamBuffer::Create(u32 size) +{ + static const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_UPLOAD}; + const D3D12_RESOURCE_DESC resource_desc = { + D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + + Microsoft::WRL::ComPtr buffer; + + HRESULT hr = g_d3d12_context->GetDevice()->CreateCommittedResource(&heap_properties, D3D12_HEAP_FLAG_NONE, + &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, IID_PPV_ARGS(buffer.GetAddressOf())); + AssertMsg(SUCCEEDED(hr), "Allocate buffer"); + if (FAILED(hr)) + return false; + + static const D3D12_RANGE read_range = {}; + u8* host_pointer; + hr = buffer->Map(0, &read_range, reinterpret_cast(&host_pointer)); + AssertMsg(SUCCEEDED(hr), "Map buffer"); + if (FAILED(hr)) + return false; + + Destroy(true); + + 
m_buffer = std::move(buffer); + m_host_pointer = host_pointer; + m_size = size; + m_gpu_pointer = m_buffer->GetGPUVirtualAddress(); + return true; +} + +bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) +{ + const u32 required_bytes = num_bytes + alignment; + + // Check for sane allocations + if (required_bytes > m_size) + { + Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast(num_bytes), + static_cast(m_size)); + Panic("Stream buffer overflow"); + return false; + } + + // Is the GPU behind or up to date with our current offset? + UpdateCurrentFencePosition(); + if (m_current_offset >= m_current_gpu_position) + { + const u32 remaining_bytes = m_size - m_current_offset; + if (required_bytes <= remaining_bytes) + { + // Place at the current position, after the GPU position. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_size - m_current_offset; + return true; + } + + // Check for space at the start of the buffer + // We use < here because we don't want to have the case of m_current_offset == + // m_current_gpu_position. That would mean the code above would assume the + // GPU has caught up to us, which it hasn't. + if (required_bytes < m_current_gpu_position) + { + // Reset offset to zero, since we're allocating behind the gpu now + m_current_offset = 0; + m_current_space = m_current_gpu_position; + return true; + } + } + + // Is the GPU ahead of our current offset? + if (m_current_offset < m_current_gpu_position) + { + // We have from m_current_offset..m_current_gpu_position space to use. + const u32 remaining_bytes = m_current_gpu_position - m_current_offset; + if (required_bytes < remaining_bytes) + { + // Place at the current position, since this is still behind the GPU. 
+ m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_current_gpu_position - m_current_offset; + return true; + } + } + + // Can we find a fence to wait on that will give us enough memory? + if (WaitForClearSpace(required_bytes)) + { + const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset; + m_current_offset += align_diff; + m_current_space -= align_diff; + return true; + } + + // We tried everything we could, and still couldn't get anything. This means that too much space + // in the buffer is being used by the command buffer currently being recorded. Therefore, the + // only option is to execute it, and wait until it's done. + return false; +} + +void StreamBuffer::CommitMemory(u32 final_num_bytes) +{ + Assert((m_current_offset + final_num_bytes) <= m_size); + Assert(final_num_bytes <= m_current_space); + m_current_offset += final_num_bytes; + m_current_space -= final_num_bytes; +} + +void StreamBuffer::Destroy(bool defer) +{ + if (m_host_pointer) + { + const D3D12_RANGE written_range = {0, m_size}; + m_buffer->Unmap(0, &written_range); + m_host_pointer = nullptr; + } + + if (m_buffer && defer) + g_d3d12_context->DeferResourceDestruction(m_buffer.Get()); + m_buffer.Reset(); + + m_current_offset = 0; + m_current_space = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); +} + +void StreamBuffer::UpdateCurrentFencePosition() +{ + // Don't create a tracking entry if the GPU is caught up with the buffer. + if (m_current_offset == m_current_gpu_position) + return; + + // Has the offset changed since the last fence? + const u64 fence = g_d3d12_context->GetCurrentFenceValue(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence) + { + // Still haven't executed a command buffer, so just update the offset. 
+ m_tracked_fences.back().second = m_current_offset; + return; + } + + UpdateGPUPosition(); + m_tracked_fences.emplace_back(fence, m_current_offset); +} + +void StreamBuffer::UpdateGPUPosition() +{ + auto start = m_tracked_fences.begin(); + auto end = start; + + const u64 completed_counter = g_d3d12_context->GetCompletedFenceValue(); + while (end != m_tracked_fences.end() && completed_counter >= end->first) + { + m_current_gpu_position = end->second; + ++end; + } + + if (start != end) + m_tracked_fences.erase(start, end); +} + +bool StreamBuffer::WaitForClearSpace(u32 num_bytes) +{ + u32 new_offset = 0; + u32 new_space = 0; + u32 new_gpu_position = 0; + + auto iter = m_tracked_fences.begin(); + for (; iter != m_tracked_fences.end(); ++iter) + { + // Would this fence bring us in line with the GPU? + // This is the "last resort" case, where a command buffer execution has been forced + // after no additional data has been written to it, so we can assume that after the + // fence has been signaled the entire buffer is now consumed. + u32 gpu_position = iter->second; + if (m_current_offset == gpu_position) + { + new_offset = 0; + new_space = m_size; + new_gpu_position = 0; + break; + } + + // Assuming that we wait for this fence, are we allocating in front of the GPU? + if (m_current_offset > gpu_position) + { + // This would suggest the GPU has now followed us and wrapped around, so we have from + // m_current_position..m_size free, as well as and 0..gpu_position. + const u32 remaining_space_after_offset = m_size - m_current_offset; + if (remaining_space_after_offset >= num_bytes) + { + // Switch to allocating in front of the GPU, using the remainder of the buffer. + new_offset = m_current_offset; + new_space = m_size - m_current_offset; + new_gpu_position = gpu_position; + break; + } + + // We can wrap around to the start, behind the GPU, if there is enough space. 
+ // We use > here because otherwise we'd end up lining up with the GPU, and then the + // allocator would assume that the GPU has consumed what we just wrote. + if (gpu_position > num_bytes) + { + new_offset = 0; + new_space = gpu_position; + new_gpu_position = gpu_position; + break; + } + } + else + { + // We're currently allocating behind the GPU. This would give us between the current + // offset and the GPU position worth of space to work with. Again, > because we can't + // align the GPU position with the buffer offset. + u32 available_space_inbetween = gpu_position - m_current_offset; + if (available_space_inbetween > num_bytes) + { + // Leave the offset as-is, but update the GPU position. + new_offset = m_current_offset; + new_space = gpu_position - m_current_offset; + new_gpu_position = gpu_position; + break; + } + } + } + + // Did any fences satisfy this condition? + // Has the command buffer been executed yet? If not, the caller should execute it. + if (iter == m_tracked_fences.end() || iter->first == g_d3d12_context->GetCurrentFenceValue()) + return false; + + // Wait until this fence is signaled. This will fire the callback, updating the GPU position. + g_d3d12_context->WaitForFence(iter->first); + m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); + m_current_offset = new_offset; + m_current_space = new_space; + m_current_gpu_position = new_gpu_position; + return true; +} + +} // namespace D3D12 diff --git a/src/common/d3d12/stream_buffer.h b/src/common/d3d12/stream_buffer.h new file mode 100644 index 000000000..26fce2638 --- /dev/null +++ b/src/common/d3d12/stream_buffer.h @@ -0,0 +1,58 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "../types.h" +#include "../windows_headers.h" +#include +#include +#include +#include + +namespace D3D12 { +class StreamBuffer +{ +public: + StreamBuffer(); + ~StreamBuffer(); + + bool Create(u32 size); + + ALWAYS_INLINE bool IsValid() const { return static_cast(m_buffer); } + ALWAYS_INLINE ID3D12Resource* GetBuffer() const { return m_buffer.Get(); } + ALWAYS_INLINE D3D12_GPU_VIRTUAL_ADDRESS GetGPUPointer() const { return m_gpu_pointer; } + ALWAYS_INLINE void* GetHostPointer() const { return m_host_pointer; } + ALWAYS_INLINE void* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } + ALWAYS_INLINE D3D12_GPU_VIRTUAL_ADDRESS GetCurrentGPUPointer() const { return m_gpu_pointer + m_current_offset; } + ALWAYS_INLINE u32 GetSize() const { return m_size; } + ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } + ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } + + bool ReserveMemory(u32 num_bytes, u32 alignment); + void CommitMemory(u32 final_num_bytes); + + void Destroy(bool defer = true); + +private: + void UpdateCurrentFencePosition(); + void UpdateGPUPosition(); + + // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
+ bool WaitForClearSpace(u32 num_bytes); + + u32 m_size = 0; + u32 m_current_offset = 0; + u32 m_current_space = 0; + u32 m_current_gpu_position = 0; + + Microsoft::WRL::ComPtr m_buffer; + D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {}; + u8* m_host_pointer = nullptr; + + // List of fences and the corresponding positions in the buffer + std::deque> m_tracked_fences; +}; + +} // namespace D3D12 diff --git a/src/common/d3d12/texture.cpp b/src/common/d3d12/texture.cpp new file mode 100644 index 000000000..9d266923c --- /dev/null +++ b/src/common/d3d12/texture.cpp @@ -0,0 +1,390 @@ +#include "texture.h" +#include "../align.h" +#include "../assert.h" +#include "../log.h" +#include "context.h" +#include "staging_texture.h" +#include "stream_buffer.h" +#include "util.h" +Log_SetChannel(D3D12); + +namespace D3D12 { + +Texture::Texture() = default; + +Texture::Texture(ID3D12Resource* resource, D3D12_RESOURCE_STATES state) + : m_resource(resource), m_state(state) +{ + // m_state is the "from" state of the next TransitionToState() barrier, so it has to start out as the + // state the caller passed in for this resource rather than being left at its default. + const D3D12_RESOURCE_DESC desc = GetDesc(); + m_width = static_cast(desc.Width); + m_height = desc.Height; + m_samples = desc.SampleDesc.Count; + m_format = desc.Format; +} + +Texture::Texture(Texture&& texture) + : m_resource(std::move(texture.m_resource)), m_srv_descriptor(texture.m_srv_descriptor), + m_rtv_or_dsv_descriptor(texture.m_rtv_or_dsv_descriptor), m_width(texture.m_width), m_height(texture.m_height), + m_samples(texture.m_samples), m_format(texture.m_format), m_state(texture.m_state), + m_is_depth_view(texture.m_is_depth_view) +{ + texture.m_srv_descriptor = {}; + texture.m_rtv_or_dsv_descriptor = {}; + texture.m_width = 0; + texture.m_height = 0; + texture.m_samples = 0; + texture.m_format = DXGI_FORMAT_UNKNOWN; + texture.m_state = D3D12_RESOURCE_STATE_COMMON; + texture.m_is_depth_view = false; +} + +Texture::~Texture() +{ + Destroy(); +} + +Texture& Texture::operator=(Texture&& texture) +{ + Destroy(); + m_resource = std::move(texture.m_resource); + m_srv_descriptor = texture.m_srv_descriptor; + 
m_rtv_or_dsv_descriptor = texture.m_rtv_or_dsv_descriptor; + m_width = texture.m_width; + m_height = texture.m_height; + m_samples = texture.m_samples; + m_format = texture.m_format; + m_state = texture.m_state; + m_is_depth_view = texture.m_is_depth_view; + texture.m_srv_descriptor = {}; + texture.m_rtv_or_dsv_descriptor = {}; + texture.m_width = 0; + texture.m_height = 0; + texture.m_samples = 0; + texture.m_format = DXGI_FORMAT_UNKNOWN; + texture.m_state = D3D12_RESOURCE_STATE_COMMON; + texture.m_is_depth_view = false; + return *this; +} + +D3D12_RESOURCE_DESC Texture::GetDesc() const +{ + return m_resource->GetDesc(); +} + +bool Texture::Create(u32 width, u32 height, u32 samples, DXGI_FORMAT format, DXGI_FORMAT srv_format, + DXGI_FORMAT rtv_format, DXGI_FORMAT dsv_format, D3D12_RESOURCE_FLAGS flags) +{ + constexpr D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_DEFAULT}; + + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + desc.Width = width; + desc.Height = height; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = format; + desc.SampleDesc.Count = samples; + desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + desc.Flags = flags; + + D3D12_CLEAR_VALUE optimized_clear_value = {}; + D3D12_RESOURCE_STATES state; + if (rtv_format != DXGI_FORMAT_UNKNOWN) + { + optimized_clear_value.Format = rtv_format; + state = D3D12_RESOURCE_STATE_RENDER_TARGET; + } + else if (dsv_format != DXGI_FORMAT_UNKNOWN) + { + optimized_clear_value.Format = dsv_format; + state = D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + else + { + state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + } + + ComPtr resource; + HRESULT hr = g_d3d12_context->GetDevice()->CreateCommittedResource( + &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, state, + (rtv_format != DXGI_FORMAT_UNKNOWN || dsv_format != DXGI_FORMAT_UNKNOWN) ? 
&optimized_clear_value : nullptr, + IID_PPV_ARGS(resource.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create texture failed: 0x%08X", hr); + return false; + } + + DescriptorHandle srv_descriptor, rtv_descriptor; + bool is_depth_view = false; + if (srv_format != DXGI_FORMAT_UNKNOWN) + { + if (!CreateSRVDescriptor(resource.Get(), srv_format, samples > 1, &srv_descriptor)) + return false; + } + + if (rtv_format != DXGI_FORMAT_UNKNOWN) + { + Assert(dsv_format == DXGI_FORMAT_UNKNOWN); + if (!CreateRTVDescriptor(resource.Get(), rtv_format, samples > 1, &rtv_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } + } + else if (dsv_format != DXGI_FORMAT_UNKNOWN) + { + if (!CreateDSVDescriptor(resource.Get(), dsv_format, samples > 1, &rtv_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } + + is_depth_view = true; + } + + Destroy(true); + + m_resource = std::move(resource); + m_srv_descriptor = std::move(srv_descriptor); + m_rtv_or_dsv_descriptor = std::move(rtv_descriptor); + m_width = width; + m_height = height; + m_samples = samples; + m_format = format; + m_state = state; + m_is_depth_view = is_depth_view; + return true; +} + +bool Texture::Adopt(ComPtr texture, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, + DXGI_FORMAT dsv_format, D3D12_RESOURCE_STATES state) +{ + const D3D12_RESOURCE_DESC desc(texture->GetDesc()); + + DescriptorHandle srv_descriptor, rtv_descriptor; + if (srv_format != DXGI_FORMAT_UNKNOWN) + { + if (!CreateSRVDescriptor(texture.Get(), srv_format, desc.SampleDesc.Count > 1, &srv_descriptor)) + return false; + } + + if (rtv_format != DXGI_FORMAT_UNKNOWN) + { + Assert(dsv_format == DXGI_FORMAT_UNKNOWN); + if (!CreateRTVDescriptor(texture.Get(), rtv_format, desc.SampleDesc.Count > 1, &rtv_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } + } + else if (dsv_format != 
DXGI_FORMAT_UNKNOWN) + { + if (!CreateDSVDescriptor(texture.Get(), dsv_format, desc.SampleDesc.Count > 1, &rtv_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } + } + + m_resource = std::move(texture); + m_srv_descriptor = std::move(srv_descriptor); + m_rtv_or_dsv_descriptor = std::move(rtv_descriptor); + m_width = static_cast(desc.Width); + m_height = desc.Height; + m_samples = desc.SampleDesc.Count; + m_format = desc.Format; + m_state = state; + // Destroy() uses this flag to pick the DSV or RTV heap manager when releasing m_rtv_or_dsv_descriptor. + m_is_depth_view = (rtv_format == DXGI_FORMAT_UNKNOWN && dsv_format != DXGI_FORMAT_UNKNOWN); + return true; +} + +void Texture::Destroy(bool defer /* = true */) +{ + if (defer) + { + g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDescriptorHeapManager(), &m_srv_descriptor); + if (m_is_depth_view) + g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDSVHeapManager(), &m_rtv_or_dsv_descriptor); + else + g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetRTVHeapManager(), &m_rtv_or_dsv_descriptor); + g_d3d12_context->DeferResourceDestruction(m_resource.Get()); + m_resource.Reset(); + } + else + { + g_d3d12_context->GetDescriptorHeapManager().Free(&m_srv_descriptor); + if (m_is_depth_view) + g_d3d12_context->GetDSVHeapManager().Free(&m_rtv_or_dsv_descriptor); + else + g_d3d12_context->GetRTVHeapManager().Free(&m_rtv_or_dsv_descriptor); + + m_resource.Reset(); + } + + m_width = 0; + m_height = 0; + m_samples = 0; + m_format = DXGI_FORMAT_UNKNOWN; + m_is_depth_view = false; +} + +void Texture::TransitionToState(D3D12_RESOURCE_STATES state) const +{ + if (m_state == state) + return; + + ResourceBarrier(g_d3d12_context->GetCommandList(), m_resource.Get(), m_state, state); + m_state = state; +} + +bool Texture::BeginStreamUpdate(u32 x, u32 y, u32 width, u32 height, void** out_data, u32* out_data_pitch) +{ + const u32 copy_pitch = Common::AlignUpPow2(width * GetTexelSize(m_format), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 upload_size = copy_pitch * height; + + if 
(!g_d3d12_context->GetTextureStreamBuffer().ReserveMemory(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes (%ux%u) in upload buffer", upload_size, width, + height); + g_d3d12_context->ExecuteCommandList(false); + if (!g_d3d12_context->GetTextureStreamBuffer().ReserveMemory(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + Log_ErrorPrintf("Failed to reserve %u bytes for %ux%u upload", upload_size, width, height); + return false; + } + } + + *out_data = g_d3d12_context->GetTextureStreamBuffer().GetCurrentHostPointer(); + *out_data_pitch = copy_pitch; + return true; +} + +void Texture::EndStreamUpdate(u32 x, u32 y, u32 width, u32 height) +{ + const u32 copy_pitch = Common::AlignUpPow2(width * GetTexelSize(m_format), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 upload_size = copy_pitch * height; + + StreamBuffer& sb = g_d3d12_context->GetTextureStreamBuffer(); + const u32 sb_offset = sb.GetCurrentOffset(); + sb.CommitMemory(upload_size); + + D3D12_TEXTURE_COPY_LOCATION src; + src.pResource = sb.GetBuffer(); + src.SubresourceIndex = 0; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src.PlacedFootprint.Offset = sb_offset; + src.PlacedFootprint.Footprint.Width = width; + src.PlacedFootprint.Footprint.Height = height; + src.PlacedFootprint.Footprint.Depth = 1; + src.PlacedFootprint.Footprint.RowPitch = copy_pitch; + src.PlacedFootprint.Footprint.Format = m_format; + + D3D12_TEXTURE_COPY_LOCATION dst; + dst.pResource = m_resource.Get(); + dst.SubresourceIndex = 0; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + + const D3D12_BOX src_box{0u, 0u, 0u, width, height, 1u}; + const D3D12_RESOURCE_STATES old_state = m_state; + TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + // Place the upload at the caller's (x, y) in the texture, matching the staging path in LoadData(). + g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, x, y, 0, &src, &src_box); + TransitionToState(old_state); +} + +bool Texture::LoadData(u32 x, u32 y, u32 width, u32 height, const void* 
data, u32 pitch) +{ + const u32 texel_size = GetTexelSize(m_format); + const u32 upload_pitch = Common::AlignUpPow2(width * texel_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 upload_size = upload_pitch * height; + if (upload_size > g_d3d12_context->GetTextureStreamBuffer().GetSize()) + { + StagingTexture st; + if (!st.Create(width, height, m_format, true) || !st.WritePixels(0, 0, width, height, data, pitch)) + return false; + + D3D12_RESOURCE_STATES old_state = m_state; + TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + st.CopyToTexture(0, 0, m_resource.Get(), 0, x, y, width, height); + st.Destroy(true); + TransitionToState(old_state); + return true; + } + + void* write_ptr; + u32 write_pitch; + if (!BeginStreamUpdate(x, y, width, height, &write_ptr, &write_pitch)) + return false; + + CopyToUploadBuffer(data, pitch, height, write_ptr, write_pitch); + EndStreamUpdate(x, y, width, height); + return true; +} + +void Texture::CopyToUploadBuffer(const void* src_data, u32 src_pitch, u32 height, void* dst_data, u32 dst_pitch) +{ + const u8* src_ptr = static_cast(src_data); + u8* dst_ptr = static_cast(dst_data); + if (src_pitch == dst_pitch) + { + std::memcpy(dst_ptr, src_ptr, dst_pitch * height); + } + else + { + const u32 copy_size = std::min(src_pitch, dst_pitch); + for (u32 row = 0; row < height; row++) + { + std::memcpy(dst_ptr, src_ptr, copy_size); + src_ptr += src_pitch; + dst_ptr += dst_pitch; + } + } +} + +bool Texture::CreateSRVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, DescriptorHandle* dh) +{ + if (!g_d3d12_context->GetDescriptorHeapManager().Allocate(dh)) + { + Log_ErrorPrintf("Failed to allocate SRV descriptor"); + return false; + } + + D3D12_SHADER_RESOURCE_VIEW_DESC desc = { + format, multisampled ? 
D3D12_SRV_DIMENSION_TEXTURE2DMS : D3D12_SRV_DIMENSION_TEXTURE2D, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + if (!multisampled) + desc.Texture2D.MipLevels = 1; + + g_d3d12_context->GetDevice()->CreateShaderResourceView(resource, &desc, dh->cpu_handle); + return true; +} + +bool Texture::CreateRTVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, DescriptorHandle* dh) +{ + if (!g_d3d12_context->GetRTVHeapManager().Allocate(dh)) + { + Log_ErrorPrintf("Failed to allocate RTV descriptor"); + return false; + } + + D3D12_RENDER_TARGET_VIEW_DESC desc = {format, + multisampled ? D3D12_RTV_DIMENSION_TEXTURE2DMS : D3D12_RTV_DIMENSION_TEXTURE2D}; + + g_d3d12_context->GetDevice()->CreateRenderTargetView(resource, &desc, dh->cpu_handle); + return true; +} + +bool Texture::CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, DescriptorHandle* dh) +{ + if (!g_d3d12_context->GetDSVHeapManager().Allocate(dh)) + { + Log_ErrorPrintf("Failed to allocate DSV descriptor"); + return false; + } + + D3D12_DEPTH_STENCIL_VIEW_DESC desc = { + format, multisampled ? 
D3D12_DSV_DIMENSION_TEXTURE2DMS : D3D12_DSV_DIMENSION_TEXTURE2D, D3D12_DSV_FLAG_NONE}; + + g_d3d12_context->GetDevice()->CreateDepthStencilView(resource, &desc, dh->cpu_handle); + return true; +} + +} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/texture.h b/src/common/d3d12/texture.h new file mode 100644 index 000000000..434a8c8a2 --- /dev/null +++ b/src/common/d3d12/texture.h @@ -0,0 +1,80 @@ +#pragma once +#include "../types.h" +#include "../windows_headers.h" +#include "descriptor_heap_manager.h" +#include +#include + +namespace D3D12 { + +class StreamBuffer; + +class Texture final +{ +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + Texture(); + Texture(ID3D12Resource* resource, D3D12_RESOURCE_STATES state); + Texture(Texture&& texture); + Texture(const Texture&) = delete; + ~Texture(); + + ALWAYS_INLINE ID3D12Resource* GetResource() const { return m_resource.Get(); } + ALWAYS_INLINE const DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } + ALWAYS_INLINE const DescriptorHandle& GetRTVOrDSVDescriptor() const { return m_rtv_or_dsv_descriptor; } + ALWAYS_INLINE D3D12_RESOURCE_STATES GetState() const { return m_state; } + + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } + ALWAYS_INLINE u32 GetSamples() const { return m_samples; } + ALWAYS_INLINE DXGI_FORMAT GetFormat() const { return m_format; } + ALWAYS_INLINE bool IsMultisampled() const { return m_samples > 1; } + + ALWAYS_INLINE operator ID3D12Resource*() const { return m_resource.Get(); } + ALWAYS_INLINE operator bool() const { return static_cast(m_resource); } + + bool Create(u32 width, u32 height, u32 samples, DXGI_FORMAT format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, + DXGI_FORMAT dsv_format, D3D12_RESOURCE_FLAGS flags); + bool Adopt(ComPtr texture, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, DXGI_FORMAT dsv_format, + D3D12_RESOURCE_STATES state); + + 
D3D12_RESOURCE_DESC GetDesc() const; + + void Destroy(bool defer = true); + + void TransitionToState(D3D12_RESOURCE_STATES state) const; + + Texture& operator=(const Texture&) = delete; + Texture& operator=(Texture&& texture); + + bool BeginStreamUpdate(u32 x, u32 y, u32 width, u32 height, void** out_data, u32* out_data_pitch); + void EndStreamUpdate(u32 x, u32 y, u32 width, u32 height); + + bool LoadData(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch); + + static void CopyToUploadBuffer(const void* src_data, u32 src_pitch, u32 height, void* dst_data, u32 dst_pitch); + +private: + static bool CreateSRVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, + DescriptorHandle* dh); + static bool CreateRTVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, + DescriptorHandle* dh); + static bool CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, + DescriptorHandle* dh); + + ComPtr m_resource; + DescriptorHandle m_srv_descriptor = {}; + DescriptorHandle m_rtv_or_dsv_descriptor = {}; + u32 m_width = 0; + u32 m_height = 0; + u32 m_samples = 0; + DXGI_FORMAT m_format = DXGI_FORMAT_UNKNOWN; + + mutable D3D12_RESOURCE_STATES m_state = D3D12_RESOURCE_STATE_COMMON; + + bool m_is_depth_view = false; +}; + +} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/util.cpp b/src/common/d3d12/util.cpp new file mode 100644 index 000000000..ba241060e --- /dev/null +++ b/src/common/d3d12/util.cpp @@ -0,0 +1,343 @@ +#include "util.h" +#include "../assert.h" +#include "../log.h" +#include "../string.h" +#include "../string_util.h" +#include "context.h" +#include "shader_cache.h" +#include +#include +Log_SetChannel(D3D12); + +namespace D3D12 { + +u32 GetTexelSize(DXGI_FORMAT format) +{ + switch (format) + { + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case 
DXGI_FORMAT_B8G8R8A8_TYPELESS: + return 4; + + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_B5G6R5_UNORM: + return 2; + + default: + Panic("Unknown format"); + return 1; + } +} + +void SetDefaultSampler(D3D12_SAMPLER_DESC* desc) +{ + desc->Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + desc->AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc->AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc->AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc->MipLODBias = 0; + desc->MaxAnisotropy = 1; + desc->ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + desc->BorderColor[0] = 1.0f; + desc->BorderColor[1] = 1.0f; + desc->BorderColor[2] = 1.0f; + desc->BorderColor[3] = 1.0f; + desc->MinLOD = -3.402823466e+38F; // -FLT_MAX + desc->MaxLOD = 3.402823466e+38F; // FLT_MAX +} + +#ifdef _DEBUG + +void SetObjectName(ID3D12Object* object, const char* name) +{ + object->SetName(StringUtil::UTF8StringToWideString(name).c_str()); +} + +void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...) 
+{ + std::va_list ap; + va_start(ap, format); + + SmallString str; + str.FormatVA(format, ap); + + SetObjectName(object, str); + va_end(ap); +} + +#endif + +GraphicsPipelineBuilder::GraphicsPipelineBuilder() +{ + Clear(); +} + +void GraphicsPipelineBuilder::Clear() +{ + std::memset(&m_desc, 0, sizeof(m_desc)); + std::memset(m_input_elements.data(), 0, sizeof(D3D12_INPUT_ELEMENT_DESC) * m_input_elements.size()); + m_desc.NodeMask = 1; + m_desc.SampleMask = 0xFFFFFFFF; + m_desc.SampleDesc.Count = 1; +} + +Microsoft::WRL::ComPtr GraphicsPipelineBuilder::Create(ID3D12Device* device, bool clear /*= true*/) +{ + Microsoft::WRL::ComPtr ps; + HRESULT hr = device->CreateGraphicsPipelineState(&m_desc, IID_PPV_ARGS(ps.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateGraphicsPipelineState() failed: %08X", hr); + return {}; + } + + if (clear) + Clear(); + + return ps; +} + +Microsoft::WRL::ComPtr GraphicsPipelineBuilder::Create(ID3D12Device* device, ShaderCache& cache, + bool clear /*= true*/) +{ + Microsoft::WRL::ComPtr pso = cache.GetPipelineState(device, m_desc); + if (!pso) + return {}; + + if (clear) + Clear(); + + return pso; +} + +void GraphicsPipelineBuilder::SetRootSignature(ID3D12RootSignature* rs) +{ + m_desc.pRootSignature = rs; +} + +void GraphicsPipelineBuilder::SetVertexShader(ID3DBlob* blob) +{ + SetVertexShader(blob->GetBufferPointer(), static_cast(blob->GetBufferSize())); +} + +void GraphicsPipelineBuilder::SetVertexShader(const void* data, u32 data_size) +{ + m_desc.VS.pShaderBytecode = data; + m_desc.VS.BytecodeLength = data_size; +} + +void GraphicsPipelineBuilder::SetGeometryShader(ID3DBlob* blob) +{ + SetGeometryShader(blob->GetBufferPointer(), static_cast(blob->GetBufferSize())); +} + +void GraphicsPipelineBuilder::SetGeometryShader(const void* data, u32 data_size) +{ + m_desc.GS.pShaderBytecode = data; + m_desc.GS.BytecodeLength = data_size; +} + +void GraphicsPipelineBuilder::SetPixelShader(ID3DBlob* blob) +{ + 
SetPixelShader(blob->GetBufferPointer(), static_cast(blob->GetBufferSize())); +} + +void GraphicsPipelineBuilder::SetPixelShader(const void* data, u32 data_size) +{ + m_desc.PS.pShaderBytecode = data; + m_desc.PS.BytecodeLength = data_size; +} + +void GraphicsPipelineBuilder::AddVertexAttribute(const char* semantic_name, u32 semantic_index, DXGI_FORMAT format, + u32 buffer, u32 offset) +{ + const u32 index = m_desc.InputLayout.NumElements; + m_input_elements[index].SemanticIndex = semantic_index; + m_input_elements[index].SemanticName = semantic_name; + m_input_elements[index].Format = format; + m_input_elements[index].AlignedByteOffset = offset; + m_input_elements[index].InputSlot = buffer; + m_input_elements[index].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + m_input_elements[index].InstanceDataStepRate = 0; + + m_desc.InputLayout.pInputElementDescs = m_input_elements.data(); + m_desc.InputLayout.NumElements++; +} + +void GraphicsPipelineBuilder::SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE type) +{ + m_desc.PrimitiveTopologyType = type; +} + +void GraphicsPipelineBuilder::SetRasterizationState(D3D12_FILL_MODE polygon_mode, D3D12_CULL_MODE cull_mode, + bool front_face_ccw) +{ + m_desc.RasterizerState.FillMode = polygon_mode; + m_desc.RasterizerState.CullMode = cull_mode; + m_desc.RasterizerState.FrontCounterClockwise = front_face_ccw; +} + +void GraphicsPipelineBuilder::SetMultisamples(u32 multisamples) +{ + m_desc.RasterizerState.MultisampleEnable = multisamples > 1; + m_desc.SampleDesc.Count = multisamples; +} + +void GraphicsPipelineBuilder::SetNoCullRasterizationState() +{ + SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); +} + +void GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op) +{ + m_desc.DepthStencilState.DepthEnable = depth_test; + // The write mask follows depth_write (not depth_test), so a pipeline can test against depth without writing it. + m_desc.DepthStencilState.DepthWriteMask = depth_write ? 
D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + m_desc.DepthStencilState.DepthFunc = compare_op; +} + +void GraphicsPipelineBuilder::SetNoDepthTestState() +{ + SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); +} + +void GraphicsPipelineBuilder::SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, D3D12_BLEND dst_factor, + D3D12_BLEND_OP op, D3D12_BLEND alpha_src_factor, + D3D12_BLEND alpha_dst_factor, D3D12_BLEND_OP alpha_op, + u8 write_mask /*= D3D12_COLOR_WRITE_ENABLE_ALL*/) +{ + m_desc.BlendState.RenderTarget[rt].BlendEnable = blend_enable; + m_desc.BlendState.RenderTarget[rt].SrcBlend = src_factor; + m_desc.BlendState.RenderTarget[rt].DestBlend = dst_factor; + m_desc.BlendState.RenderTarget[rt].BlendOp = op; + m_desc.BlendState.RenderTarget[rt].SrcBlendAlpha = alpha_src_factor; + m_desc.BlendState.RenderTarget[rt].DestBlendAlpha = alpha_dst_factor; + m_desc.BlendState.RenderTarget[rt].BlendOpAlpha = alpha_op; + m_desc.BlendState.RenderTarget[rt].RenderTargetWriteMask = write_mask; + + if (rt > 0) + m_desc.BlendState.IndependentBlendEnable = TRUE; +} + +void GraphicsPipelineBuilder::SetNoBlendingState() +{ + SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, + D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); + m_desc.BlendState.IndependentBlendEnable = FALSE; +} + +void GraphicsPipelineBuilder::ClearRenderTargets() +{ + m_desc.NumRenderTargets = 0; + for (u32 i = 0; i < sizeof(m_desc.RTVFormats) / sizeof(m_desc.RTVFormats[0]); i++) + m_desc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; +} + +void GraphicsPipelineBuilder::SetRenderTarget(u32 rt, DXGI_FORMAT format) +{ + m_desc.RTVFormats[rt] = format; + if (rt >= m_desc.NumRenderTargets) + m_desc.NumRenderTargets = rt + 1; +} + +void GraphicsPipelineBuilder::ClearDepthStencilFormat() +{ + m_desc.DSVFormat = DXGI_FORMAT_UNKNOWN; +} + +void GraphicsPipelineBuilder::SetDepthStencilFormat(DXGI_FORMAT format) +{ + m_desc.DSVFormat = format; +} + 
+RootSignatureBuilder::RootSignatureBuilder() +{ + Clear(); +} + +void RootSignatureBuilder::Clear() +{ + m_desc = {}; + m_desc.pParameters = m_params.data(); + m_params = {}; + m_descriptor_ranges = {}; + m_num_descriptor_ranges = 0; +} + +Microsoft::WRL::ComPtr RootSignatureBuilder::Create(bool clear /*= true*/) +{ + Microsoft::WRL::ComPtr rs = g_d3d12_context->CreateRootSignature(&m_desc); + if (!rs) + return {}; + + if (clear) + Clear(); + + return rs; +} + +void RootSignatureBuilder::SetInputAssemblerFlag() +{ + m_desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; +} + +u32 RootSignatureBuilder::Add32BitConstants(u32 shader_reg, u32 num_values, D3D12_SHADER_VISIBILITY visibility) +{ + const u32 index = m_desc.NumParameters++; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + m_params[index].ShaderVisibility = visibility; + m_params[index].Constants.ShaderRegister = shader_reg; + m_params[index].Constants.RegisterSpace = 0; + m_params[index].Constants.Num32BitValues = num_values; + + return index; +} + +u32 RootSignatureBuilder::AddCBVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) +{ + const u32 index = m_desc.NumParameters++; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + m_params[index].ShaderVisibility = visibility; + m_params[index].Descriptor.ShaderRegister = shader_reg; + m_params[index].Descriptor.RegisterSpace = 0; + + return index; +} + +u32 RootSignatureBuilder::AddSRVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) +{ + const u32 index = m_desc.NumParameters++; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + m_params[index].ShaderVisibility = visibility; + m_params[index].Descriptor.ShaderRegister = shader_reg; + m_params[index].Descriptor.RegisterSpace = 0; + + return index; +} + +u32 RootSignatureBuilder::AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE rt, u32 start_shader_reg, u32 num_shader_regs, + D3D12_SHADER_VISIBILITY 
visibility) +{ + const u32 index = m_desc.NumParameters++; + const u32 dr_index = m_num_descriptor_ranges++; + + m_descriptor_ranges[dr_index].RangeType = rt; + m_descriptor_ranges[dr_index].NumDescriptors = num_shader_regs; + m_descriptor_ranges[dr_index].BaseShaderRegister = start_shader_reg; + m_descriptor_ranges[dr_index].RegisterSpace = 0; + m_descriptor_ranges[dr_index].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + m_params[index].DescriptorTable.pDescriptorRanges = &m_descriptor_ranges[dr_index]; + m_params[index].DescriptorTable.NumDescriptorRanges = 1; + m_params[index].ShaderVisibility = visibility; + + return index; +} + +} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/util.h b/src/common/d3d12/util.h new file mode 100644 index 000000000..a91960d27 --- /dev/null +++ b/src/common/d3d12/util.h @@ -0,0 +1,152 @@ +#pragma once +#include "../types.h" +#include "../windows_headers.h" +#include +#include +#include + +namespace D3D12 { + +class ShaderCache; + +static inline void ResourceBarrier(ID3D12GraphicsCommandList* cmdlist, ID3D12Resource* resource, + D3D12_RESOURCE_STATES from_state, D3D12_RESOURCE_STATES to_state) +{ + const D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + D3D12_RESOURCE_BARRIER_FLAG_NONE, + {{resource, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, from_state, to_state}}}; + cmdlist->ResourceBarrier(1, &barrier); +} + +static inline void SetViewport(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, + float min_depth = 0.0f, float max_depth = 1.0f) +{ + const D3D12_VIEWPORT vp{static_cast(x), + static_cast(y), + static_cast(width), + static_cast(height), + min_depth, + max_depth}; + cmdlist->RSSetViewports(1, &vp); +} + +static inline void SetScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height) +{ + const D3D12_RECT r{x, y, x + 
width, y + height}; + cmdlist->RSSetScissorRects(1, &r); +} + +static inline void SetViewportAndScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, + float min_depth = 0.0f, float max_depth = 1.0f) +{ + SetViewport(cmdlist, x, y, width, height, min_depth, max_depth); + SetScissor(cmdlist, x, y, width, height); +} + +u32 GetTexelSize(DXGI_FORMAT format); + +void SetDefaultSampler(D3D12_SAMPLER_DESC* desc); + +#ifdef _DEBUG + +void SetObjectName(ID3D12Object* object, const char* name); +void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...); + +#else + +static inline void SetObjectName(ID3D12Object* object, const char* name) {} +static inline void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...) {} + +#endif + +class RootSignatureBuilder +{ +public: + enum : u32 + { + MAX_PARAMETERS = 16, + MAX_DESCRIPTOR_RANGES = 16 + }; + + RootSignatureBuilder(); + + void Clear(); + + Microsoft::WRL::ComPtr Create(bool clear = true); + + void SetInputAssemblerFlag(); + + u32 Add32BitConstants(u32 shader_reg, u32 num_values, D3D12_SHADER_VISIBILITY visibility); + u32 AddCBVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility); + u32 AddSRVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility); + u32 AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE rt, u32 start_shader_reg, u32 num_shader_regs, + D3D12_SHADER_VISIBILITY visibility); + +private: + D3D12_ROOT_SIGNATURE_DESC m_desc{}; + std::array m_params{}; + std::array m_descriptor_ranges{}; + u32 m_num_descriptor_ranges = 0; +}; + +class GraphicsPipelineBuilder +{ +public: + enum : u32 + { + MAX_VERTEX_ATTRIBUTES = 16, + }; + + GraphicsPipelineBuilder(); + + ~GraphicsPipelineBuilder() = default; + + void Clear(); + + Microsoft::WRL::ComPtr Create(ID3D12Device* device, bool clear = true); + Microsoft::WRL::ComPtr Create(ID3D12Device* device, ShaderCache& cache, bool clear = true); + + void SetRootSignature(ID3D12RootSignature* rs); + + void 
SetVertexShader(const void* data, u32 data_size); + void SetGeometryShader(const void* data, u32 data_size); + void SetPixelShader(const void* data, u32 data_size); + + void SetVertexShader(ID3DBlob* blob); + void SetGeometryShader(ID3DBlob* blob); + void SetPixelShader(ID3DBlob* blob); + + void AddVertexAttribute(const char* semantic_name, u32 semantic_index, DXGI_FORMAT format, u32 buffer, u32 offset); + + void SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE type); + + void SetRasterizationState(D3D12_FILL_MODE polygon_mode, D3D12_CULL_MODE cull_mode, bool front_face_ccw); + + void SetMultisamples(u32 multisamples); + + void SetNoCullRasterizationState(); + + void SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op); + + void SetNoDepthTestState(); + + void SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, D3D12_BLEND dst_factor, D3D12_BLEND_OP op, + D3D12_BLEND alpha_src_factor, D3D12_BLEND alpha_dst_factor, D3D12_BLEND_OP alpha_op, + u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL); + + void SetNoBlendingState(); + + void ClearRenderTargets(); + + void SetRenderTarget(u32 rt, DXGI_FORMAT format); + + void ClearDepthStencilFormat(); + + void SetDepthStencilFormat(DXGI_FORMAT format); + +private: + D3D12_GRAPHICS_PIPELINE_STATE_DESC m_desc{}; + std::array m_input_elements{}; +}; + +} // namespace D3D12 \ No newline at end of file diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f9dccc2da..b9a0e04fc 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -122,6 +122,8 @@ target_compile_definitions(core PUBLIC "-DWITH_IMGUI=1") if(WIN32) target_sources(core PRIVATE + gpu_hw_d3d12.cpp + gpu_hw_d3d12.h gpu_hw_d3d11.cpp gpu_hw_d3d11.h ) diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index bc48fcac8..47a4f0f04 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -36,6 +36,7 @@ + @@ -99,6 +100,7 @@ + diff --git a/src/core/core.vcxproj.filters 
b/src/core/core.vcxproj.filters index 03e5d4196..147d956ce 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -58,6 +58,7 @@ + @@ -118,5 +119,6 @@ + - \ No newline at end of file + diff --git a/src/core/gpu.h b/src/core/gpu.h index 66c019f2b..7a1272dcf 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -146,6 +146,9 @@ public: // gpu_hw_d3d11.cpp static std::unique_ptr CreateHardwareD3D11Renderer(); + // gpu_hw_d3d12.cpp + static std::unique_ptr CreateHardwareD3D12Renderer(); + // gpu_hw_opengl.cpp static std::unique_ptr CreateHardwareOpenGLRenderer(); diff --git a/src/core/gpu_hw_d3d12.cpp b/src/core/gpu_hw_d3d12.cpp new file mode 100644 index 000000000..95948e0be --- /dev/null +++ b/src/core/gpu_hw_d3d12.cpp @@ -0,0 +1,1097 @@ +#include "gpu_hw_d3d12.h" +#include "common/assert.h" +#include "common/d3d11/shader_compiler.h" +#include "common/d3d12/context.h" +#include "common/d3d12/descriptor_heap_manager.h" +#include "common/d3d12/shader_cache.h" +#include "common/d3d12/util.h" +#include "common/log.h" +#include "common/scope_guard.h" +#include "common/timer.h" +#include "gpu_hw_shadergen.h" +#include "host_display.h" +#include "host_interface.h" +#include "system.h" +Log_SetChannel(GPU_HW_D3D12); + +GPU_HW_D3D12::GPU_HW_D3D12() = default; + +GPU_HW_D3D12::~GPU_HW_D3D12() +{ + if (m_host_display) + { + m_host_display->ClearDisplayTexture(); + ResetGraphicsAPIState(); + } + + DestroyResources(); +} + +GPURenderer GPU_HW_D3D12::GetRendererType() const +{ + return GPURenderer::HardwareD3D12; +} + +bool GPU_HW_D3D12::Initialize(HostDisplay* host_display) +{ + if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::D3D12) + { + Log_ErrorPrintf("Host render API is incompatible"); + return false; + } + + SetCapabilities(); + + if (!GPU_HW::Initialize(host_display)) + return false; + + if (!CreateRootSignatures()) + { + Log_ErrorPrintf("Failed to create root signatures"); + return false; + } + + if (!CreateSamplers()) + { + 
Log_ErrorPrintf("Failed to create samplers"); + return false; + } + + if (!CreateVertexBuffer()) + { + Log_ErrorPrintf("Failed to create vertex buffer"); + return false; + } + + if (!CreateUniformBuffer()) + { + Log_ErrorPrintf("Failed to create uniform buffer"); + return false; + } + + if (!CreateTextureBuffer()) + { + Log_ErrorPrintf("Failed to create texture buffer"); + return false; + } + + if (!CreateFramebuffer()) + { + Log_ErrorPrintf("Failed to create framebuffer"); + return false; + } + + if (!CompilePipelines()) + { + Log_ErrorPrintf("Failed to compile pipelines"); + return false; + } + + RestoreGraphicsAPIState(); + UpdateDepthBufferFromMaskBit(); + return true; +} + +void GPU_HW_D3D12::Reset(bool clear_vram) +{ + GPU_HW::Reset(clear_vram); + + if (clear_vram) + ClearFramebuffer(); +} + +void GPU_HW_D3D12::ResetGraphicsAPIState() +{ + GPU_HW::ResetGraphicsAPIState(); +} + +void GPU_HW_D3D12::RestoreGraphicsAPIState() +{ + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + cmdlist->OMSetRenderTargets(1, &m_vram_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, + &m_vram_depth_texture.GetRTVOrDSVDescriptor().cpu_handle); + + const D3D12_VERTEX_BUFFER_VIEW vbv{m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), + sizeof(BatchVertex)}; + cmdlist->IASetVertexBuffers(0, 1, &vbv); + cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + cmdlist->SetGraphicsRootSignature(m_batch_root_signature.Get()); + cmdlist->SetGraphicsRootConstantBufferView(0, + m_uniform_stream_buffer.GetGPUPointer() + m_current_uniform_buffer_offset); + cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_read_texture.GetSRVDescriptor().gpu_handle); + cmdlist->SetGraphicsRootDescriptorTable(2, m_point_sampler.gpu_handle); + + D3D12::SetViewport(cmdlist, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + + SetScissorFromDrawingArea(); +} + +void 
GPU_HW_D3D12::UpdateSettings() +{ + GPU_HW::UpdateSettings(); + + bool framebuffer_changed, shaders_changed; + UpdateHWSettings(&framebuffer_changed, &shaders_changed); + + if (framebuffer_changed) + { + RestoreGraphicsAPIState(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ResetGraphicsAPIState(); + } + + // Everything should be finished executing before recreating resources. + m_host_display->ClearDisplayTexture(); + g_d3d12_context->ExecuteCommandList(true); + + if (framebuffer_changed) + CreateFramebuffer(); + + if (shaders_changed) + { + // clear it since we draw a loading screen and it's not in the correct state + DestroyPipelines(); + CompilePipelines(); + } + + // this has to be done here, because otherwise we're using destroyed pipelines in the same cmdbuffer + if (framebuffer_changed) + { + RestoreGraphicsAPIState(); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); + UpdateDepthBufferFromMaskBit(); + UpdateDisplay(); + ResetGraphicsAPIState(); + } +} + +void GPU_HW_D3D12::MapBatchVertexPointer(u32 required_vertices) +{ + DebugAssert(!m_batch_start_vertex_ptr); + + const u32 required_space = required_vertices * sizeof(BatchVertex); + if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes in vertex stream buffer", required_space); + g_d3d12_context->ExecuteCommandList(false); + RestoreGraphicsAPIState(); + if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) + Panic("Failed to reserve vertex stream buffer memory"); + } + + m_batch_start_vertex_ptr = static_cast(m_vertex_stream_buffer.GetCurrentHostPointer()); + m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; + m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + (m_vertex_stream_buffer.GetCurrentSpace() / sizeof(BatchVertex)); + m_batch_base_vertex = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(BatchVertex); +} + +void 
GPU_HW_D3D12::UnmapBatchVertexPointer(u32 used_vertices) +{ + DebugAssert(m_batch_start_vertex_ptr); + if (used_vertices > 0) + m_vertex_stream_buffer.CommitMemory(used_vertices * sizeof(BatchVertex)); + + m_batch_start_vertex_ptr = nullptr; + m_batch_end_vertex_ptr = nullptr; + m_batch_current_vertex_ptr = nullptr; +} + +void GPU_HW_D3D12::UploadUniformBuffer(const void* data, u32 data_size) +{ + if (!m_uniform_stream_buffer.ReserveMemory(data_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes in uniform stream buffer", data_size); + g_d3d12_context->ExecuteCommandList(false); + RestoreGraphicsAPIState(); + if (!m_uniform_stream_buffer.ReserveMemory(data_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + Panic("Failed to reserve uniform stream buffer memory"); + } + + m_current_uniform_buffer_offset = m_uniform_stream_buffer.GetCurrentOffset(); + std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), data, data_size); + m_uniform_stream_buffer.CommitMemory(data_size); + + g_d3d12_context->GetCommandList()->SetGraphicsRootConstantBufferView(0, m_uniform_stream_buffer.GetGPUPointer() + + m_current_uniform_buffer_offset); +} + +void GPU_HW_D3D12::SetCapabilities() +{ + // TODO: Query from device + const u32 max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; + const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; + Log_InfoPrintf("Max texture size: %ux%u", max_texture_size, max_texture_size); + m_max_resolution_scale = max_texture_scale; + + m_max_multisamples = 1; + for (u32 multisamples = 2; multisamples < D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) + { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS fd = {DXGI_FORMAT_R8G8B8A8_UNORM, static_cast(multisamples)}; + + if (SUCCEEDED(g_d3d12_context->GetDevice()->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &fd, + sizeof(fd))) && + fd.NumQualityLevels > 0) + { + m_max_multisamples = 
multisamples; + } + } + + m_supports_dual_source_blend = true; + m_supports_per_sample_shading = true; + Log_InfoPrintf("Dual-source blend: %s", m_supports_dual_source_blend ? "supported" : "not supported"); + Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported"); + Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); +} + +void GPU_HW_D3D12::DestroyResources() +{ + // Everything should be finished executing before recreating resources. + if (g_d3d12_context) + g_d3d12_context->ExecuteCommandList(true); + + DestroyFramebuffer(); + DestroyPipelines(); + + g_d3d12_context->GetSamplerHeapManager().Free(&m_point_sampler); + g_d3d12_context->GetSamplerHeapManager().Free(&m_linear_sampler); + g_d3d12_context->GetDescriptorHeapManager().Free(&m_texture_stream_buffer_srv); + + m_vertex_stream_buffer.Destroy(false); + m_uniform_stream_buffer.Destroy(false); + m_texture_stream_buffer.Destroy(false); + + m_single_sampler_root_signature.Reset(); + m_batch_root_signature.Reset(); +} + +bool GPU_HW_D3D12::CreateRootSignatures() +{ + D3D12::RootSignatureBuilder rsbuilder; + rsbuilder.SetInputAssemblerFlag(); + rsbuilder.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + m_batch_root_signature = rsbuilder.Create(); + if (!m_batch_root_signature) + return false; + + rsbuilder.Add32BitConstants(0, MAX_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + m_single_sampler_root_signature = rsbuilder.Create(); + if (!m_single_sampler_root_signature) + return false; + + return true; +} + +bool 
GPU_HW_D3D12::CreateSamplers() +{ + D3D12_SAMPLER_DESC desc = {}; + D3D12::SetDefaultSampler(&desc); + desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + + if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_point_sampler)) + return false; + + g_d3d12_context->GetDevice()->CreateSampler(&desc, m_point_sampler.cpu_handle); + + desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + + if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_linear_sampler)) + return false; + + g_d3d12_context->GetDevice()->CreateSampler(&desc, m_linear_sampler.cpu_handle); + return true; +} + +bool GPU_HW_D3D12::CreateFramebuffer() +{ + DestroyFramebuffer(); + + // scale vram size to internal resolution + const u32 texture_width = VRAM_WIDTH * m_resolution_scale; + const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; + const DXGI_FORMAT texture_format = DXGI_FORMAT_R8G8B8A8_UNORM; + const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM; + + if (!m_vram_texture.Create(texture_width, texture_height, m_multisamples, texture_format, texture_format, + texture_format, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || + !m_vram_depth_texture.Create( + texture_width, texture_height, m_multisamples, depth_format, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, + depth_format, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) || + !m_vram_read_texture.Create(texture_width, texture_height, m_multisamples, texture_format, texture_format, + DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE) || + !m_display_texture.Create(texture_width, texture_height, m_multisamples, texture_format, texture_format, + texture_format, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || + !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, m_multisamples, texture_format, texture_format, + texture_format, DXGI_FORMAT_UNKNOWN, 
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || + !m_vram_readback_staging_texture.Create(VRAM_WIDTH / 2, VRAM_HEIGHT, texture_format, false)) + { + return false; + } + + D3D12::SetObjectName(m_vram_texture, "VRAM Texture"); + D3D12::SetObjectName(m_vram_depth_texture, "VRAM Depth Texture"); + D3D12::SetObjectName(m_vram_read_texture, "VRAM Read/Sample Texture"); + D3D12::SetObjectName(m_display_texture, "VRAM Display Texture"); + D3D12::SetObjectName(m_vram_readback_texture, "VRAM Readback Texture"); + + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + m_vram_depth_texture.TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE); + m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + ClearDisplay(); + SetFullVRAMDirtyRectangle(); + return true; +} + +void GPU_HW_D3D12::ClearFramebuffer() +{ + static constexpr float clear_color[4] = {}; + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + cmdlist->ClearRenderTargetView(m_vram_texture.GetRTVOrDSVDescriptor(), clear_color, 0, nullptr); + cmdlist->ClearDepthStencilView(m_vram_depth_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, + m_pgxp_depth_buffer ?
1.0f : 0.0f, 0, 0, nullptr); + SetFullVRAMDirtyRectangle(); +} + +void GPU_HW_D3D12::DestroyFramebuffer() +{ + m_vram_read_texture.Destroy(false); + m_vram_depth_texture.Destroy(false); + m_vram_texture.Destroy(false); + m_vram_readback_texture.Destroy(false); + m_display_texture.Destroy(false); + m_vram_readback_staging_texture.Destroy(false); +} + +bool GPU_HW_D3D12::CreateVertexBuffer() +{ + if (!m_vertex_stream_buffer.Create(VERTEX_BUFFER_SIZE)) + return false; + + D3D12::SetObjectName(m_vertex_stream_buffer.GetBuffer(), "Vertex Stream Buffer"); + return true; +} + +bool GPU_HW_D3D12::CreateUniformBuffer() +{ + if (!m_uniform_stream_buffer.Create(UNIFORM_BUFFER_SIZE)) + return false; + + D3D12::SetObjectName(m_uniform_stream_buffer.GetBuffer(), "Uniform Stream Buffer"); + return true; +} + +bool GPU_HW_D3D12::CreateTextureBuffer() +{ + if (!m_texture_stream_buffer.Create(VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) + return false; + + if (!g_d3d12_context->GetDescriptorHeapManager().Allocate(&m_texture_stream_buffer_srv)) + return false; + + D3D12_SHADER_RESOURCE_VIEW_DESC desc = {}; + desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + desc.Format = DXGI_FORMAT_R16_UINT; + desc.Buffer.NumElements = VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16); + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + g_d3d12_context->GetDevice()->CreateShaderResourceView(m_texture_stream_buffer.GetBuffer(), &desc, + m_texture_stream_buffer_srv); + + D3D12::SetObjectName(m_texture_stream_buffer.GetBuffer(), "Texture Stream Buffer"); + return true; +} + +bool GPU_HW_D3D12::CompilePipelines() +{ + D3D12::ShaderCache shader_cache; + shader_cache.Open(g_host_interface->GetShaderCacheBasePath(), g_d3d12_context->GetFeatureLevel(), + g_settings.gpu_use_debug_device); + + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, + m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, +
m_pgxp_depth_buffer, m_supports_dual_source_blend); + + Common::Timer compile_time; + const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 1 + 1 + (2 * 3); + int progress_value = 0; +#define UPDATE_PROGRESS() \ + do \ + { \ + progress_value++; \ + if (compile_time.GetTimeSeconds() >= 1.0f) \ + { \ + compile_time.Reset(); \ + g_host_interface->DisplayLoadingScreen("Compiling Shaders", 0, progress_total, progress_value); \ + } \ + } while (0) + + // vertex shaders - [textured] + // fragment shaders - [render_mode][texture_mode][dithering][interlacing] + DimensionalArray, 2> batch_vertex_shaders{}; + DimensionalArray, 2, 2, 9, 4> batch_fragment_shaders{}; + + for (u8 textured = 0; textured < 2; textured++) + { + const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); + batch_vertex_shaders[textured] = shader_cache.GetVertexShader(vs); + if (!batch_vertex_shaders[textured]) + return false; + + UPDATE_PROGRESS(); + } + + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(texture_mode), + ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); + + batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = shader_cache.GetPixelShader(fs); + if (!batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]) + return false; + + UPDATE_PROGRESS(); + } + } + } + } + + D3D12::GraphicsPipelineBuilder gpbuilder; + + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 transparency_mode = 0; 
transparency_mode < 5; transparency_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + + gpbuilder.SetRootSignature(m_batch_root_signature.Get()); + gpbuilder.SetRenderTarget(0, m_vram_texture.GetFormat()); + gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat()); + + gpbuilder.AddVertexAttribute("ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x)); + gpbuilder.AddVertexAttribute("ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color)); + if (textured) + { + gpbuilder.AddVertexAttribute("ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u)); + gpbuilder.AddVertexAttribute("ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage)); + if (m_using_uv_limits) + gpbuilder.AddVertexAttribute("ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, + offsetof(BatchVertex, uv_limits)); + } + + gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); + gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)].Get()); + gpbuilder.SetPixelShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].Get()); + + gpbuilder.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); + gpbuilder.SetDepthState(true, true, + (depth_test != 0) ? (m_pgxp_depth_buffer ? 
D3D12_COMPARISON_FUNC_LESS_EQUAL : + D3D12_COMPARISON_FUNC_GREATER_EQUAL) : + D3D12_COMPARISON_FUNC_ALWAYS); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetMultisamples(m_multisamples); + + if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + m_texture_filtering != GPUTextureFilter::Nearest) + { + gpbuilder.SetBlendState( + 0, true, D3D12_BLEND_ONE, + m_supports_dual_source_blend ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC_ALPHA, + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + D3D12_BLEND_OP_REV_SUBTRACT : + D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD); + } + + m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] = + gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache); + if (!m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]) + return false; + + D3D12::SetObjectNameFormatted( + m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + .Get(), + "Batch Pipeline %u,%u,%u,%u,%u,%u", depth_test, render_mode, texture_mode, transparency_mode, dithering, + interlacing); + + UPDATE_PROGRESS(); + } + } + } + } + } + } + + ComPtr fullscreen_quad_vertex_shader = + shader_cache.GetVertexShader(shadergen.GenerateScreenQuadVertexShader()); + if (!fullscreen_quad_vertex_shader) + return false; + + UPDATE_PROGRESS(); + + // common state + gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); + gpbuilder.SetRenderTarget(0, m_vram_texture.GetFormat()); + gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat()); + 
gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); + gpbuilder.SetMultisamples(m_multisamples); + gpbuilder.SetRenderTarget(0, m_vram_texture.GetFormat()); + gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat()); + + // VRAM fill + { + for (u8 interlaced = 0; interlaced < 2; interlaced++) + { + ComPtr fs = shader_cache.GetPixelShader( + (interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader()); + if (!fs) + return false; + + gpbuilder.SetPixelShader(fs.Get()); + gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS); + + m_vram_fill_pipelines[interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_vram_fill_pipelines[interlaced]) + return false; + + D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[interlaced].Get(), "VRAM Fill Pipeline Interlacing=%u", + interlaced); + + UPDATE_PROGRESS(); + } + } + + // VRAM copy + { + ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMCopyFragmentShader()); + if (!fs) + return false; + + gpbuilder.SetPixelShader(fs.Get()); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + gpbuilder.SetDepthState((depth_test != 0), true, + (depth_test != 0) ? 
D3D12_COMPARISON_FUNC_GREATER_EQUAL : D3D12_COMPARISON_FUNC_ALWAYS); + + m_vram_copy_pipelines[depth_test] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_vram_copy_pipelines[depth_test]) + return false; + + D3D12::SetObjectNameFormatted(m_vram_copy_pipelines[depth_test].Get(), "VRAM Copy Pipeline Depth=%u", depth_test); + + UPDATE_PROGRESS(); + } + } + + // VRAM write + { + ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMWriteFragmentShader(false)); + if (!fs) + return false; + + gpbuilder.SetPixelShader(fs.Get()); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + gpbuilder.SetDepthState(true, true, + (depth_test != 0) ? D3D12_COMPARISON_FUNC_GREATER_EQUAL : D3D12_COMPARISON_FUNC_ALWAYS); + m_vram_write_pipelines[depth_test] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_vram_write_pipelines[depth_test]) + return false; + + D3D12::SetObjectNameFormatted(m_vram_write_pipelines[depth_test].Get(), "VRAM Write Pipeline Depth=%u", + depth_test); + + UPDATE_PROGRESS(); + } + } + + // VRAM update depth + { + ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMUpdateDepthFragmentShader()); + if (!fs) + return false; + + gpbuilder.SetRootSignature(m_batch_root_signature.Get()); + gpbuilder.SetPixelShader(fs.Get()); + gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS); + gpbuilder.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, + D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, 0); + gpbuilder.ClearRenderTargets(); + + m_vram_update_depth_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_vram_update_depth_pipeline) + return false; + + D3D12::SetObjectName(m_vram_update_depth_pipeline.Get(), "VRAM Update Depth Pipeline"); + + UPDATE_PROGRESS(); + } + + gpbuilder.Clear(); + + // VRAM read + { + ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMReadFragmentShader()); + if 
(!fs) + return false; + + gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); + gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); + gpbuilder.SetPixelShader(fs.Get()); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetRenderTarget(0, m_vram_readback_texture.GetFormat()); + gpbuilder.ClearDepthStencilFormat(); + + m_vram_readback_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_vram_readback_pipeline) + return false; + + D3D12::SetObjectName(m_vram_readback_pipeline.Get(), "VRAM Readback Pipeline"); + + UPDATE_PROGRESS(); + } + + gpbuilder.Clear(); + + // Display + { + gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); + gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetRenderTarget(0, m_display_texture.GetFormat()); + + for (u8 depth_24 = 0; depth_24 < 2; depth_24++) + { + for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) + { + ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateDisplayFragmentShader( + ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode), m_chroma_smoothing)); + if (!fs) + return false; + + gpbuilder.SetPixelShader(fs.Get()); + + m_display_pipelines[depth_24][interlace_mode] = + gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_display_pipelines[depth_24][interlace_mode]) + return false; + + D3D12::SetObjectNameFormatted(m_display_pipelines[depth_24][interlace_mode].Get(), + "Display Pipeline Depth=%u Interlace=%u", depth_24, interlace_mode); + + UPDATE_PROGRESS(); + } + } + } + +#undef UPDATE_PROGRESS + + return true; +} + +void
GPU_HW_D3D12::DestroyPipelines() +{ + m_batch_pipelines = {}; + m_vram_fill_pipelines = {}; + m_vram_write_pipelines = {}; + m_vram_copy_pipelines = {}; + m_vram_readback_pipeline.Reset(); + m_vram_update_depth_pipeline.Reset(); + + m_display_pipelines = {}; +} + +void GPU_HW_D3D12::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) +{ + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + + // [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + ID3D12PipelineState* pipeline = + m_batch_pipelines[BoolToUInt8(m_batch.check_mask_before_draw || m_batch.use_depth_buffer)][static_cast( + render_mode)][static_cast(m_batch.texture_mode)][static_cast(m_batch.transparency_mode)] + [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] + .Get(); + + cmdlist->SetPipelineState(pipeline); + cmdlist->DrawInstanced(num_vertices, 1, base_vertex, 0); +} + +void GPU_HW_D3D12::SetScissorFromDrawingArea() +{ + int left, top, right, bottom; + CalcScissorRect(&left, &top, &right, &bottom); + + D3D12::SetScissor(g_d3d12_context->GetCommandList(), left, top, right - left, bottom - top); +} + +void GPU_HW_D3D12::ClearDisplay() +{ + GPU_HW::ClearDisplay(); + + static constexpr float clear_color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + g_d3d12_context->GetCommandList()->ClearRenderTargetView(m_vram_texture.GetRTVOrDSVDescriptor(), clear_color, 0, + nullptr); +} + +void GPU_HW_D3D12::UpdateDisplay() +{ + GPU_HW::UpdateDisplay(); + + if (g_settings.debugging.show_vram) + { + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + m_host_display->SetDisplayTexture(&m_vram_read_texture, HostDisplayPixelFormat::RGBA8, + m_vram_read_texture.GetWidth(), m_vram_read_texture.GetHeight(), 0, 0, + m_vram_read_texture.GetWidth(), m_vram_read_texture.GetHeight()); + } + else + { + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + 
m_host_display->SetDisplayTexture(&m_vram_texture, HostDisplayPixelFormat::RGBA8, m_vram_texture.GetWidth(), + m_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(), + m_vram_texture.GetHeight()); + } + m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + } + else + { + const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; + const u32 vram_offset_x = m_crtc_state.display_vram_left; + const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; + const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; + const u32 display_width = m_crtc_state.display_vram_width; + const u32 display_height = m_crtc_state.display_vram_height; + const u32 scaled_display_width = display_width * resolution_scale; + const u32 scaled_display_height = display_height * resolution_scale; + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + + if (IsDisplayDisabled()) + { + m_host_display->ClearDisplayTexture(); + } + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && + (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) + { + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_host_display->SetDisplayTexture(&m_vram_texture, HostDisplayPixelFormat::RGBA8, m_vram_texture.GetWidth(), + m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, scaled_display_height); + } + else + { + const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? 
GetInterlacedDisplayField() : 0; + const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; + const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; + const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, + reinterpret_crop_left, reinterpret_field_offset}; + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + m_display_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + cmdlist->OMSetRenderTargets(1, &m_display_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); + cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); + cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), uniforms, 0); + cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); + cmdlist->SetPipelineState( + m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Get()); + D3D12::SetViewportAndScissor(cmdlist, 0, 0, scaled_display_width, scaled_display_height); + cmdlist->DrawInstanced(3, 1, 0, 0); + + m_display_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + + m_host_display->SetDisplayTexture(&m_display_texture, HostDisplayPixelFormat::RGBA8, m_display_texture.GetWidth(), + m_display_texture.GetHeight(), 0, 0, scaled_display_width, + scaled_display_height); + + RestoreGraphicsAPIState(); + } + + m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, + m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, + GetDisplayAspectRatio()); + } +} + +void GPU_HW_D3D12::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ + if 
(IsUsingSoftwareRendererForReadbacks()) + { + ReadSoftwareRendererVRAM(x, y, width, height); + return; + } + + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_vram_readback_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + cmdlist->OMSetRenderTargets(1, &m_vram_readback_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); + cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); + cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), uniforms, 0); + cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); + cmdlist->SetPipelineState(m_vram_readback_pipeline.Get()); + D3D12::SetViewportAndScissor(cmdlist, 0, 0, encoded_width, encoded_height); + cmdlist->DrawInstanced(3, 1, 0, 0); + + m_vram_readback_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + + // Stage the readback. + m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, encoded_width, + encoded_height); + + // And copy it into our shadow buffer (will execute command buffer and stall). 
+ m_vram_readback_staging_texture.ReadPixels(0, 0, encoded_width, encoded_height, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], + VRAM_WIDTH * sizeof(u16)); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_D3D12::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +{ + if (IsUsingSoftwareRendererForReadbacks()) + FillSoftwareRendererVRAM(x, y, width, height, color); + + // TODO: Use fast clear + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. + Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + GPU::FillVRAM(x, y, width, height, color); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); + return; + } + + GPU_HW::FillVRAM(x, y, width, height, color); + + x *= m_resolution_scale; + y *= m_resolution_scale; + width *= m_resolution_scale; + height *= m_resolution_scale; + + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); + cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); + cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor()); + cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())].Get()); + D3D12::SetViewportAndScissor(cmdlist, x, y, width, height); + cmdlist->DrawInstanced(3, 1, 0, 0); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_D3D12::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) +{ + if (IsUsingSoftwareRendererForReadbacks()) + UpdateSoftwareRendererVRAM(x, y, width, height, data, set_mask, check_mask); + + const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); + GPU_HW::UpdateVRAM(bounds.left, 
bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, set_mask, check_mask); + + const u32 data_size = width * height * sizeof(u16); + const u32 alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; // ??? + if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes in stream buffer", data_size); + g_d3d12_context->ExecuteCommandList(false); + RestoreGraphicsAPIState(); + if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) + { + Panic("Failed to allocate space in stream buffer for VRAM write"); + return; + } + } + + const u32 start_index = m_texture_stream_buffer.GetCurrentOffset() / sizeof(u16); + std::memcpy(m_texture_stream_buffer.GetCurrentHostPointer(), data, data_size); + m_texture_stream_buffer.CommitMemory(data_size); + + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, set_mask, check_mask); + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); + cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); + cmdlist->SetGraphicsRootDescriptorTable(1, m_texture_stream_buffer_srv); + cmdlist->SetPipelineState(m_vram_write_pipelines[BoolToUInt8(check_mask)].Get()); + + // the viewport should already be set to the full vram, so just adjust the scissor + const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + D3D12::SetScissor(cmdlist, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), + scaled_bounds.GetHeight()); + + cmdlist->DrawInstanced(3, 1, 0, 0); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_D3D12::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +{ + if (IsUsingSoftwareRendererForReadbacks()) + CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); + + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || 
IsUsingMultisampling()) + { + const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); + if (m_vram_dirty_rect.Intersects(src_bounds)) + UpdateVRAMReadTexture(); + IncludeVRAMDirtyRectangle(dst_bounds); + + const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height)); + const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); + cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); + cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_read_texture.GetRTVOrDSVDescriptor()); + cmdlist->SetPipelineState(m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)].Get()); + D3D12::SetViewportAndScissor(cmdlist, dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), + dst_bounds_scaled.GetHeight()); + cmdlist->DrawInstanced(3, 1, 0, 0); + + RestoreGraphicsAPIState(); + + if (m_GPUSTAT.check_mask_before_draw) + m_current_depth++; + + return; + } + + if (m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) + UpdateVRAMReadTexture(); + + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + + src_x *= m_resolution_scale; + src_y *= m_resolution_scale; + dst_x *= m_resolution_scale; + dst_y *= m_resolution_scale; + width *= m_resolution_scale; + height *= m_resolution_scale; + + const D3D12_TEXTURE_COPY_LOCATION src = {m_vram_read_texture.GetResource(), + D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; + const D3D12_TEXTURE_COPY_LOCATION dst = {m_vram_texture.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; + const D3D12_BOX src_box = {src_x, src_y, 0u, src_x + width, src_y + height, 1u}; + + 
m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + + g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, dst_x, dst_y, 0, &src, &src_box); + + m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); +} + +void GPU_HW_D3D12::UpdateVRAMReadTexture() +{ + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + + const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + + if (m_vram_texture.IsMultisampled()) + { + cmdlist->ResolveSubresource(m_vram_read_texture, 0, m_vram_texture, 0, DXGI_FORMAT_R8G8B8A8_UNORM); + } + else + { + const D3D12_TEXTURE_COPY_LOCATION src = {m_vram_texture.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; + const D3D12_TEXTURE_COPY_LOCATION dst = {m_vram_read_texture.GetResource(), + D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; + const D3D12_BOX src_box = {scaled_rect.left, scaled_rect.top, 0u, scaled_rect.right, scaled_rect.bottom, 1u}; + cmdlist->CopyTextureRegion(&dst, scaled_rect.left, scaled_rect.top, 0, &src, &src_box); + } + + m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + + GPU_HW::UpdateVRAMReadTexture(); +} + +void GPU_HW_D3D12::UpdateDepthBufferFromMaskBit() +{ + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + cmdlist->OMSetRenderTargets(0, nullptr, FALSE, &m_vram_depth_texture.GetRTVOrDSVDescriptor().cpu_handle); + cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); + 
cmdlist->SetPipelineState(m_vram_update_depth_pipeline.Get()); + D3D12::SetViewportAndScissor(cmdlist, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + cmdlist->DrawInstanced(3, 1, 0, 0); + + m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_D3D12::ClearDepthBuffer() +{ + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + cmdlist->ClearDepthStencilView(m_vram_depth_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, + m_pgxp_depth_buffer ? 1.0f : 0.0f, 0, 0, nullptr); +} + +std::unique_ptr GPU::CreateHardwareD3D12Renderer() +{ + return std::make_unique(); +} diff --git a/src/core/gpu_hw_d3d12.h b/src/core/gpu_hw_d3d12.h new file mode 100644 index 000000000..f569064a2 --- /dev/null +++ b/src/core/gpu_hw_d3d12.h @@ -0,0 +1,102 @@ +#pragma once +#include "common/dimensional_array.h" +#include "common/d3d12/staging_texture.h" +#include "common/d3d12/stream_buffer.h" +#include "common/d3d12/texture.h" +#include "gpu_hw.h" +#include +#include +#include + +class GPU_HW_D3D12 : public GPU_HW +{ +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + GPU_HW_D3D12(); + ~GPU_HW_D3D12() override; + + GPURenderer GetRendererType() const override; + + bool Initialize(HostDisplay* host_display) override; + void Reset(bool clear_vram) override; + + void ResetGraphicsAPIState() override; + void RestoreGraphicsAPIState() override; + void UpdateSettings() override; + +protected: + void ClearDisplay() override; + void UpdateDisplay() override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void UpdateVRAMReadTexture() override; + void UpdateDepthBufferFromMaskBit() 
override; + void ClearDepthBuffer() override; + void SetScissorFromDrawingArea() override; + void MapBatchVertexPointer(u32 required_vertices) override; + void UnmapBatchVertexPointer(u32 used_vertices) override; + void UploadUniformBuffer(const void* data, u32 data_size) override; + void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; + +private: + enum : u32 + { + MAX_PUSH_CONSTANTS_SIZE = 64, + }; + void SetCapabilities(); + void DestroyResources(); + + bool CreateRootSignatures(); + bool CreateSamplers(); + + bool CreateFramebuffer(); + void ClearFramebuffer(); + void DestroyFramebuffer(); + + bool CreateVertexBuffer(); + bool CreateUniformBuffer(); + bool CreateTextureBuffer(); + + bool CompilePipelines(); + void DestroyPipelines(); + + ComPtr m_batch_root_signature; + ComPtr m_single_sampler_root_signature; + + D3D12::Texture m_vram_texture; + D3D12::Texture m_vram_depth_texture; + D3D12::Texture m_vram_read_texture; + D3D12::Texture m_vram_readback_texture; + D3D12::StagingTexture m_vram_readback_staging_texture; + D3D12::Texture m_display_texture; + + D3D12::DescriptorHandle m_point_sampler; + D3D12::DescriptorHandle m_linear_sampler; + + D3D12::StreamBuffer m_vertex_stream_buffer; + D3D12::StreamBuffer m_uniform_stream_buffer; + D3D12::StreamBuffer m_texture_stream_buffer; + D3D12::DescriptorHandle m_texture_stream_buffer_srv; + + u32 m_current_uniform_buffer_offset = 0; + + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + DimensionalArray, 2, 2, 5, 9, 4, 2> m_batch_pipelines; + + // [interlaced] + std::array, 2> m_vram_fill_pipelines; + + // [depth_test] + std::array, 2> m_vram_write_pipelines; + std::array, 2> m_vram_copy_pipelines; + + ComPtr m_vram_readback_pipeline; + ComPtr m_vram_update_depth_pipeline; + + // [depth_24][interlace_mode] + DimensionalArray, 3, 2> m_display_pipelines; +}; diff --git a/src/core/host_display.h b/src/core/host_display.h index 
e7c895b75..ea01f846d 100644 --- a/src/core/host_display.h +++ b/src/core/host_display.h @@ -41,6 +41,7 @@ public: { None, D3D11, + D3D12, Vulkan, OpenGL, OpenGLES diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 3987262be..4ad70ecf1 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -652,11 +652,13 @@ const char* Settings::GetCPUFastmemModeDisplayName(CPUFastmemMode mode) static constexpr auto s_gpu_renderer_names = make_array( #ifdef _WIN32 "D3D11", + "D3D12", #endif "Vulkan", "OpenGL", "Software"); static constexpr auto s_gpu_renderer_display_names = make_array( #ifdef _WIN32 TRANSLATABLE("GPURenderer", "Hardware (D3D11)"), + TRANSLATABLE("GPURenderer", "Hardware (D3D12)"), #endif TRANSLATABLE("GPURenderer", "Hardware (Vulkan)"), TRANSLATABLE("GPURenderer", "Hardware (OpenGL)"), TRANSLATABLE("GPURenderer", "Software")); diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp index e17adc8be..b5f09542d 100644 --- a/src/core/shadergen.cpp +++ b/src/core/shadergen.cpp @@ -7,7 +7,7 @@ Log_SetChannel(ShaderGen); ShaderGen::ShaderGen(HostDisplay::RenderAPI render_api, bool supports_dual_source_blend) - : m_render_api(render_api), m_glsl(render_api != HostDisplay::RenderAPI::D3D11), + : m_render_api(render_api), m_glsl(render_api != HostDisplay::RenderAPI::D3D11 && render_api != HostDisplay::RenderAPI::D3D12), m_supports_dual_source_blend(supports_dual_source_blend), m_use_glsl_interface_blocks(false) { if (m_glsl) @@ -133,6 +133,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) DefineMacro(ss, "API_OPENGL", m_render_api == HostDisplay::RenderAPI::OpenGL); DefineMacro(ss, "API_OPENGL_ES", m_render_api == HostDisplay::RenderAPI::OpenGLES); DefineMacro(ss, "API_D3D11", m_render_api == HostDisplay::RenderAPI::D3D11); + DefineMacro(ss, "API_D3D12", m_render_api == HostDisplay::RenderAPI::D3D12); DefineMacro(ss, "API_VULKAN", m_render_api == HostDisplay::RenderAPI::Vulkan); if (m_render_api == HostDisplay::RenderAPI::OpenGLES) diff 
--git a/src/core/system.cpp b/src/core/system.cpp index 35ad972bd..8f0a24602 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -1012,6 +1012,9 @@ bool CreateGPU(GPURenderer renderer) case GPURenderer::HardwareD3D11: g_gpu = GPU::CreateHardwareD3D11Renderer(); break; + case GPURenderer::HardwareD3D12: + g_gpu = GPU::CreateHardwareD3D12Renderer(); + break; #endif case GPURenderer::Software: diff --git a/src/core/types.h b/src/core/types.h index 6b0f0cda3..d197ad62b 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -56,6 +56,7 @@ enum class GPURenderer : u8 { #ifdef _WIN32 HardwareD3D11, + HardwareD3D12, #endif HardwareVulkan, HardwareOpenGL, diff --git a/src/duckstation-nogui/nogui_host_interface.cpp b/src/duckstation-nogui/nogui_host_interface.cpp index df33bf7a9..c1f480ef8 100644 --- a/src/duckstation-nogui/nogui_host_interface.cpp +++ b/src/duckstation-nogui/nogui_host_interface.cpp @@ -24,6 +24,7 @@ Log_SetChannel(NoGUIHostInterface); #ifdef _WIN32 #include "frontend-common/d3d11_host_display.h" +#include "frontend-common/d3d12_host_display.h" #endif NoGUIHostInterface::NoGUIHostInterface() = default; @@ -104,6 +105,10 @@ bool NoGUIHostInterface::CreateDisplay(bool fullscreen) break; #ifdef _WIN32 + case GPURenderer::HardwareD3D12: + m_display = std::make_unique(); + break; + case GPURenderer::HardwareD3D11: default: m_display = std::make_unique(); diff --git a/src/duckstation-qt/qthostinterface.cpp b/src/duckstation-qt/qthostinterface.cpp index fab04debb..e7eec3092 100644 --- a/src/duckstation-qt/qthostinterface.cpp +++ b/src/duckstation-qt/qthostinterface.cpp @@ -42,6 +42,7 @@ Log_SetChannel(QtHostInterface); #ifdef _WIN32 #include "common/windows_headers.h" #include "frontend-common/d3d11_host_display.h" +#include "frontend-common/d3d12_host_display.h" #include #include #endif @@ -598,6 +599,10 @@ HostDisplay* QtHostInterface::createHostDisplay() break; #ifdef _WIN32 + case GPURenderer::HardwareD3D12: + m_display = std::make_unique(); + 
break; + case GPURenderer::HardwareD3D11: default: m_display = std::make_unique(); diff --git a/src/frontend-common/CMakeLists.txt b/src/frontend-common/CMakeLists.txt index 7f72510ce..4a56607c8 100644 --- a/src/frontend-common/CMakeLists.txt +++ b/src/frontend-common/CMakeLists.txt @@ -51,10 +51,14 @@ if(WIN32) target_sources(frontend-common PRIVATE d3d11_host_display.cpp d3d11_host_display.h + d3d12_host_display.cpp + d3d12_host_display.h dinput_controller_interface.cpp dinput_controller_interface.h imgui_impl_dx11.cpp imgui_impl_dx11.h + imgui_impl_dx12.cpp + imgui_impl_dx12.h xaudio2_audio_stream.cpp xaudio2_audio_stream.h xinput_controller_interface.cpp diff --git a/src/frontend-common/d3d12_host_display.cpp b/src/frontend-common/d3d12_host_display.cpp new file mode 100644 index 000000000..793b3c9ba --- /dev/null +++ b/src/frontend-common/d3d12_host_display.cpp @@ -0,0 +1,871 @@ +#include "d3d12_host_display.h" +#include "common/assert.h" +#include "common/d3d11/shader_compiler.h" +#include "common/d3d12/context.h" +#include "common/d3d12/util.h" +#include "common/log.h" +#include "common/string_util.h" +#include "core/host_interface.h" +#include "core/settings.h" +#include "display_ps.hlsl.h" +#include "display_vs.hlsl.h" +#include "frontend-common/postprocessing_shadergen.h" +#include "imgui.h" +#include "imgui_impl_dx12.h" +#include +#include +Log_SetChannel(D3D12HostDisplay); + +namespace FrontendCommon { + +static constexpr std::array(HostDisplayPixelFormat::Count)> + s_display_pixel_format_mapping = {{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, + DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM}}; + +class D3D12HostDisplayTexture : public HostDisplayTexture +{ +public: + D3D12HostDisplayTexture(D3D12::Texture texture) : m_texture(std::move(texture)) {} + ~D3D12HostDisplayTexture() override = default; + + void* GetHandle() const override { return const_cast(&m_texture); } + u32 GetWidth() const override { return 
m_texture.GetWidth(); } + u32 GetHeight() const override { return m_texture.GetHeight(); } + u32 GetLayers() const override { return 1; } + u32 GetLevels() const override { return 1; } + u32 GetSamples() const override { return m_texture.GetSamples(); } + + HostDisplayPixelFormat GetFormat() const override + { + for (u32 i = 0; i < static_cast(s_display_pixel_format_mapping.size()); i++) + { + if (m_texture.GetFormat() == s_display_pixel_format_mapping[i]) + return static_cast(i); + } + + return HostDisplayPixelFormat::Count; + } + + const D3D12::Texture& GetTexture() const { return m_texture; } + D3D12::Texture& GetTexture() { return m_texture; } + +private: + D3D12::Texture m_texture; +}; + +D3D12HostDisplay::D3D12HostDisplay() = default; + +D3D12HostDisplay::~D3D12HostDisplay() +{ + AssertMsg(!g_d3d12_context, "Context should have been destroyed by now"); + AssertMsg(!m_swap_chain, "Swap chain should have been destroyed by now"); +} + +HostDisplay::RenderAPI D3D12HostDisplay::GetRenderAPI() const +{ + return HostDisplay::RenderAPI::D3D12; +} + +void* D3D12HostDisplay::GetRenderDevice() const +{ + return g_d3d12_context->GetDevice(); +} + +void* D3D12HostDisplay::GetRenderContext() const +{ + return g_d3d12_context.get(); +} + +bool D3D12HostDisplay::HasRenderDevice() const +{ + return static_cast(g_d3d12_context); +} + +bool D3D12HostDisplay::HasRenderSurface() const +{ + return static_cast(m_swap_chain); +} + +std::unique_ptr D3D12HostDisplay::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, + u32 samples, HostDisplayPixelFormat format, + const void* data, u32 data_stride, + bool dynamic /* = false */) +{ + if (layers != 1) + return {}; + + const DXGI_FORMAT dxgi_format = s_display_pixel_format_mapping[static_cast(format)]; + D3D12::Texture tex; + if (!tex.Create(width, height, samples, dxgi_format, dxgi_format, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, + D3D12_RESOURCE_FLAG_NONE)) + { + return {}; + } + + if (data && !tex.LoadData(0, 0, width, 
height, data, data_stride)) + return {}; + + return std::make_unique(std::move(tex)); +} + +void D3D12HostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, + const void* texture_data, u32 texture_data_stride) +{ + static_cast(texture)->GetTexture().LoadData(x, y, width, height, texture_data, + texture_data_stride); +} + +bool D3D12HostDisplay::DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, + u32 width, u32 height, void* out_data, u32 out_data_stride) +{ + const D3D12::Texture* texture = static_cast(texture_handle); + + if (!m_readback_staging_texture.EnsureSize(width, height, texture->GetFormat(), false)) + return false; + + m_readback_staging_texture.CopyFromTexture(texture->GetResource(), 0, x, y, 0, 0, width, height); + return m_readback_staging_texture.ReadPixels(0, 0, width, height, out_data, out_data_stride); +} + +bool D3D12HostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const +{ + const DXGI_FORMAT dfmt = s_display_pixel_format_mapping[static_cast(format)]; + if (dfmt == DXGI_FORMAT_UNKNOWN) + return false; + + return g_d3d12_context->SupportsTextureFormat(dfmt); +} + +bool D3D12HostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, + u32* out_pitch) +{ + ClearDisplayTexture(); + + const DXGI_FORMAT dxgi_format = s_display_pixel_format_mapping[static_cast(format)]; + if (m_display_pixels_texture.GetWidth() < width || m_display_pixels_texture.GetHeight() < height || + m_display_pixels_texture.GetFormat() != dxgi_format) + { + if (!m_display_pixels_texture.Create(width, height, 1, dxgi_format, dxgi_format, DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE)) + { + return false; + } + } + + if (!m_display_pixels_texture.BeginStreamUpdate(0, 0, width, height, out_buffer, out_pitch)) + return false; + + SetDisplayTexture(&m_display_pixels_texture, format, 
m_display_pixels_texture.GetWidth(), + m_display_pixels_texture.GetHeight(), 0, 0, static_cast(width), static_cast(height)); + return true; +} + +void D3D12HostDisplay::EndSetDisplayPixels() +{ + m_display_pixels_texture.EndStreamUpdate(0, 0, m_display_pixels_texture.GetWidth(), + m_display_pixels_texture.GetHeight()); +} + +bool D3D12HostDisplay::GetHostRefreshRate(float* refresh_rate) +{ + if (m_swap_chain && IsFullscreen()) + { + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(m_swap_chain->GetDesc(&desc)) && desc.BufferDesc.RefreshRate.Numerator > 0 && + desc.BufferDesc.RefreshRate.Denominator > 0) + { + Log_InfoPrintf("using fs rr: %u %u", desc.BufferDesc.RefreshRate.Numerator, + desc.BufferDesc.RefreshRate.Denominator); + *refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / + static_cast(desc.BufferDesc.RefreshRate.Denominator); + return true; + } + } + + return HostDisplay::GetHostRefreshRate(refresh_rate); +} + +void D3D12HostDisplay::SetVSync(bool enabled) +{ + m_vsync = enabled; +} + +bool D3D12HostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) +{ + ComPtr temp_dxgi_factory; +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(temp_dxgi_factory.GetAddressOf())); +#else + HRESULT hr = CreateDXGIFactory2(0, IID_PPV_ARGS(temp_dxgi_factory.GetAddressOf())); +#endif + + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create DXGI factory: 0x%08X", hr); + return false; + } + + u32 adapter_index; + if (!adapter_name.empty()) + { + AdapterAndModeList adapter_info(GetAdapterAndModeList(temp_dxgi_factory.Get())); + for (adapter_index = 0; adapter_index < static_cast(adapter_info.adapter_names.size()); adapter_index++) + { + if (adapter_name == adapter_info.adapter_names[adapter_index]) + break; + } + if (adapter_index == static_cast(adapter_info.adapter_names.size())) + { + Log_WarningPrintf("Could not find adapter '%*s', 
using first (%s)", static_cast(adapter_name.size()), + adapter_name.data(), adapter_info.adapter_names[0].c_str()); + adapter_index = 0; + } + } + else + { + Log_InfoPrintf("No adapter selected, using first."); + adapter_index = 0; + } + + if (!D3D12::Context::Create(temp_dxgi_factory.Get(), adapter_index, debug_device)) + return false; + + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create D3D device: 0x%08X", hr); + return false; + } + +#if 0 + // we need the specific factory for the device, otherwise MakeWindowAssociation() is flaky. + ComPtr dxgi_device; + if (FAILED(m_device.As(&dxgi_device)) || FAILED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.GetAddressOf()))) || + FAILED(dxgi_adapter->GetParent(IID_PPV_ARGS(m_dxgi_factory.GetAddressOf())))) + { + Log_WarningPrint("Failed to get parent adapter/device/factory"); + return false; + } +#else + m_dxgi_factory = std::move(temp_dxgi_factory); +#endif + + m_allow_tearing_supported = false; + ComPtr dxgi_factory5; + hr = m_dxgi_factory.As(&dxgi_factory5); + if (SUCCEEDED(hr)) + { + BOOL allow_tearing_supported = false; + hr = dxgi_factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, + sizeof(allow_tearing_supported)); + if (SUCCEEDED(hr)) + m_allow_tearing_supported = (allow_tearing_supported == TRUE); + } + + m_window_info = wi; + return true; +} + +bool D3D12HostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) +{ + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain(nullptr)) + return false; + + if (!CreateResources()) + return false; + + return true; +} + +void D3D12HostDisplay::DestroyRenderDevice() +{ + g_d3d12_context->ExecuteCommandList(true); + + DestroyResources(); + DestroyRenderSurface(); + if (g_d3d12_context) + g_d3d12_context->Destroy(); +} + +bool D3D12HostDisplay::MakeRenderContextCurrent() +{ + return true; +} + +bool 
D3D12HostDisplay::DoneRenderContextCurrent() +{ + return true; +} + +bool D3D12HostDisplay::CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode) +{ + HRESULT hr; + +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + if (m_window_info.type != WindowInfo::Type::Win32) + return false; + + const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle); + RECT client_rc{}; + GetClientRect(window_hwnd, &client_rc); + const u32 width = static_cast(client_rc.right - client_rc.left); + const u32 height = static_cast(client_rc.bottom - client_rc.top); + + DXGI_SWAP_CHAIN_DESC swap_chain_desc = {}; + swap_chain_desc.BufferDesc.Width = width; + swap_chain_desc.BufferDesc.Height = height; + swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.BufferCount = 3; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.OutputWindow = window_hwnd; + swap_chain_desc.Windowed = TRUE; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + + m_using_allow_tearing = (m_allow_tearing_supported && !fullscreen_mode); + if (m_using_allow_tearing) + swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + + if (fullscreen_mode) + { + swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + swap_chain_desc.Windowed = FALSE; + swap_chain_desc.BufferDesc = *fullscreen_mode; + } + + Log_InfoPrintf("Creating a %dx%d %s swap chain", swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, + swap_chain_desc.Windowed ? 
"windowed" : "full-screen"); + + hr = + m_dxgi_factory->CreateSwapChain(g_d3d12_context->GetCommandQueue(), &swap_chain_desc, m_swap_chain.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateSwapChain failed: 0x%08X", hr); + return false; + } + + hr = m_dxgi_factory->MakeWindowAssociation(swap_chain_desc.OutputWindow, DXGI_MWA_NO_WINDOW_CHANGES); + if (FAILED(hr)) + Log_WarningPrintf("MakeWindowAssociation() to disable ALT+ENTER failed"); +#else + if (m_window_info.type != WindowInfo::Type::WinRT) + return false; + + ComPtr factory2; + hr = m_dxgi_factory.As(&factory2); + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to get DXGI factory: %08X", hr); + return false; + } + + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; + swap_chain_desc.Width = m_window_info.surface_width; + swap_chain_desc.Height = m_window_info.surface_height; + swap_chain_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.BufferCount = 3; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + + m_using_allow_tearing = (m_allow_tearing_supported && !fullscreen_mode); + if (m_using_allow_tearing) + swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + + ComPtr swap_chain1; + hr = factory2->CreateSwapChainForCoreWindow(g_d3d12_context->GetCommandQueue(), + static_cast(m_window_info.window_handle), &swap_chain_desc, + nullptr, swap_chain1.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateSwapChainForCoreWindow failed: 0x%08X", hr); + return false; + } + + m_swap_chain = swap_chain1; +#endif + + return CreateSwapChainRTV(); +} + +bool D3D12HostDisplay::CreateSwapChainRTV() +{ + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc); + if (FAILED(hr)) + return false; + + for (u32 i = 0; i < swap_chain_desc.BufferCount; i++) + { + ComPtr backbuffer; + hr = m_swap_chain->GetBuffer(i, 
IID_PPV_ARGS(backbuffer.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("GetBuffer for RTV failed: 0x%08X", hr); + return false; + } + + D3D12::Texture tex; + if (!tex.Adopt(std::move(backbuffer), DXGI_FORMAT_UNKNOWN, swap_chain_desc.BufferDesc.Format, DXGI_FORMAT_UNKNOWN, + D3D12_RESOURCE_STATE_PRESENT)) + { + return false; + } + + m_swap_chain_buffers.push_back(std::move(tex)); + } + + m_window_info.surface_width = swap_chain_desc.BufferDesc.Width; + m_window_info.surface_height = swap_chain_desc.BufferDesc.Height; + Log_InfoPrintf("Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height); + + if (m_window_info.type == WindowInfo::Type::Win32) + { + BOOL fullscreen = FALSE; + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && + SUCCEEDED(m_swap_chain->GetDesc(&desc))) + { + m_window_info.surface_refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / + static_cast(desc.BufferDesc.RefreshRate.Denominator); + } + else + { + m_window_info.surface_refresh_rate = 0.0f; + } + } + + m_current_swap_chain_buffer = 0; + return true; +} + +void D3D12HostDisplay::DestroySwapChainRTVs() +{ + for (D3D12::Texture& buffer : m_swap_chain_buffers) + buffer.Destroy(false); + m_swap_chain_buffers.clear(); + m_current_swap_chain_buffer = 0; +} + +bool D3D12HostDisplay::ChangeRenderWindow(const WindowInfo& new_wi) +{ + DestroyRenderSurface(); + + m_window_info = new_wi; + return CreateSwapChain(nullptr); +} + +void D3D12HostDisplay::DestroyRenderSurface() +{ + if (IsFullscreen()) + SetFullscreen(false, 0, 0, 0.0f); + + DestroySwapChainRTVs(); + m_swap_chain.Reset(); +} + +void D3D12HostDisplay::ResizeRenderWindow(s32 new_window_width, s32 new_window_height) +{ + if (!m_swap_chain) + return; + + // For some reason if we don't execute the command list here, the swap chain is in use.. not sure where. 
+ g_d3d12_context->ExecuteCommandList(true); + + DestroySwapChainRTVs(); + + HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, + m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); + if (FAILED(hr)) + Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); + + if (!CreateSwapChainRTV()) + Panic("Failed to recreate swap chain RTV after resize"); +} + +bool D3D12HostDisplay::SupportsFullscreen() const +{ + return true; +} + +bool D3D12HostDisplay::IsFullscreen() +{ + BOOL is_fullscreen = FALSE; + return (m_swap_chain && SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen); +} + +bool D3D12HostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) +{ + if (!m_swap_chain) + return false; + + BOOL is_fullscreen = FALSE; + HRESULT hr = m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr); + if (!fullscreen) + { + // leaving fullscreen + if (is_fullscreen) + return SUCCEEDED(m_swap_chain->SetFullscreenState(FALSE, nullptr)); + else + return true; + } + + IDXGIOutput* output; + if (FAILED(hr = m_swap_chain->GetContainingOutput(&output))) + return false; + + DXGI_SWAP_CHAIN_DESC current_desc; + hr = m_swap_chain->GetDesc(¤t_desc); + if (FAILED(hr)) + return false; + + DXGI_MODE_DESC new_mode = current_desc.BufferDesc; + new_mode.Width = width; + new_mode.Height = height; + new_mode.RefreshRate.Numerator = static_cast(std::floor(refresh_rate * 1000.0f)); + new_mode.RefreshRate.Denominator = 1000u; + + DXGI_MODE_DESC closest_mode; + if (FAILED(hr = output->FindClosestMatchingMode(&new_mode, &closest_mode, nullptr)) || + new_mode.Format != current_desc.BufferDesc.Format) + { + Log_ErrorPrintf("Failed to find closest matching mode, hr=%08X", hr); + return false; + } + + if (new_mode.Width == current_desc.BufferDesc.Width && new_mode.Height == current_desc.BufferDesc.Width && + new_mode.RefreshRate.Numerator == current_desc.BufferDesc.RefreshRate.Numerator && + 
new_mode.RefreshRate.Denominator == current_desc.BufferDesc.RefreshRate.Denominator) + { + Log_InfoPrintf("Fullscreen mode already set"); + return true; + } + + DestroySwapChainRTVs(); + m_swap_chain.Reset(); + + if (!CreateSwapChain(&closest_mode)) + { + Log_ErrorPrintf("Failed to create a fullscreen swap chain"); + if (!CreateSwapChain(nullptr)) + Panic("Failed to recreate windowed swap chain"); + + return false; + } + + return true; +} + +HostDisplay::AdapterAndModeList D3D12HostDisplay::GetAdapterAndModeList() +{ + return GetAdapterAndModeList(m_dxgi_factory.Get()); +} + +bool D3D12HostDisplay::CreateResources() +{ + D3D12::RootSignatureBuilder rsbuilder; + rsbuilder.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_ALL); + rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_ALL); + m_display_root_signature = rsbuilder.Create(); + if (!m_display_root_signature) + return false; + + D3D12::GraphicsPipelineBuilder gpbuilder; + gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); + gpbuilder.SetRootSignature(m_display_root_signature.Get()); + gpbuilder.SetVertexShader(s_display_vs_bytecode, sizeof(s_display_vs_bytecode)); + gpbuilder.SetPixelShader(s_display_ps_bytecode, sizeof(s_display_ps_bytecode)); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + m_display_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), false); + if (!m_display_pipeline) + return false; + + gpbuilder.SetBlendState(0, true, D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); + m_software_cursor_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), false); + if (!m_software_cursor_pipeline) + return 
false; + + D3D12_SAMPLER_DESC desc = {}; + D3D12::SetDefaultSampler(&desc); + desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + + if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_point_sampler)) + return false; + + g_d3d12_context->GetDevice()->CreateSampler(&desc, m_point_sampler.cpu_handle); + + desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + + if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_linear_sampler)) + return false; + + g_d3d12_context->GetDevice()->CreateSampler(&desc, m_linear_sampler.cpu_handle); + + if (!m_display_uniform_buffer.Create(DISPLAY_UNIFORM_BUFFER_SIZE)) + return false; + + return true; +} + +void D3D12HostDisplay::DestroyResources() +{ + // m_post_processing_chain.ClearStages(); + // m_post_processing_input_texture.Destroy(); + // m_post_processing_stages.clear(); + + m_readback_staging_texture.Destroy(false); + m_display_uniform_buffer.Destroy(false); + g_d3d12_context->GetSamplerHeapManager().Free(&m_linear_sampler); + g_d3d12_context->GetSamplerHeapManager().Free(&m_point_sampler); + m_software_cursor_pipeline.Reset(); + m_display_pipeline.Reset(); + m_display_root_signature.Reset(); +} + +bool D3D12HostDisplay::CreateImGuiContext() +{ + ImGui::GetIO().DisplaySize.x = static_cast(m_window_info.surface_width); + ImGui::GetIO().DisplaySize.y = static_cast(m_window_info.surface_height); + + return ImGui_ImplDX12_Init(g_d3d12_context->GetDevice(), D3D12::Context::NUM_COMMAND_LISTS, + DXGI_FORMAT_R8G8B8A8_UNORM); +} + +void D3D12HostDisplay::DestroyImGuiContext() +{ + g_d3d12_context->WaitForGPUIdle(); + + ImGui_ImplDX12_Shutdown(); +} + +bool D3D12HostDisplay::UpdateImGuiFontTexture() +{ + return ImGui_ImplDX12_CreateFontsTexture(); +} + +bool D3D12HostDisplay::Render() +{ + if (ShouldSkipDisplayingFrame()) + { + if (ImGui::GetCurrentContext()) + ImGui::Render(); + + return false; + } + + static constexpr std::array 
clear_color = {}; + D3D12::Texture& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; + m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast(m_swap_chain_buffers.size())); + + ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); + swap_chain_buf.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + cmdlist->ClearRenderTargetView(swap_chain_buf.GetRTVOrDSVDescriptor(), clear_color.data(), 0, nullptr); + cmdlist->OMSetRenderTargets(1, &swap_chain_buf.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); + cmdlist->SetGraphicsRootSignature(m_display_root_signature.Get()); + + RenderDisplay(cmdlist); + + if (ImGui::GetCurrentContext()) + RenderImGui(cmdlist); + + RenderSoftwareCursor(cmdlist); + + swap_chain_buf.TransitionToState(D3D12_RESOURCE_STATE_PRESENT); + g_d3d12_context->ExecuteCommandList(false); + + if (!m_vsync && m_using_allow_tearing) + m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); + else + m_swap_chain->Present(BoolToUInt32(m_vsync), 0); + + return true; +} + +bool D3D12HostDisplay::RenderScreenshot(u32 width, u32 height, std::vector* out_pixels, u32* out_stride, + HostDisplayPixelFormat* out_format) +{ + return false; +} + +void D3D12HostDisplay::RenderImGui(ID3D12GraphicsCommandList* cmdlist) +{ + ImGui::Render(); + ImGui_ImplDX12_RenderDrawData(ImGui::GetDrawData(), cmdlist); +} + +void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist) +{ + if (!HasDisplayTexture()) + return; + + const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight(), m_display_top_margin); + + // if (!m_post_processing_chain.IsEmpty()) + // { + // ApplyPostProcessingChain(m_swap_chain_rtv.Get(), left, top, width, height, m_display_texture_handle, + // m_display_texture_width, m_display_texture_height, m_display_texture_view_x, + // m_display_texture_view_y, m_display_texture_view_width, + // m_display_texture_view_height); + // return; + // } + + 
RenderDisplay(cmdlist, left, top, width, height, m_display_texture_handle, m_display_texture_width, + m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, + m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering); +} + +void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, + void* texture_handle, u32 texture_width, s32 texture_height, s32 texture_view_x, + s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, + bool linear_filter) +{ + const float uniforms[4] = {static_cast(texture_view_x) / static_cast(texture_width), + static_cast(texture_view_y) / static_cast(texture_height), + (static_cast(texture_view_width) - 0.5f) / static_cast(texture_width), + (static_cast(texture_view_height) - 0.5f) / static_cast(texture_height)}; + if (!m_display_uniform_buffer.ReserveMemory(sizeof(uniforms), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + Panic("Failed to reserve UBO space"); + + const u32 ubo_offset = m_display_uniform_buffer.GetCurrentOffset(); + std::memcpy(m_display_uniform_buffer.GetCurrentHostPointer(), uniforms, sizeof(uniforms)); + m_display_uniform_buffer.CommitMemory(sizeof(uniforms)); + + cmdlist->SetPipelineState(m_display_pipeline.Get()); + cmdlist->SetGraphicsRootConstantBufferView(0, m_display_uniform_buffer.GetGPUPointer() + ubo_offset); + cmdlist->SetGraphicsRootDescriptorTable(1, reinterpret_cast(texture_handle)->GetSRVDescriptor()); + cmdlist->SetGraphicsRootDescriptorTable(2, linear_filter ? 
m_linear_sampler : m_point_sampler); + + D3D12::SetViewportAndScissor(cmdlist, left, top, width, height); + + cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + cmdlist->DrawInstanced(3, 1, 0, 0); +} + +void D3D12HostDisplay::RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist) +{ + if (!HasSoftwareCursor()) + return; + + const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); + RenderSoftwareCursor(cmdlist, left, top, width, height, m_cursor_texture.get()); +} + +void D3D12HostDisplay::RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, + s32 height, HostDisplayTexture* texture_handle) +{ + const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; + if (!m_display_uniform_buffer.ReserveMemory(sizeof(uniforms), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + Panic("Failed to reserve UBO space"); + + const u32 ubo_offset = m_display_uniform_buffer.GetCurrentOffset(); + std::memcpy(m_display_uniform_buffer.GetCurrentHostPointer(), uniforms, sizeof(uniforms)); + m_display_uniform_buffer.CommitMemory(sizeof(uniforms)); + + cmdlist->SetPipelineState(m_display_pipeline.Get()); + cmdlist->SetGraphicsRootConstantBufferView(0, m_display_uniform_buffer.GetGPUPointer() + ubo_offset); + cmdlist->SetGraphicsRootDescriptorTable( + 1, static_cast(texture_handle)->GetTexture().GetRTVOrDSVDescriptor()); + cmdlist->SetGraphicsRootDescriptorTable(2, m_linear_sampler); + + D3D12::SetViewportAndScissor(cmdlist, left, top, width, height); + + cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + cmdlist->DrawInstanced(3, 1, 0, 0); +} + +HostDisplay::AdapterAndModeList D3D12HostDisplay::StaticGetAdapterAndModeList() +{ + ComPtr dxgi_factory; +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(dxgi_factory.GetAddressOf())); +#else + HRESULT hr = CreateDXGIFactory2(0, IID_PPV_ARGS(dxgi_factory.GetAddressOf())); +#endif + if (FAILED(hr)) + return {}; 
+
+  return GetAdapterAndModeList(dxgi_factory.Get());
+}
+
+// Enumerates the adapters exposed by dxgi_factory.
+// Returns one name per adapter (duplicates are made unique by appending " (N)") together with the
+// DXGI_FORMAT_R8G8B8A8_UNORM display modes of output 0 of the first adapter that reports any modes.
+HostDisplay::AdapterAndModeList D3D12HostDisplay::GetAdapterAndModeList(IDXGIFactory* dxgi_factory)
+{
+  AdapterAndModeList adapter_info;
+  ComPtr<IDXGIAdapter> current_adapter;
+  // One name is pushed per iteration, so the name count doubles as the next adapter index.
+  while (SUCCEEDED(dxgi_factory->EnumAdapters(static_cast<UINT>(adapter_info.adapter_names.size()),
+                                              current_adapter.ReleaseAndGetAddressOf())))
+  {
+    DXGI_ADAPTER_DESC adapter_desc;
+    std::string adapter_name;
+    if (SUCCEEDED(current_adapter->GetDesc(&adapter_desc)))
+    {
+      char adapter_name_buffer[128];
+      const int name_length = WideCharToMultiByte(CP_UTF8, 0, adapter_desc.Description,
+                                                  static_cast<int>(std::wcslen(adapter_desc.Description)),
+                                                  adapter_name_buffer, countof(adapter_name_buffer), 0, nullptr);
+      // WideCharToMultiByte() returns 0 on failure (it never returns a negative value), so only a positive
+      // length means the buffer holds a converted name.
+      if (name_length > 0)
+        adapter_name.assign(adapter_name_buffer, static_cast<size_t>(name_length));
+      else
+        adapter_name.assign("(Unknown)");
+    }
+    else
+    {
+      adapter_name.assign("(Unknown)");
+    }
+
+    if (adapter_info.fullscreen_modes.empty())
+    {
+      ComPtr<IDXGIOutput> output;
+      if (SUCCEEDED(current_adapter->EnumOutputs(0, &output)))
+      {
+        // First call queries the mode count, second call fills the list.
+        UINT num_modes = 0;
+        if (SUCCEEDED(output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, nullptr)))
+        {
+          std::vector<DXGI_MODE_DESC> modes(num_modes);
+          if (SUCCEEDED(output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, modes.data())))
+          {
+            // num_modes now holds the number of entries actually written; drop any unfilled tail so no
+            // zero-initialised mode (with a zero refresh denominator) gets listed.
+            modes.resize(num_modes);
+            for (const DXGI_MODE_DESC& mode : modes)
+            {
+              adapter_info.fullscreen_modes.push_back(StringUtil::StdStringFromFormat(
+                "%u x %u @ %f hz", mode.Width, mode.Height,
+                static_cast<float>(mode.RefreshRate.Numerator) / static_cast<float>(mode.RefreshRate.Denominator)));
+            }
+          }
+        }
+      }
+    }
+
+    // handle duplicate adapter names
+    if (std::any_of(adapter_info.adapter_names.begin(), adapter_info.adapter_names.end(),
+                    [&adapter_name](const std::string& other) { return (adapter_name == other); }))
+    {
+      std::string original_adapter_name = std::move(adapter_name);
+
+      u32 current_extra = 2;
+      do
+      {
+        adapter_name = StringUtil::StdStringFromFormat("%s (%u)", original_adapter_name.c_str(),
current_extra); + current_extra++; + } while (std::any_of(adapter_info.adapter_names.begin(), adapter_info.adapter_names.end(), + [&adapter_name](const std::string& other) { return (adapter_name == other); })); + } + + adapter_info.adapter_names.push_back(std::move(adapter_name)); + } + + return adapter_info; +} + +bool D3D12HostDisplay::SetPostProcessingChain(const std::string_view& config) +{ + return false; +} + +} // namespace FrontendCommon diff --git a/src/frontend-common/d3d12_host_display.h b/src/frontend-common/d3d12_host_display.h new file mode 100644 index 000000000..2af6dfcd2 --- /dev/null +++ b/src/frontend-common/d3d12_host_display.h @@ -0,0 +1,127 @@ +#pragma once +#pragma once +#include "common/d3d12/descriptor_heap_manager.h" +#include "common/d3d12/staging_texture.h" +#include "common/d3d12/stream_buffer.h" +#include "common/d3d12/texture.h" +#include "common/window_info.h" +#include "common/windows_headers.h" +#include "core/host_display.h" +#include +#include +#include +#include +#include +#include +#include + +namespace FrontendCommon { + +class D3D12HostDisplay : public HostDisplay +{ +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + D3D12HostDisplay(); + ~D3D12HostDisplay(); + + virtual RenderAPI GetRenderAPI() const override; + virtual void* GetRenderDevice() const override; + virtual void* GetRenderContext() const override; + + virtual bool HasRenderDevice() const override; + virtual bool HasRenderSurface() const override; + + virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) override; + virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) override; + virtual void DestroyRenderDevice() override; + + virtual bool MakeRenderContextCurrent() override; + virtual bool DoneRenderContextCurrent() override; + + virtual bool ChangeRenderWindow(const WindowInfo& new_wi) override; + 
virtual void ResizeRenderWindow(s32 new_window_width, s32 new_window_height) override; + virtual bool SupportsFullscreen() const override; + virtual bool IsFullscreen() override; + virtual bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; + virtual AdapterAndModeList GetAdapterAndModeList() override; + virtual void DestroyRenderSurface() override; + + virtual bool SetPostProcessingChain(const std::string_view& config) override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + HostDisplayPixelFormat format, const void* data, u32 data_stride, + bool dynamic = false) override; + void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* texture_data, + u32 texture_data_stride) override; + bool DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, + u32 height, void* out_data, u32 out_data_stride) override; + bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override; + bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, + u32* out_pitch) override; + void EndSetDisplayPixels() override; + + bool GetHostRefreshRate(float* refresh_rate) override; + + virtual void SetVSync(bool enabled) override; + + virtual bool Render() override; + virtual bool RenderScreenshot(u32 width, u32 height, std::vector* out_pixels, u32* out_stride, + HostDisplayPixelFormat* out_format) override; + + static AdapterAndModeList StaticGetAdapterAndModeList(); + +protected: + enum : u32 + { + DISPLAY_UNIFORM_BUFFER_SIZE = 65536, + TEXTURE_STREAMING_BUFFER_SIZE = 4 * 1024 * 1024 + }; + + static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory); + + virtual bool CreateResources() override; + virtual void DestroyResources() override; + + virtual bool CreateImGuiContext(); + virtual void DestroyImGuiContext(); + virtual bool 
UpdateImGuiFontTexture() override; + + bool CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode); + bool CreateSwapChainRTV(); + void DestroySwapChainRTVs(); + + void RenderDisplay(ID3D12GraphicsCommandList* cmdlist); + void RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist); + void RenderImGui(ID3D12GraphicsCommandList* cmdlist); + + void RenderDisplay(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, void* texture_handle, + u32 texture_width, s32 texture_height, s32 texture_view_x, s32 texture_view_y, + s32 texture_view_width, s32 texture_view_height, bool linear_filter); + void RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, + HostDisplayTexture* texture_handle); + + ComPtr m_dxgi_factory; + ComPtr m_swap_chain; + std::vector m_swap_chain_buffers; + u32 m_current_swap_chain_buffer = 0; + + ComPtr m_display_root_signature; + ComPtr m_display_pipeline; + ComPtr m_software_cursor_pipeline; + D3D12::DescriptorHandle m_point_sampler; + D3D12::DescriptorHandle m_linear_sampler; + + D3D12::Texture m_display_pixels_texture; + D3D12::StreamBuffer m_display_uniform_buffer; + D3D12::StagingTexture m_readback_staging_texture; + + bool m_allow_tearing_supported = false; + bool m_using_allow_tearing = false; + bool m_vsync = true; +}; + +} // namespace FrontendCommon diff --git a/src/frontend-common/frontend-common.vcxproj b/src/frontend-common/frontend-common.vcxproj index b695803b7..bc3f13a9f 100644 --- a/src/frontend-common/frontend-common.vcxproj +++ b/src/frontend-common/frontend-common.vcxproj @@ -10,6 +10,7 @@ + @@ -17,6 +18,7 @@ + @@ -45,6 +47,7 @@ + @@ -52,6 +55,7 @@ + diff --git a/src/frontend-common/frontend-common.vcxproj.filters b/src/frontend-common/frontend-common.vcxproj.filters index 29c59a196..73d9bff4e 100644 --- a/src/frontend-common/frontend-common.vcxproj.filters +++ b/src/frontend-common/frontend-common.vcxproj.filters @@ -34,6 +34,8 @@ + + @@ -69,6 +71,8 @@ + + diff 
--git a/src/frontend-common/imgui_impl_dx12.cpp b/src/frontend-common/imgui_impl_dx12.cpp new file mode 100644 index 000000000..c29f3bf69 --- /dev/null +++ b/src/frontend-common/imgui_impl_dx12.cpp @@ -0,0 +1,533 @@ +// dear imgui: Renderer Backend for DirectX12 +// This needs to be used along with a Platform Backend (e.g. Win32) + +// Implemented features: +// [X] Renderer: User texture binding. Use 'D3D12_GPU_DESCRIPTOR_HANDLE' as ImTextureID. Read the FAQ about ImTextureID! +// [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices. + +// Important: to compile on 32-bit systems, this backend requires code to be compiled with '#define ImTextureID ImU64'. +// This is because we need ImTextureID to carry a 64-bit value and by default ImTextureID is defined as void*. +// This define is set in the example .vcxproj file and need to be replicated in your app or by adding it to your imconfig.h file. + +// You can copy and use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. +// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. +// Read online: https://github.com/ocornut/imgui/tree/master/docs + +// CHANGELOG +// (minor and older changes stripped away, please see git history for details) +// 2021-01-11: DirectX12: Improve Windows 7 compatibility (for D3D12On7) by loading d3d12.dll dynamically. +// 2020-09-16: DirectX12: Avoid rendering calls with zero-sized scissor rectangle since it generates a validation layer warning. +// 2020-09-08: DirectX12: Clarified support for building on 32-bit systems by redefining ImTextureID. +// 2019-10-18: DirectX12: *BREAKING CHANGE* Added extra ID3D12DescriptorHeap parameter to ImGui_ImplDX12_Init() function. +// 2019-05-29: DirectX12: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. 
+// 2019-04-30: DirectX12: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. +// 2019-03-29: Misc: Various minor tidying up. +// 2018-12-03: Misc: Added #pragma comment statement to automatically link with d3dcompiler.lib when using D3DCompile(). +// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. +// 2018-06-12: DirectX12: Moved the ID3D12GraphicsCommandList* parameter from NewFrame() to RenderDrawData(). +// 2018-06-08: Misc: Extracted imgui_impl_dx12.cpp/.h away from the old combined DX12+Win32 example. +// 2018-06-08: DirectX12: Use draw_data->DisplayPos and draw_data->DisplaySize to setup projection matrix and clipping rectangle (to ease support for future multi-viewport). +// 2018-02-22: Merged into master with all Win32 code synchronized to other examples. + +#include "imgui.h" +#include "imgui_impl_dx12.h" + +// DirectX +#include +#include +#include +#ifdef _MSC_VER +#pragma comment(lib, "d3dcompiler") // Automatically link with d3dcompiler.lib as we are using D3DCompile() below. 
+#endif + +#include "common/d3d12/texture.h" +#include "common/d3d12/context.h" + +// DirectX data +static ID3D12Device* g_pd3dDevice = NULL; +static ID3D12RootSignature* g_pRootSignature = NULL; +static ID3D12PipelineState* g_pPipelineState = NULL; +static DXGI_FORMAT g_RTVFormat = DXGI_FORMAT_UNKNOWN; +static D3D12::Texture g_FontTexture; + +struct FrameResources +{ + ID3D12Resource* IndexBuffer; + ID3D12Resource* VertexBuffer; + int IndexBufferSize; + int VertexBufferSize; +}; +static FrameResources* g_pFrameResources = NULL; +static UINT g_numFramesInFlight = 0; +static UINT g_frameIndex = UINT_MAX; + +template +static void SafeRelease(T*& res) +{ + if (res) + res->Release(); + res = NULL; +} + +struct VERTEX_CONSTANT_BUFFER +{ + float mvp[4][4]; +}; + +static void ImGui_ImplDX12_SetupRenderState(ImDrawData* draw_data, ID3D12GraphicsCommandList* ctx, FrameResources* fr) +{ + // Setup orthographic projection matrix into our constant buffer + // Our visible imgui space lies from draw_data->DisplayPos (top left) to draw_data->DisplayPos+data_data->DisplaySize (bottom right). 
+ VERTEX_CONSTANT_BUFFER vertex_constant_buffer; + { + float L = draw_data->DisplayPos.x; + float R = draw_data->DisplayPos.x + draw_data->DisplaySize.x; + float T = draw_data->DisplayPos.y; + float B = draw_data->DisplayPos.y + draw_data->DisplaySize.y; + float mvp[4][4] = + { + { 2.0f / (R - L), 0.0f, 0.0f, 0.0f }, + { 0.0f, 2.0f / (T - B), 0.0f, 0.0f }, + { 0.0f, 0.0f, 0.5f, 0.0f }, + { (R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f }, + }; + memcpy(&vertex_constant_buffer.mvp, mvp, sizeof(mvp)); + } + + // Setup viewport + D3D12_VIEWPORT vp; + memset(&vp, 0, sizeof(D3D12_VIEWPORT)); + vp.Width = draw_data->DisplaySize.x; + vp.Height = draw_data->DisplaySize.y; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + vp.TopLeftX = vp.TopLeftY = 0.0f; + ctx->RSSetViewports(1, &vp); + + // Bind shader and vertex buffers + unsigned int stride = sizeof(ImDrawVert); + unsigned int offset = 0; + D3D12_VERTEX_BUFFER_VIEW vbv; + memset(&vbv, 0, sizeof(D3D12_VERTEX_BUFFER_VIEW)); + vbv.BufferLocation = fr->VertexBuffer->GetGPUVirtualAddress() + offset; + vbv.SizeInBytes = fr->VertexBufferSize * stride; + vbv.StrideInBytes = stride; + ctx->IASetVertexBuffers(0, 1, &vbv); + D3D12_INDEX_BUFFER_VIEW ibv; + memset(&ibv, 0, sizeof(D3D12_INDEX_BUFFER_VIEW)); + ibv.BufferLocation = fr->IndexBuffer->GetGPUVirtualAddress(); + ibv.SizeInBytes = fr->IndexBufferSize * sizeof(ImDrawIdx); + ibv.Format = sizeof(ImDrawIdx) == 2 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; + ctx->IASetIndexBuffer(&ibv); + ctx->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ctx->SetPipelineState(g_pPipelineState); + ctx->SetGraphicsRootSignature(g_pRootSignature); + ctx->SetGraphicsRoot32BitConstants(0, 16, &vertex_constant_buffer, 0); + + // Setup blend factor + const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; + ctx->OMSetBlendFactor(blend_factor); +} + +// Render function +void ImGui_ImplDX12_RenderDrawData(ImDrawData* draw_data, ID3D12GraphicsCommandList* ctx) +{ + // Avoid rendering when minimized + if (draw_data->DisplaySize.x <= 0.0f || draw_data->DisplaySize.y <= 0.0f) + return; + + // FIXME: I'm assuming that this only gets called once per frame! + // If not, we can't just re-allocate the IB or VB, we'll have to do a proper allocator. + g_frameIndex = g_frameIndex + 1; + FrameResources* fr = &g_pFrameResources[g_frameIndex % g_numFramesInFlight]; + + // Create and grow vertex/index buffers if needed + if (fr->VertexBuffer == NULL || fr->VertexBufferSize < draw_data->TotalVtxCount) + { + SafeRelease(fr->VertexBuffer); + fr->VertexBufferSize = draw_data->TotalVtxCount + 5000; + D3D12_HEAP_PROPERTIES props; + memset(&props, 0, sizeof(D3D12_HEAP_PROPERTIES)); + props.Type = D3D12_HEAP_TYPE_UPLOAD; + props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + D3D12_RESOURCE_DESC desc; + memset(&desc, 0, sizeof(D3D12_RESOURCE_DESC)); + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = fr->VertexBufferSize * sizeof(ImDrawVert); + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + if (g_pd3dDevice->CreateCommittedResource(&props, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, IID_PPV_ARGS(&fr->VertexBuffer)) < 0) + return; 
+ } + if (fr->IndexBuffer == NULL || fr->IndexBufferSize < draw_data->TotalIdxCount) + { + SafeRelease(fr->IndexBuffer); + fr->IndexBufferSize = draw_data->TotalIdxCount + 10000; + D3D12_HEAP_PROPERTIES props; + memset(&props, 0, sizeof(D3D12_HEAP_PROPERTIES)); + props.Type = D3D12_HEAP_TYPE_UPLOAD; + props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + D3D12_RESOURCE_DESC desc; + memset(&desc, 0, sizeof(D3D12_RESOURCE_DESC)); + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = fr->IndexBufferSize * sizeof(ImDrawIdx); + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + if (g_pd3dDevice->CreateCommittedResource(&props, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, IID_PPV_ARGS(&fr->IndexBuffer)) < 0) + return; + } + + // Upload vertex/index data into a single contiguous GPU buffer + void* vtx_resource, * idx_resource; + D3D12_RANGE range; + memset(&range, 0, sizeof(D3D12_RANGE)); + if (fr->VertexBuffer->Map(0, &range, &vtx_resource) != S_OK) + return; + if (fr->IndexBuffer->Map(0, &range, &idx_resource) != S_OK) + return; + ImDrawVert* vtx_dst = (ImDrawVert*)vtx_resource; + ImDrawIdx* idx_dst = (ImDrawIdx*)idx_resource; + for (int n = 0; n < draw_data->CmdListsCount; n++) + { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); + memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); + vtx_dst += cmd_list->VtxBuffer.Size; + idx_dst += cmd_list->IdxBuffer.Size; + } + fr->VertexBuffer->Unmap(0, &range); + fr->IndexBuffer->Unmap(0, &range); + + // Setup desired DX state + ImGui_ImplDX12_SetupRenderState(draw_data, ctx, fr); + + // Render command lists + // (Because we 
merged all buffers into a single one, we maintain our own offset into them) + int global_vtx_offset = 0; + int global_idx_offset = 0; + ImVec2 clip_off = draw_data->DisplayPos; + for (int n = 0; n < draw_data->CmdListsCount; n++) + { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) + { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + if (pcmd->UserCallback != NULL) + { + // User callback, registered via ImDrawList::AddCallback() + // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) + if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) + ImGui_ImplDX12_SetupRenderState(draw_data, ctx, fr); + else + pcmd->UserCallback(cmd_list, pcmd); + } + else + { + // Apply Scissor, Bind texture, Draw + const D3D12_RECT r = { (LONG)(pcmd->ClipRect.x - clip_off.x), (LONG)(pcmd->ClipRect.y - clip_off.y), (LONG)(pcmd->ClipRect.z - clip_off.x), (LONG)(pcmd->ClipRect.w - clip_off.y) }; + if (r.right > r.left && r.bottom > r.top) + { + ctx->SetGraphicsRootDescriptorTable(1, reinterpret_cast(pcmd->TextureId)->GetSRVDescriptor()); + ctx->RSSetScissorRects(1, &r); + ctx->DrawIndexedInstanced(pcmd->ElemCount, 1, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset, 0); + } + } + } + global_idx_offset += cmd_list->IdxBuffer.Size; + global_vtx_offset += cmd_list->VtxBuffer.Size; + } +} + +bool ImGui_ImplDX12_CreateFontsTexture() +{ + // Build texture atlas + ImGuiIO & io = ImGui::GetIO(); + unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + + // Upload texture to graphics system + D3D12::Texture texture; + if (!texture.Create(width, height, 1, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE) || + !texture.LoadData(0, 0, width, height, pixels, width * sizeof(u32))) + { + return false; + } 
+ + g_FontTexture = std::move(texture); + + // Store our identifier + io.Fonts->TexID = reinterpret_cast(&g_FontTexture); + return true; +} + +bool ImGui_ImplDX12_CreateDeviceObjects() +{ + if (!g_pd3dDevice) + return false; + if (g_pPipelineState) + ImGui_ImplDX12_InvalidateDeviceObjects(); + + // Create the root signature + { + D3D12_DESCRIPTOR_RANGE descRange = {}; + descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descRange.NumDescriptors = 1; + descRange.BaseShaderRegister = 0; + descRange.RegisterSpace = 0; + descRange.OffsetInDescriptorsFromTableStart = 0; + + D3D12_ROOT_PARAMETER param[2] = {}; + + param[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + param[0].Constants.ShaderRegister = 0; + param[0].Constants.RegisterSpace = 0; + param[0].Constants.Num32BitValues = 16; + param[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + + param[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + param[1].DescriptorTable.NumDescriptorRanges = 1; + param[1].DescriptorTable.pDescriptorRanges = &descRange; + param[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + D3D12_STATIC_SAMPLER_DESC staticSampler = {}; + staticSampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + staticSampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + staticSampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + staticSampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + staticSampler.MipLODBias = 0.f; + staticSampler.MaxAnisotropy = 0; + staticSampler.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + staticSampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + staticSampler.MinLOD = 0.f; + staticSampler.MaxLOD = 0.f; + staticSampler.ShaderRegister = 0; + staticSampler.RegisterSpace = 0; + staticSampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + D3D12_ROOT_SIGNATURE_DESC desc = {}; + desc.NumParameters = _countof(param); + desc.pParameters = param; + desc.NumStaticSamplers = 1; + desc.pStaticSamplers = &staticSampler; + 
desc.Flags =
+            D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
+            D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
+            D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
+            D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS;
+
+        auto blob = g_d3d12_context->SerializeRootSignature(&desc);
+        if (!blob)
+            return false;
+
+        // The pipeline state created below references this root signature, so fail here instead of carrying on
+        // with a NULL g_pRootSignature.
+        if (FAILED(g_pd3dDevice->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&g_pRootSignature))))
+            return false;
+    }
+
+    // By using D3DCompile() from <d3dcompiler.h> / d3dcompiler.lib, we introduce a dependency to a given version of d3dcompiler_XX.dll (see D3DCOMPILER_DLL_A)
+    // If you would like to use this DX12 sample code but remove this dependency you can:
+    //  1) compile once, save the compiled shader blobs into a file or source code and pass them to CreateVertexShader()/CreatePixelShader() [preferred solution]
+    //  2) use code to detect any version of the DLL and grab a pointer to D3DCompile from the DLL.
+    // See https://github.com/ocornut/imgui/pull/638 for sources and details.
+ + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc; + memset(&psoDesc, 0, sizeof(D3D12_GRAPHICS_PIPELINE_STATE_DESC)); + psoDesc.NodeMask = 1; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.pRootSignature = g_pRootSignature; + psoDesc.SampleMask = UINT_MAX; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = g_RTVFormat; + psoDesc.SampleDesc.Count = 1; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + ID3DBlob* vertexShaderBlob; + ID3DBlob* pixelShaderBlob; + + // Create the vertex shader + { + static const char* vertexShader = + "cbuffer vertexBuffer : register(b0) \ + {\ + float4x4 ProjectionMatrix; \ + };\ + struct VS_INPUT\ + {\ + float2 pos : POSITION;\ + float4 col : COLOR0;\ + float2 uv : TEXCOORD0;\ + };\ + \ + struct PS_INPUT\ + {\ + float4 pos : SV_POSITION;\ + float4 col : COLOR0;\ + float2 uv : TEXCOORD0;\ + };\ + \ + PS_INPUT main(VS_INPUT input)\ + {\ + PS_INPUT output;\ + output.pos = mul( ProjectionMatrix, float4(input.pos.xy, 0.f, 1.f));\ + output.col = input.col;\ + output.uv = input.uv;\ + return output;\ + }"; + + if (FAILED(D3DCompile(vertexShader, strlen(vertexShader), NULL, NULL, NULL, "main", "vs_5_0", 0, 0, &vertexShaderBlob, NULL))) + return false; // NB: Pass ID3D10Blob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob! 
+ psoDesc.VS = { vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize() }; + + // Create the input layout + static D3D12_INPUT_ELEMENT_DESC local_layout[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, uv), D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)IM_OFFSETOF(ImDrawVert, col), D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + }; + psoDesc.InputLayout = { local_layout, 3 }; + } + + // Create the pixel shader + { + static const char* pixelShader = + "struct PS_INPUT\ + {\ + float4 pos : SV_POSITION;\ + float4 col : COLOR0;\ + float2 uv : TEXCOORD0;\ + };\ + SamplerState sampler0 : register(s0);\ + Texture2D texture0 : register(t0);\ + \ + float4 main(PS_INPUT input) : SV_Target\ + {\ + float4 out_col = input.col * texture0.Sample(sampler0, input.uv); \ + return out_col; \ + }"; + + if (FAILED(D3DCompile(pixelShader, strlen(pixelShader), NULL, NULL, NULL, "main", "ps_5_0", 0, 0, &pixelShaderBlob, NULL))) + { + vertexShaderBlob->Release(); + return false; // NB: Pass ID3D10Blob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob! 
+ } + psoDesc.PS = { pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize() }; + } + + // Create the blending setup + { + D3D12_BLEND_DESC& desc = psoDesc.BlendState; + desc.AlphaToCoverageEnable = false; + desc.RenderTarget[0].BlendEnable = true; + desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; + desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO; + desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + + // Create the rasterizer state + { + D3D12_RASTERIZER_DESC& desc = psoDesc.RasterizerState; + desc.FillMode = D3D12_FILL_MODE_SOLID; + desc.CullMode = D3D12_CULL_MODE_NONE; + desc.FrontCounterClockwise = FALSE; + desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + desc.DepthClipEnable = true; + desc.MultisampleEnable = FALSE; + desc.AntialiasedLineEnable = FALSE; + desc.ForcedSampleCount = 0; + desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + } + + // Create depth-stencil State + { + D3D12_DEPTH_STENCIL_DESC& desc = psoDesc.DepthStencilState; + desc.DepthEnable = false; + desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + desc.StencilEnable = false; + desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + desc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + desc.BackFace = desc.FrontFace; + } + + HRESULT result_pipeline_state = g_pd3dDevice->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&g_pPipelineState)); + vertexShaderBlob->Release(); + pixelShaderBlob->Release(); + if (result_pipeline_state != S_OK) + 
return false; + + return true; +} + +void ImGui_ImplDX12_InvalidateDeviceObjects() +{ + if (!g_pd3dDevice) + return; + + SafeRelease(g_pRootSignature); + SafeRelease(g_pPipelineState); + g_FontTexture.Destroy(true); + + ImGuiIO& io = ImGui::GetIO(); + io.Fonts->SetTexID(NULL); // We copied g_pFontTextureView to io.Fonts->TexID so let's clear that as well. + + for (UINT i = 0; i < g_numFramesInFlight; i++) + { + FrameResources* fr = &g_pFrameResources[i]; + SafeRelease(fr->IndexBuffer); + SafeRelease(fr->VertexBuffer); + } +} + +bool ImGui_ImplDX12_Init(ID3D12Device* device, int num_frames_in_flight, DXGI_FORMAT rtv_format) +{ + // Setup backend capabilities flags + ImGuiIO& io = ImGui::GetIO(); + io.BackendRendererName = "imgui_impl_dx12"; + io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. + + g_pd3dDevice = device; + g_RTVFormat = rtv_format; + g_pFrameResources = new FrameResources[num_frames_in_flight]; + g_numFramesInFlight = num_frames_in_flight; + g_frameIndex = UINT_MAX; + + // Create buffers with a default size (they will later be grown as needed) + for (int i = 0; i < num_frames_in_flight; i++) + { + FrameResources* fr = &g_pFrameResources[i]; + fr->IndexBuffer = NULL; + fr->VertexBuffer = NULL; + fr->IndexBufferSize = 10000; + fr->VertexBufferSize = 5000; + } + + return ImGui_ImplDX12_CreateDeviceObjects(); +} + +void ImGui_ImplDX12_Shutdown() +{ + ImGui_ImplDX12_InvalidateDeviceObjects(); + delete[] g_pFrameResources; + g_pFrameResources = NULL; + g_pd3dDevice = NULL; + g_numFramesInFlight = 0; + g_frameIndex = UINT_MAX; +} diff --git a/src/frontend-common/imgui_impl_dx12.h b/src/frontend-common/imgui_impl_dx12.h new file mode 100644 index 000000000..5907f9596 --- /dev/null +++ b/src/frontend-common/imgui_impl_dx12.h @@ -0,0 +1,47 @@ +// dear imgui: Renderer Backend for DirectX12 +// This needs to be used along with a Platform Backend (e.g. 
Win32) + +// Implemented features: +// [X] Renderer: User texture binding. Use 'D3D12_GPU_DESCRIPTOR_HANDLE' as ImTextureID. Read the FAQ about ImTextureID! NOTE(review): this port stores the address of its font texture object in io.Fonts->TexID - confirm which handle type user code should pass. +// [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices. + +// Important: to compile on 32-bit systems, this backend requires code to be compiled with '#define ImTextureID ImU64'. +// This is because we need ImTextureID to carry a 64-bit value and by default ImTextureID is defined as void*. +// This define is set in the example .vcxproj file and needs to be replicated in your app or by adding it to your imconfig.h file. + +// You can copy and use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. +// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. +// Read online: https://github.com/ocornut/imgui/tree/master/docs + +#pragma once +#include "imgui.h" // IMGUI_IMPL_API + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable: 4471) // a forward declaration of an unscoped enumeration must have an underlying type +#endif + +enum DXGI_FORMAT; +struct ID3D12Device; +struct ID3D12DescriptorHeap; +struct ID3D12GraphicsCommandList; +struct D3D12_CPU_DESCRIPTOR_HANDLE; +struct D3D12_GPU_DESCRIPTOR_HANDLE; + +// graphics_command_list is the command list that the implementation will use to render imgui draw lists. +// Before calling the render function, caller must prepare graphics_command_list by resetting it and setting the appropriate +// render target and descriptor heap. +// NOTE(review): font_srv_cpu_desc_handle/font_srv_gpu_desc_handle are not parameters of ImGui_ImplDX12_Init() as declared below; the internal font texture is set up by ImGui_ImplDX12_CreateFontsTexture() - confirm which descriptor heap the caller must bind.
+IMGUI_IMPL_API bool ImGui_ImplDX12_Init(ID3D12Device* device, int num_frames_in_flight, DXGI_FORMAT rtv_format); +IMGUI_IMPL_API void ImGui_ImplDX12_Shutdown(); +IMGUI_IMPL_API void ImGui_ImplDX12_RenderDrawData(ImDrawData* draw_data, ID3D12GraphicsCommandList* graphics_command_list); + +// Use if you want to reset your rendering device without losing Dear ImGui state. +IMGUI_IMPL_API void ImGui_ImplDX12_InvalidateDeviceObjects(); +IMGUI_IMPL_API bool ImGui_ImplDX12_CreateDeviceObjects(); +IMGUI_IMPL_API bool ImGui_ImplDX12_CreateFontsTexture(); + +#ifdef _MSC_VER +#pragma warning (pop) +#endif +