From cc7483ad58a832b0a74325c42c1fc2f4df82a958 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Tue, 5 Nov 2019 22:34:27 +1000
Subject: [PATCH] GPU/HW: Only copy the dirty area to the VRAM read texture

---
 src/common/rectangle.h     | 30 ++++++++++++++++++++++++++++++
 src/core/gpu.h             |  2 ++
 src/core/gpu_hw.cpp        | 37 ++++++++++++++++++++++++++-----------
 src/core/gpu_hw.h          | 13 ++++++++++++-
 src/core/gpu_hw_d3d11.cpp  | 18 ++++++++----------
 src/core/gpu_hw_d3d11.h    |  2 +-
 src/core/gpu_hw_opengl.cpp | 22 +++++++++++-----------
 src/core/gpu_hw_opengl.h   |  2 +-
 8 files changed, 91 insertions(+), 35 deletions(-)

diff --git a/src/common/rectangle.h b/src/common/rectangle.h
index e3fbdd2eb..c84bbf68b 100644
--- a/src/common/rectangle.h
+++ b/src/common/rectangle.h
@@ -87,6 +87,36 @@ struct Rectangle
 
 #undef RELATIONAL_OPERATOR
 
+  // Arithmetic operators.
+  // Each expansion defines `op=` (applies op with a scalar to all four edges in place, returning *this)
+  // and `op` (returns a new Rectangle with op applied to every edge; does not modify this rectangle).
+#define ARITHMETIC_OPERATOR(op)                                                                                        \
+  constexpr Rectangle& operator op##=(const T amount)                                                                  \
+  {                                                                                                                    \
+    left op## = amount;                                                                                                \
+    top op## = amount;                                                                                                 \
+    right op## = amount;                                                                                               \
+    bottom op## = amount;                                                                                              \
+    return *this;                                                                                                      \
+  }                                                                                                                    \
+  constexpr Rectangle operator op(const T amount) const                                                                \
+  {                                                                                                                    \
+    return Rectangle(left op amount, top op amount, right op amount, bottom op amount);                                \
+  }
+
+  ARITHMETIC_OPERATOR(+);
+  ARITHMETIC_OPERATOR(-);
+  ARITHMETIC_OPERATOR(*);
+  ARITHMETIC_OPERATOR(/);
+  ARITHMETIC_OPERATOR(%);
+  ARITHMETIC_OPERATOR(>>);
+  ARITHMETIC_OPERATOR(<<);
+  ARITHMETIC_OPERATOR(|);
+  ARITHMETIC_OPERATOR(&);
+  ARITHMETIC_OPERATOR(^);
+
+#undef ARITHMETIC_OPERATOR
+
 #ifdef _WINDEF_
   /// Casts this rectangle to a Win32 RECT structure if compatible.
   template && _>>
diff --git a/src/core/gpu.h b/src/core/gpu.h
index 78d4bd2f7..f370d7202 100644
--- a/src/core/gpu.h
+++ b/src/core/gpu.h
@@ -385,9 +385,11 @@ protected:
     }
 
     bool IsTexturePageChanged() const { return texture_page_changed; }
+    void SetTexturePageChanged() { texture_page_changed = true; }
     void ClearTexturePageChangedFlag() { texture_page_changed = false; }
 
     bool IsTextureWindowChanged() const { return texture_window_changed; }
+    void SetTextureWindowChanged() { texture_window_changed = true; }
     void ClearTextureWindowChangedFlag() { texture_window_changed = false; }
 
     void SetFromPolygonTexcoord(u32 texcoord0, u32 texcoord1);
diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 216e570be..b7516c0ea 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -1,6 +1,7 @@
 #include "gpu_hw.h"
 #include "YBaseLib/Assert.h"
 #include "YBaseLib/Log.h"
+#include "common/state_wrapper.h"
 #include "settings.h"
 #include "system.h"
 #include
@@ -11,15 +12,6 @@ GPU_HW::GPU_HW() = default;
 
 GPU_HW::~GPU_HW() = default;
 
-void GPU_HW::Reset()
-{
-  GPU::Reset();
-
-  m_batch = {};
-  m_batch_ubo_data = {};
-  m_batch_ubo_dirty = true;
-}
-
 bool GPU_HW::Initialize(HostDisplay* host_display, System* system, DMA* dma, InterruptController* interrupt_controller,
                         Timers* timers)
 {
@@ -33,6 +25,29 @@ bool GPU_HW::Initialize(HostDisplay* host_display, System* system, DMA* dma, Int
   return true;
 }
 
+void GPU_HW::Reset()
+{
+  GPU::Reset();
+
+  m_batch = {};
+  m_batch_ubo_data = {};
+  m_batch_ubo_dirty = true;
+
+  SetFullVRAMDirtyRectangle();
+}
+
+bool GPU_HW::DoState(StateWrapper& sw)
+{
+  if (!GPU::DoState(sw))
+    return false;
+
+  // invalidate the whole VRAM read texture when loading state
+  if (sw.IsReading())
+    SetFullVRAMDirtyRectangle();
+
+  return true;
+}
+
 void GPU_HW::UpdateSettings()
 {
   GPU::UpdateSettings();
@@ -228,11 +243,11 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
       if (m_vram_dirty_rect.Valid() && (m_render_state.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) ||
                                         m_render_state.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect)))
       {
+        Log_WarningPrintf("Invalidating VRAM read cache due to drawing area overlap");
         if (!IsFlushed())
           FlushRender();
 
-        Log_WarningPrintf("Invalidating VRAM read cache due to drawing area overlap");
-        m_vram_read_texture_dirty = true;
+        UpdateVRAMReadTexture();
       }
     }
 
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 2f02d3c2c..0e9e40e68 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -30,6 +30,7 @@ public:
   virtual bool Initialize(HostDisplay* host_display, System* system, DMA* dma,
                           InterruptController* interrupt_controller, Timers* timers) override;
   virtual void Reset() override;
+  virtual bool DoState(StateWrapper& sw) override;
   virtual void UpdateSettings() override;
 
 protected:
@@ -111,6 +112,17 @@ protected:
   }
 
   virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
+  // Backends copy the area covered by m_vram_dirty_rect from the VRAM texture into the VRAM read texture,
+  // then clear the dirty rectangle.
+  virtual void UpdateVRAMReadTexture() = 0;
+
+  // Marks the whole of VRAM as dirty and flags the texture page as changed.
+  void SetFullVRAMDirtyRectangle()
+  {
+    m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
+    m_render_state.SetTexturePageChanged();
+  }
+  void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); }
 
   u32 GetBatchVertexSpace() const { return static_cast(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
   u32 GetBatchVertexCount() const { return static_cast(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
@@ -151,7 +163,6 @@ protected:
 
   // Changed state
   bool m_batch_ubo_dirty = true;
-  bool m_vram_read_texture_dirty = false;
 
 private:
   enum : u32
diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp
index cab792d41..9a35591b7 100644
--- a/src/core/gpu_hw_d3d11.cpp
+++ b/src/core/gpu_hw_d3d11.cpp
@@ -184,7 +184,7 @@ bool GPU_HW_D3D11::CreateFramebuffer()
   }
 
   m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
-  m_vram_read_texture_dirty = true;
+  SetFullVRAMDirtyRectangle();
   return true;
 }
 
@@ -192,7 +192,7 @@ void GPU_HW_D3D11::ClearFramebuffer()
 {
   static constexpr std::array color = {};
   m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data());
-  m_vram_read_texture_dirty = true;
+  SetFullVRAMDirtyRectangle();
 }
 
 void GPU_HW_D3D11::DestroyFramebuffer()
@@ -491,9 +491,6 @@ void GPU_HW_D3D11::SetDrawState(BatchRenderMode render_mode)
     UploadUniformBlock(&m_batch_ubo_data, sizeof(m_batch_ubo_data));
     m_batch_ubo_dirty = false;
   }
-
-  if (m_vram_read_texture_dirty)
-    UpdateVRAMReadTexture();
 }
 
 void GPU_HW_D3D11::SetScissorFromDrawingArea()
@@ -648,12 +645,13 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
 
 void GPU_HW_D3D11::UpdateVRAMReadTexture()
 {
-  m_renderer_stats.num_vram_read_texture_updates++;
-  m_vram_read_texture_dirty = false;
-  m_vram_dirty_rect.SetInvalid();
+  const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
+  const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1);
+  m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0,
+                                   &src_box);
 
-  const CD3D11_BOX src_box(0, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 1);
-  m_context->CopySubresourceRegion(m_vram_read_texture, 0, 0, 0, 0, m_vram_texture, 0, &src_box);
+  m_renderer_stats.num_vram_read_texture_updates++;
+  ClearVRAMDirtyRectangle();
 }
 
 void GPU_HW_D3D11::FlushRender()
diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h
index 6a560f623..e471c1d01 100644
--- a/src/core/gpu_hw_d3d11.h
+++ b/src/core/gpu_hw_d3d11.h
@@ -34,13 +34,13 @@ protected:
   void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
   void FlushRender() override;
   void MapBatchVertexPointer(u32 required_vertices) override;
+  void UpdateVRAMReadTexture() override;
 
 private:
   void SetCapabilities();
   bool CreateFramebuffer();
   void ClearFramebuffer();
   void DestroyFramebuffer();
-  void UpdateVRAMReadTexture();
 
   bool CreateVertexBuffer();
   bool CreateUniformBuffer();
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 07b4beea0..5149c46d2 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -169,7 +169,7 @@ void GPU_HW_OpenGL::CreateFramebuffer()
     std::make_unique(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
 
   m_vram_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-  m_vram_read_texture_dirty = true;
+  SetFullVRAMDirtyRectangle();
 }
 
 void GPU_HW_OpenGL::ClearFramebuffer()
@@ -178,7 +178,7 @@ void GPU_HW_OpenGL::ClearFramebuffer()
   glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
   glClear(GL_COLOR_BUFFER_BIT);
   glEnable(GL_SCISSOR_TEST);
-  m_vram_read_texture_dirty = true;
+  SetFullVRAMDirtyRectangle();
 }
 
 void GPU_HW_OpenGL::DestroyFramebuffer()
@@ -352,9 +352,6 @@ void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
     UploadUniformBlock(&m_batch_ubo_data, sizeof(m_batch_ubo_data));
     m_batch_ubo_dirty = false;
   }
-
-  if (m_vram_read_texture_dirty)
-    UpdateVRAMReadTexture();
 }
 
 void GPU_HW_OpenGL::SetScissorFromDrawingArea()
@@ -678,13 +675,16 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
 
 void GPU_HW_OpenGL::UpdateVRAMReadTexture()
 {
-  m_renderer_stats.num_vram_read_texture_updates++;
-  m_vram_read_texture_dirty = false;
-  m_vram_dirty_rect.SetInvalid();
+  // TODO: Fallback blit path.
+  const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
+  const u32 flipped_y = m_vram_texture->GetHeight() - scaled_rect.top - scaled_rect.GetHeight();
 
-  // TODO: Fallback blit path, and partial updates.
-  glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, m_vram_read_texture->GetGLId(),
-                     GL_TEXTURE_2D, 0, 0, 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1);
+  glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_rect.left, flipped_y, 0,
+                     m_vram_read_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_rect.left, flipped_y, 0,
+                     scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1);
+
+  m_renderer_stats.num_vram_read_texture_updates++;
+  ClearVRAMDirtyRectangle();
 }
 
 void GPU_HW_OpenGL::FlushRender()
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index e4adaccd2..17f9e06b9 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -30,6 +30,7 @@ protected:
   void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
   void FlushRender() override;
   void MapBatchVertexPointer(u32 required_vertices) override;
+  void UpdateVRAMReadTexture() override;
 
 private:
   struct GLStats
@@ -48,7 +49,6 @@ private:
   void CreateFramebuffer();
   void ClearFramebuffer();
   void DestroyFramebuffer();
-  void UpdateVRAMReadTexture();
 
   void CreateVertexBuffer();
   void CreateUniformBuffer();