From 2bcb85d43160c0de3281dd55bdd93a90c7404027 Mon Sep 17 00:00:00 2001 From: Stenzek <stenzek@gmail.com> Date: Sun, 2 Jun 2024 18:55:25 +1000 Subject: [PATCH] GPU/HW: Push all draws/writes through a common path --- src/core/gpu_hw.cpp | 148 +++++++++++++++++++++----------------------- src/core/gpu_hw.h | 10 ++- 2 files changed, 78 insertions(+), 80 deletions(-) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index ccdb68049..c928e70a5 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_hw.h" @@ -16,6 +16,7 @@ #include "common/align.h" #include "common/assert.h" +#include "common/gsvector_formatter.h" #include "common/log.h" #include "common/scoped_guard.h" #include "common/string_util.h" @@ -651,11 +652,39 @@ void GPU_HW::ClearVRAMDirtyRectangle() m_vram_dirty_write_rect = INVALID_RECT; } -void GPU_HW::IncludeDrawnDirtyRectangle(const GSVector4i rect) +void GPU_HW::AddWrittenRectangle(const GSVector4i rect) { + m_vram_dirty_write_rect = m_vram_dirty_write_rect.runion(rect); + SetTexPageChangedOnOverlap(m_vram_dirty_write_rect); +} + +void GPU_HW::AddDrawnRectangle(const GSVector4i rect) +{ + // Normally, we would check for overlap here. But the GPU's texture cache won't actually reload until the page + // changes, or it samples a larger region, so we can get away without doing so. This reduces copies considerably in + // games like Mega Man Legends 2. 
m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect.rintersect(m_clamped_drawing_area)); } +void GPU_HW::AddUnclampedDrawnRectangle(const GSVector4i rect) +{ + m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); + SetTexPageChangedOnOverlap(m_vram_dirty_draw_rect); +} + +void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) +{ + // The updated VRAM area can overlap the active texture page or palette while the game leaves the draw mode as-is. + // In that case, flag the texture page as changed so the shadow texture is updated. + if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && + (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(update_rect) || + (m_draw_mode.mode_reg.IsUsingPalette() && + m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) + { + m_draw_mode.SetTexturePageChanged(); + } +} + std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) { const u32 scale = scaled ? 
m_resolution_scale : 1u; @@ -1515,17 +1544,13 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) if (drawn) { DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT)); - GL_INS_FMT("Updating draw rect {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.right, - m_vram_dirty_draw_rect.top, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), - m_vram_dirty_draw_rect.height()); + GL_INS_FMT("Updating draw rect {}", m_vram_dirty_draw_rect); u8 dbits = TEXPAGE_DIRTY_DRAWN_RECT; if (written && m_vram_dirty_draw_rect.rintersects(m_vram_dirty_write_rect)) { DebugAssert(!m_vram_dirty_write_rect.eq(INVALID_RECT)); - GL_INS_FMT("Including write rect {},{} => {},{} ({}x{})", m_vram_dirty_write_rect.left, - m_vram_dirty_write_rect.right, m_vram_dirty_write_rect.top, m_vram_dirty_write_rect.bottom, - m_vram_dirty_write_rect.width(), m_vram_dirty_write_rect.height()); + GL_INS_FMT("Including write rect {}", m_vram_dirty_write_rect); m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(m_vram_dirty_write_rect); m_vram_dirty_write_rect = INVALID_RECT; dbits = TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT; @@ -1536,9 +1561,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) } if (written) { - GL_INS_FMT("Updating write rect {},{} => {},{} ({}x{})", m_vram_dirty_write_rect.left, - m_vram_dirty_write_rect.right, m_vram_dirty_write_rect.top, m_vram_dirty_write_rect.bottom, - m_vram_dirty_write_rect.width(), m_vram_dirty_write_rect.height()); + GL_INS_FMT("Updating write rect {}", m_vram_dirty_write_rect); update(m_vram_dirty_write_rect, TEXPAGE_DIRTY_WRITTEN_RECT); } } @@ -2229,7 +2252,7 @@ void GPU_HW::LoadVertices() if (textured && m_compute_uv_range) ComputePolygonUVLimits(vertices.data(), num_vertices); - IncludeDrawnDirtyRectangle(draw_rect_012); + AddDrawnRectangle(draw_rect_012); AddDrawTriangleTicks(GSVector4i(native_vertex_positions[0]), GSVector4i(native_vertex_positions[1]), 
GSVector4i(native_vertex_positions[2]), rc.shading_enable, rc.texture_enable, rc.transparency_enable); @@ -2275,7 +2298,7 @@ void GPU_HW::LoadVertices() if (first_tri_culled && textured && m_compute_uv_range) ComputePolygonUVLimits(vertices.data(), num_vertices); - IncludeDrawnDirtyRectangle(draw_rect_123); + AddDrawnRectangle(draw_rect_123); AddDrawTriangleTicks(GSVector4i(native_vertex_positions[2]), GSVector4i(native_vertex_positions[1]), GSVector4i(native_vertex_positions[3]), rc.shading_enable, rc.texture_enable, rc.transparency_enable); @@ -2410,7 +2433,7 @@ void GPU_HW::LoadVertices() tex_top = 0; } - IncludeDrawnDirtyRectangle(GSVector4i(pos_x, pos_y, pos_x + rectangle_width, pos_y + rectangle_height)); + AddDrawnRectangle(GSVector4i(pos_x, pos_y, pos_x + rectangle_width, pos_y + rectangle_height)); AddDrawRectangleTicks(pos_x, pos_y, rectangle_width, rectangle_height, rc.texture_enable, rc.transparency_enable); if (m_sw_renderer) @@ -2467,7 +2490,7 @@ void GPU_HW::LoadVertices() return; } - IncludeDrawnDirtyRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); + AddDrawnRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. @@ -2527,7 +2550,7 @@ void GPU_HW::LoadVertices() } else { - IncludeDrawnDirtyRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); + AddDrawnRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. @@ -2596,24 +2619,6 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u3 return true; } -void GPU_HW::IncludeVRAMDirtyRectangle(GSVector4i& rect, const GSVector4i new_rect) -{ - if (rect.rcontains(new_rect)) - return; - - rect = rect.runion(new_rect); - - // the vram area can include the texture page, but the game can leave it as-is. 
in this case, set it as dirty so the - // shadow texture is updated - if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && - (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(new_rect) || - (m_draw_mode.mode_reg.IsUsingPalette() && - m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(new_rect)))) - { - m_draw_mode.SetTexturePageChanged(); - } -} - ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) { DebugAssert(m_texpage_dirty != 0 && m_batch.texture_mode != BatchTextureMode::Disabled); @@ -2636,35 +2641,31 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) uv_rect = uv_rect.add32(GSVector4i::cxpr(0, 0, 1, 1)); // make exclusive uv_rect = uv_rect.rintersect(VRAM_SIZE_RECT); // clamp to vram bounds - const GSVector4i new_uv_rect = m_current_uv_range.runion(uv_rect); + const GSVector4i new_uv_rect = m_current_uv_rect.runion(uv_rect); - if (!m_current_uv_range.eq(new_uv_rect)) + if (!m_current_uv_rect.eq(new_uv_rect)) { - m_current_uv_range = new_uv_rect; + m_current_uv_rect = new_uv_rect; bool update_drawn = false, update_written = false; if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT) { DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT)); - update_drawn = m_current_uv_range.rintersects(m_vram_dirty_draw_rect); + update_drawn = m_current_uv_rect.rintersects(m_vram_dirty_draw_rect); if (update_drawn) { - GL_INS_FMT("Updating VRAM cache due to UV {{{},{} => {},{}}} intersection with dirty DRAW {{{},{} => {},{}}}", - m_current_uv_range.left, m_current_uv_range.top, m_current_uv_range.right, m_current_uv_range.bottom, - m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, m_vram_dirty_draw_rect.right, - m_vram_dirty_draw_rect.bottom); + GL_INS_FMT("Updating VRAM cache due to UV {} intersection with dirty DRAW {}", m_current_uv_rect, + m_vram_dirty_draw_rect); } } if (m_texpage_dirty & TEXPAGE_DIRTY_WRITTEN_RECT) { 
DebugAssert(!m_vram_dirty_write_rect.eq(INVALID_RECT)); - update_written = m_current_uv_range.rintersects(m_vram_dirty_write_rect); + update_written = m_current_uv_rect.rintersects(m_vram_dirty_write_rect); if (update_written) { - GL_INS_FMT("Updating VRAM cache due to UV {{{},{} => {},{}}} intersection with dirty WRITE {{{},{} => {},{}}}", - m_current_uv_range.left, m_current_uv_range.top, m_current_uv_range.right, m_current_uv_range.bottom, - m_vram_dirty_write_rect.left, m_vram_dirty_write_rect.top, m_vram_dirty_write_rect.right, - m_vram_dirty_write_rect.bottom); + GL_INS_FMT("Updating VRAM cache due to UV {} intersection with dirty WRITE {}", m_current_uv_rect, + m_vram_dirty_write_rect); } } @@ -2850,21 +2851,17 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) m_sw_renderer->PushCommand(cmd); } - GL_INS_FMT("Dirty draw area before: {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, - m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), - m_vram_dirty_draw_rect.height()); + GL_INS_FMT("Dirty draw area before: {}", m_vram_dirty_draw_rect); - IncludeVRAMDirtyRectangle(m_vram_dirty_draw_rect, GSVector4i(x, y, x + width, y + height).rintersect(VRAM_SIZE_RECT)); + const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); + AddUnclampedDrawnRectangle(bounds); - GL_INS_FMT("Dirty draw area after: {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, - m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), - m_vram_dirty_draw_rect.height()); + GL_INS_FMT("Dirty draw area after: {}", m_vram_dirty_draw_rect); const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)); g_gpu_device->SetPipeline( m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get()); - const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); const 
GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(scaled_bounds); @@ -2971,7 +2968,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); DebugAssert(bounds.right <= static_cast<s32>(VRAM_WIDTH) && bounds.bottom <= static_cast<s32>(VRAM_HEIGHT)); - IncludeVRAMDirtyRectangle(m_vram_dirty_write_rect, bounds); + AddWrittenRectangle(bounds); if (check_mask) { @@ -3086,7 +3083,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 { if (intersect_with_draw || intersect_with_write) UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); - IncludeVRAMDirtyRectangle(m_vram_dirty_draw_rect, dst_bounds); + AddUnclampedDrawnRectangle(dst_bounds); struct VRAMCopyUBOData { @@ -3137,19 +3134,24 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); } - GSVector4i* update_rect; - if (intersect_with_draw || intersect_with_write) + if (intersect_with_draw) { - update_rect = intersect_with_draw ? &m_vram_dirty_draw_rect : &m_vram_dirty_write_rect; + AddUnclampedDrawnRectangle(dst_bounds); + } + else if (intersect_with_write) + { + AddWrittenRectangle(dst_bounds); } else { const bool use_write = (!m_vram_dirty_write_rect.eq(INVALID_RECT) && !m_vram_dirty_draw_rect.eq(INVALID_RECT) && RectDistance(m_vram_dirty_write_rect, dst_bounds) < RectDistance(m_vram_dirty_draw_rect, dst_bounds)); - update_rect = use_write ? 
&m_vram_dirty_write_rect : &m_vram_dirty_draw_rect; + if (use_write) + AddWrittenRectangle(dst_bounds); + else + AddUnclampedDrawnRectangle(dst_bounds); } - IncludeVRAMDirtyRectangle(*update_rect, dst_bounds); if (m_GPUSTAT.check_mask_before_draw) { @@ -3176,19 +3178,13 @@ void GPU_HW::DispatchRenderCommand() { m_draw_mode.ClearTexturePageChangedFlag(); -#if 0 - if (m_vram_dirty_rect.Valid()) +#if 1 + if (!m_vram_dirty_draw_rect.eq(INVALID_RECT) || !m_vram_dirty_write_rect.eq(INVALID_RECT)) { - GL_INS_FMT("VRAM DIRTY: {},{} => {},{}", m_vram_dirty_rect.left, m_vram_dirty_rect.top, m_vram_dirty_rect.right, - m_vram_dirty_rect.bottom); - - auto tpr = m_draw_mode.mode_reg.GetTexturePageRectangle(); - GL_INS_FMT("PAGE RECT: {},{} => {},{}", tpr.left, tpr.top, tpr.right, tpr.bottom); + GL_INS_FMT("VRAM DIRTY: {} {}", m_vram_dirty_draw_rect, m_vram_dirty_write_rect); + GL_INS_FMT("PAGE RECT: {}", m_draw_mode.mode_reg.GetTexturePageRectangle()); if (m_draw_mode.mode_reg.IsUsingPalette()) - { - tpr = m_draw_mode.GetTexturePaletteRectangle(); - GL_INS_FMT("PALETTE RECT: {},{} => {},{}", tpr.left, tpr.top, tpr.right, tpr.bottom); - } + GL_INS_FMT("PALETTE RECT: {}", m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode)); } #endif @@ -3218,7 +3214,7 @@ void GPU_HW::DispatchRenderCommand() GL_INS("Texpage is in dirty area, checking UV ranges"); m_texpage_dirty = new_texpage_dirty; m_compute_uv_range = true; - m_current_uv_range = INVALID_RECT; + m_current_uv_rect = INVALID_RECT; } else { @@ -3356,9 +3352,7 @@ void GPU_HW::FlushRender() GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number); #endif - GL_INS_FMT("Dirty draw area: {},{} => {},{} ({}x{})", m_vram_dirty_draw_rect.left, m_vram_dirty_draw_rect.top, - m_vram_dirty_draw_rect.right, m_vram_dirty_draw_rect.bottom, m_vram_dirty_draw_rect.width(), - m_vram_dirty_draw_rect.height()); + GL_INS_FMT("Dirty draw area: {}", m_vram_dirty_draw_rect); if (m_batch_ubo_dirty) { diff --git a/src/core/gpu_hw.h 
b/src/core/gpu_hw.h index 00b96a7b6..acea8db1c 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -175,8 +175,12 @@ private: void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); - void IncludeVRAMDirtyRectangle(GSVector4i& rect, const GSVector4i new_rect); - void IncludeDrawnDirtyRectangle(const GSVector4i rect); + + void AddWrittenRectangle(const GSVector4i rect); + void AddDrawnRectangle(const GSVector4i rect); + void AddUnclampedDrawnRectangle(const GSVector4i rect); + void SetTexPageChangedOnOverlap(const GSVector4i update_rect); + void CheckForTexPageOverlap(GSVector4i uv_rect); bool IsFlushed() const; @@ -288,7 +292,7 @@ private: // Bounding box of VRAM area that the GPU has drawn into. GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; GSVector4i m_vram_dirty_write_rect = INVALID_RECT; - GSVector4i m_current_uv_range = INVALID_RECT; + GSVector4i m_current_uv_rect = INVALID_RECT; GSVector2i m_current_texture_page_offset = {}; std::unique_ptr<GPUPipeline> m_wireframe_pipeline;