From 044d7a4797f174bfc3c09d84c9120d8b7142ff11 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 27 Jun 2024 16:07:49 +1000 Subject: [PATCH] GPU: Eliminate heap allocation on state load --- src/core/gpu.cpp | 13 +------------ src/core/gpu_hw.cpp | 24 ++++++++++++++++++++++-- src/core/gpu_hw.h | 2 ++ src/core/gpu_sw.cpp | 3 +++ 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 50b15e9b0..1e5b75d97 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -359,18 +359,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ if (!sw.DoMarker("GPU-VRAM")) return false; - if (sw.IsReading()) - { - // Still need a temporary here. - FixedHeapArray temp; - sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data(), false, false); - } - else - { - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - } + sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } if (sw.IsReading()) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index c236b0ac2..643bdf577 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -270,6 +270,12 @@ void GPU_HW::Reset(bool clear_vram) bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) { + // Need to download local VRAM copy before calling the base class, because it serializes this. + if (m_sw_renderer) + m_sw_renderer->Sync(true); + else if (sw.IsWriting() && !host_texture) + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + if (!GPU::DoState(sw, host_texture, update_display)) return false; @@ -308,11 +314,18 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di tex->GetHeight()); } } + else if (sw.IsReading()) + { + // Need to update the VRAM copy on the GPU with the state data. + UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, + Common::Rectangle(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); + } // invalidate the whole VRAM read texture when loading state if (sw.IsReading()) { DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); + ClearVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle(); ResetBatchVertexDepth(); } @@ -2954,6 +2967,12 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } } + UpdateVRAMOnGPU(x, y, width, height, data, sizeof(u16) * width, set_mask, check_mask, bounds); +} + +void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, + bool check_mask, const Common::Rectangle& bounds) +{ std::unique_ptr upload_texture; u32 map_index; @@ -2961,7 +2980,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b { map_index = 0; upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, - GPUTexture::Format::R16U, data, width * sizeof(u16)); + GPUTexture::Format::R16U, data, data_pitch); if (!upload_texture) { ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height); @@ -2971,9 +2990,10 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b else { const u32 num_pixels = width * height; + const u32 dst_pitch = width * sizeof(u16); void* map = m_vram_upload_buffer->Map(num_pixels); map_index = m_vram_upload_buffer->GetCurrentPosition(); - std::memcpy(map, data, num_pixels * sizeof(u16)); + StringUtil::StrideMemCpy(map, dst_pitch, data, data_pitch, dst_pitch, height); m_vram_upload_buffer->Unmap(num_pixels); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index d47ce2cd4..aa149e818 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -204,6 +204,8 @@ private: void DrawRendererStats() override; void OnBufferSwapped() override; + void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, + bool check_mask, const Common::Rectangle& bounds); bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); /// Expands a line into two triangles. diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 08b995aa1..55338543f 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -73,6 +73,9 @@ bool GPU_SW::Initialize() bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) { + // need to ensure the worker thread is done + m_backend.Sync(true); + // ignore the host texture for software mode, since we want to save vram here return GPU::DoState(sw, nullptr, update_display); }