From e058beb4b0de58788c6a62d212a648d293cc89bc Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 3 May 2020 18:09:34 +1000 Subject: [PATCH] GPU/HW: Fix oversized VRAM copies writing out of bounds Fixes jittering in Duke Nukem - Land of the Babes water effect. --- src/core/gpu_hw.h | 2 ++ src/core/gpu_hw_d3d11.cpp | 2 ++ src/core/gpu_hw_opengl.cpp | 2 ++ src/core/gpu_hw_shadergen.cpp | 16 ++++++++++++++-- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 62fa1bb1b..37af61398 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -127,6 +127,8 @@ protected: u32 u_src_y; u32 u_dst_x; u32 u_dst_y; + u32 u_end_x; + u32 u_end_y; u32 u_width; u32 u_height; u32 u_set_mask_bit; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index e4cd3f2c1..0afebcf34 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -759,6 +759,8 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt src_y * m_resolution_scale, dst_x * m_resolution_scale, dst_y * m_resolution_scale, + ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, + ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale, m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index b24d5a359..3dff8d812 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -864,6 +864,8 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid src_y * m_resolution_scale, dst_x * m_resolution_scale, dst_y * m_resolution_scale, + ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, + ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale, m_GPUSTAT.set_mask_while_drawing ? 
1u : 0u, diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index c75e517bf..76aa601ca 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -136,6 +136,10 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define CONSTANT const\n"; ss << "#define VECTOR_EQ(a, b) ((a) == (b))\n"; ss << "#define VECTOR_NEQ(a, b) ((a) != (b))\n"; + ss << "#define VECTOR_LT(a, b) (any(lessThan((a), (b))))\n"; + ss << "#define VECTOR_LE(a, b) (any(lessThanEqual((a), (b))))\n"; + ss << "#define VECTOR_GT(a, b) (any(greaterThan((a), (b))))\n"; + ss << "#define VECTOR_GE(a, b) (any(greaterThanEqual((a), (b))))\n"; ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n"; @@ -148,6 +152,10 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define CONSTANT static const\n"; ss << "#define VECTOR_EQ(a, b) (all((a) == (b)))\n"; ss << "#define VECTOR_NEQ(a, b) (any((a) != (b)))\n"; + ss << "#define VECTOR_LT(a, b) (any((a) < (b)))\n"; + ss << "#define VECTOR_LE(a, b) (any((a) <= (b)))\n"; + ss << "#define VECTOR_GT(a, b) (any((a) > (b)))\n"; + ss << "#define VECTOR_GE(a, b) (any((a) >= (b)))\n"; ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n"; @@ -1104,8 +1112,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DeclareUniformBuffer( - ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_size", "bool u_set_mask_bit", "float u_depth_value"}); + DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", + "bool u_set_mask_bit", "float 
u_depth_value"}); DeclareTexture(ss, "samp0", 0); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); @@ -1113,6 +1121,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() { uint2 dst_coords = uint2(v_pos.xy); + // discard pixels in the gap between the wrapped end and the start of the copy; each axis is tested on its own + if ((dst_coords.x < u_dst_coords.x && dst_coords.x >= u_end_coords.x) || (dst_coords.y < u_dst_coords.y && dst_coords.y >= u_end_coords.y)) + discard; + // find offset from the start of the row/column uint2 offset; offset.x = (dst_coords.x < u_dst_coords.x) ? ((VRAM_SIZE.x - 1u) - u_dst_coords.x + dst_coords.x) : (dst_coords.x - u_dst_coords.x);