GPU/HW: Fix oversized VRAM copies writing out of bounds

Fixes jittering in Duke Nukem - Land of the Babes water effect.
This commit is contained in:
Connor McLaughlin 2020-05-03 18:09:34 +10:00
parent 91d7212b36
commit e058beb4b0
4 changed files with 20 additions and 2 deletions

View file

@ -127,6 +127,8 @@ protected:
u32 u_src_y; u32 u_src_y;
u32 u_dst_x; u32 u_dst_x;
u32 u_dst_y; u32 u_dst_y;
u32 u_end_x;
u32 u_end_y;
u32 u_width; u32 u_width;
u32 u_height; u32 u_height;
u32 u_set_mask_bit; u32 u_set_mask_bit;

View file

@ -759,6 +759,8 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
src_y * m_resolution_scale, src_y * m_resolution_scale,
dst_x * m_resolution_scale, dst_x * m_resolution_scale,
dst_y * m_resolution_scale, dst_y * m_resolution_scale,
((dst_x + width) % VRAM_WIDTH) * m_resolution_scale,
((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale,
width * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale, height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,

View file

@ -864,6 +864,8 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
src_y * m_resolution_scale, src_y * m_resolution_scale,
dst_x * m_resolution_scale, dst_x * m_resolution_scale,
dst_y * m_resolution_scale, dst_y * m_resolution_scale,
((dst_x + width) % VRAM_WIDTH) * m_resolution_scale,
((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale,
width * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale, height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,

View file

@ -136,6 +136,10 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define CONSTANT const\n"; ss << "#define CONSTANT const\n";
ss << "#define VECTOR_EQ(a, b) ((a) == (b))\n"; ss << "#define VECTOR_EQ(a, b) ((a) == (b))\n";
ss << "#define VECTOR_NEQ(a, b) ((a) != (b))\n"; ss << "#define VECTOR_NEQ(a, b) ((a) != (b))\n";
ss << "#define VECTOR_LT(a, b) ((a) < (b))\n";
ss << "#define VECTOR_LE(a, b) ((a) <= (b))\n";
ss << "#define VECTOR_GT(a, b) ((a) > (b))\n";
ss << "#define VECTOR_GE(a, b) ((a) >= (b))\n";
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";
@ -148,6 +152,10 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define CONSTANT static const\n"; ss << "#define CONSTANT static const\n";
ss << "#define VECTOR_EQ(a, b) (all((a) == (b)))\n"; ss << "#define VECTOR_EQ(a, b) (all((a) == (b)))\n";
ss << "#define VECTOR_NEQ(a, b) (any((a) != (b)))\n"; ss << "#define VECTOR_NEQ(a, b) (any((a) != (b)))\n";
ss << "#define VECTOR_LT(a, b) (any((a) < (b)))\n";
ss << "#define VECTOR_LE(a, b) (any((a) <= (b)))\n";
ss << "#define VECTOR_GT(a, b) (any((a) > (b)))\n";
ss << "#define VECTOR_GE(a, b) (any((a) >= (b)))\n";
ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n"; ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n";
@ -1104,8 +1112,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
DeclareUniformBuffer( DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size",
ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_size", "bool u_set_mask_bit", "float u_depth_value"}); "bool u_set_mask_bit", "float u_depth_value"});
DeclareTexture(ss, "samp0", 0); DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
@ -1113,6 +1121,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
{ {
uint2 dst_coords = uint2(v_pos.xy); uint2 dst_coords = uint2(v_pos.xy);
// make sure it's not oversized and out of range
if (VECTOR_LT(dst_coords, u_dst_coords) && VECTOR_GE(dst_coords, u_end_coords))
discard;
// find offset from the start of the row/column // find offset from the start of the row/column
uint2 offset; uint2 offset;
offset.x = (dst_coords.x < u_dst_coords.x) ? ((VRAM_SIZE.x - 1u) - u_dst_coords.x + dst_coords.x) : (dst_coords.x - u_dst_coords.x); offset.x = (dst_coords.x < u_dst_coords.x) ? ((VRAM_SIZE.x - 1u) - u_dst_coords.x + dst_coords.x) : (dst_coords.x - u_dst_coords.x);