mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-31 11:55:37 +00:00
GPU/HW: Simplify 24-bit scanout and interlacing shader
This has the added bonus of being faster (no downscale copy), and it fixes the edge case where the image is cropped in VRAM.
This commit is contained in:
parent
1d948a53e0
commit
2a6e04988f
|
@ -440,10 +440,6 @@ void GPU::UpdateCRTCDisplayParameters()
|
|||
cs.display_vram_left = std::min<u16>(
|
||||
m_crtc_state.regs.X + ((horizontal_display_start_tick - cs.horizontal_display_start) / cs.dot_clock_divider),
|
||||
VRAM_WIDTH - 1);
|
||||
|
||||
// for 24-bit scanout we must stay aligned
|
||||
if (m_GPUSTAT.display_area_color_depth_24 && ((cs.display_vram_left - cs.regs.X) & 1u))
|
||||
cs.display_vram_left--;
|
||||
}
|
||||
|
||||
if (cs.horizontal_display_end <= horizontal_display_end_tick)
|
||||
|
|
|
@ -552,43 +552,24 @@ void GPU_HW_D3D11::UpdateDisplay()
|
|||
}
|
||||
else
|
||||
{
|
||||
const u32 field_offset = BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field);
|
||||
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||
|
||||
const u32 reinterpret_field_offset =
|
||||
(m_crtc_state.regs.Y + BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field)) & 1u;
|
||||
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
|
||||
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
|
||||
const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
|
||||
ID3D11PixelShader* display_pixel_shader =
|
||||
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get();
|
||||
|
||||
// Because of how the reinterpret shader works, we need to use the downscaled version.
|
||||
if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
|
||||
{
|
||||
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
||||
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
||||
BlitTexture(m_vram_encoding_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height,
|
||||
m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width,
|
||||
scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false);
|
||||
SetViewportAndScissor(reinterpret_start_x, m_crtc_state.display_vram_top, reinterpret_width,
|
||||
scaled_display_height);
|
||||
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
||||
|
||||
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_encoding_texture.GetD3DSRVArray());
|
||||
|
||||
const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset};
|
||||
SetViewportAndScissor(0, field_offset, display_width, display_height);
|
||||
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
||||
|
||||
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
|
||||
m_display_texture.GetHeight(), 0, 0, display_width, display_height);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||
|
||||
const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset};
|
||||
SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height);
|
||||
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
||||
|
||||
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
|
||||
m_display_texture.GetHeight(), 0, 0, scaled_display_width,
|
||||
scaled_display_height);
|
||||
}
|
||||
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
|
||||
m_display_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
|
||||
scaled_display_width, scaled_display_height);
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
}
|
||||
|
|
|
@ -507,63 +507,32 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
|||
}
|
||||
else
|
||||
{
|
||||
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
|
||||
const u32 scaled_flipped_vram_offset_y =
|
||||
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
|
||||
const u32 field_offset = BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field);
|
||||
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
const GL::Program& prog =
|
||||
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)];
|
||||
prog.Bind();
|
||||
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Bind();
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.Bind();
|
||||
|
||||
// Because of how the reinterpret shader works, we need to use the downscaled version.
|
||||
if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
|
||||
{
|
||||
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
||||
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
||||
m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||
glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
|
||||
scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
|
||||
vram_offset_x + copy_width, flipped_vram_offset_y + display_height, GL_COLOR_BUFFER_BIT,
|
||||
GL_NEAREST);
|
||||
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
|
||||
const u32 scaled_flipped_vram_offset_y =
|
||||
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
|
||||
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_encoding_texture.Bind();
|
||||
const u32 reinterpret_field_offset =
|
||||
(m_crtc_state.regs.Y + BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field)) & 1u;
|
||||
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
|
||||
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
|
||||
const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
|
||||
UploadUniformBlock(uniforms, sizeof(uniforms));
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
glViewport(0, field_offset, display_width, display_height);
|
||||
glViewport(reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_width, scaled_display_height);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
const u32 uniforms[4] = {vram_offset_x, flipped_vram_offset_y, field_offset};
|
||||
UploadUniformBlock(uniforms, sizeof(uniforms));
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
|
||||
display_height, display_width, -static_cast<s32>(display_height));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.Bind();
|
||||
|
||||
glViewport(0, field_offset, scaled_display_width, scaled_display_height);
|
||||
|
||||
const u32 uniforms[4] = {scaled_vram_offset_x, scaled_flipped_vram_offset_y, field_offset};
|
||||
UploadUniformBlock(uniforms, sizeof(uniforms));
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
|
||||
scaled_display_height, scaled_display_width,
|
||||
-static_cast<s32>(scaled_display_height));
|
||||
}
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(),
|
||||
scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y,
|
||||
scaled_display_width, -static_cast<s32>(scaled_display_height));
|
||||
|
||||
// restore state
|
||||
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
|
|
|
@ -371,75 +371,46 @@ void GPU_HW_OpenGL_ES::UpdateDisplay()
|
|||
}
|
||||
else
|
||||
{
|
||||
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
|
||||
const u32 scaled_flipped_vram_offset_y =
|
||||
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
|
||||
const u32 field_offset = BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field);
|
||||
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
const GL::Program& prog =
|
||||
GL::Program& prog =
|
||||
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)];
|
||||
prog.Bind();
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.Bind();
|
||||
|
||||
// Because of how the reinterpret shader works, we need to use the downscaled version.
|
||||
if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
|
||||
{
|
||||
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
||||
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
||||
m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||
glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
|
||||
scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
|
||||
vram_offset_x + copy_width, flipped_vram_offset_y + display_height, GL_COLOR_BUFFER_BIT,
|
||||
GL_NEAREST);
|
||||
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
|
||||
const u32 scaled_flipped_vram_offset_y =
|
||||
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
|
||||
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_encoding_texture.Bind();
|
||||
const u32 reinterpret_field_offset =
|
||||
(m_crtc_state.regs.Y + BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field)) & 1u;
|
||||
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
|
||||
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
|
||||
|
||||
glViewport(0, field_offset, display_width, display_height);
|
||||
prog.Uniform2i(0, reinterpret_field_offset, reinterpret_start_x);
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
prog.Uniform3i(0, static_cast<s32>(vram_offset_x), static_cast<s32>(flipped_vram_offset_y),
|
||||
static_cast<s32>(field_offset));
|
||||
m_batch_ubo_dirty = true;
|
||||
glViewport(reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_width, scaled_display_height);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
|
||||
display_height, display_width, -static_cast<s32>(display_height));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.Bind();
|
||||
|
||||
glViewport(0, field_offset, scaled_display_width, scaled_display_height);
|
||||
|
||||
prog.Uniform3i(0, static_cast<s32>(scaled_vram_offset_x), static_cast<s32>(scaled_flipped_vram_offset_y),
|
||||
static_cast<s32>(field_offset));
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
|
||||
scaled_display_height, scaled_display_width,
|
||||
-static_cast<s32>(scaled_display_height));
|
||||
}
|
||||
|
||||
// restore state
|
||||
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(),
|
||||
scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y,
|
||||
scaled_display_width, -static_cast<s32>(scaled_display_height));
|
||||
}
|
||||
|
||||
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
|
||||
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
|
||||
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
|
||||
m_crtc_state.display_aspect_ratio);
|
||||
// restore state
|
||||
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
}
|
||||
|
||||
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
|
||||
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
|
||||
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
|
||||
m_crtc_state.display_aspect_ratio);
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL_ES::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
||||
|
|
|
@ -750,7 +750,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
|
|||
DefineMacro(ss, "INTERLACED", interlaced);
|
||||
|
||||
WriteCommonFunctions(ss);
|
||||
DeclareUniformBuffer(ss, {"int3 u_base_coords"});
|
||||
DeclareUniformBuffer(ss, {"int u_field_offset", "int u_vram_start_x"});
|
||||
DeclareTexture(ss, "samp0", 0);
|
||||
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
|
@ -759,55 +759,28 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
|
|||
int2 icoords = int2(v_pos.xy);
|
||||
|
||||
#if INTERLACED
|
||||
if (((icoords.y - u_base_coords.z) & 1) != 0)
|
||||
if (((icoords.y / RESOLUTION_SCALE) & 1) != u_field_offset)
|
||||
discard;
|
||||
#endif
|
||||
|
||||
#if DEPTH_24BIT
|
||||
// compute offset in dwords from the start of the 24-bit values
|
||||
int2 base = int2(u_base_coords.x, u_base_coords.y + icoords.y);
|
||||
int xoff = int(icoords.x);
|
||||
int dword_index = (xoff / 2) + (xoff / 4);
|
||||
// relative to start of scanout
|
||||
int relative_x = (icoords.x - u_vram_start_x) / RESOLUTION_SCALE;
|
||||
icoords.x = u_vram_start_x + ((relative_x * 3) / 2) * RESOLUTION_SCALE;
|
||||
|
||||
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
|
||||
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + dword_index * 2 + 0, base.y), 0));
|
||||
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + dword_index * 2 + 1, base.y), 0));
|
||||
uint s2 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
|
||||
uint s3 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
|
||||
|
||||
// select the bit for this pixel depending on its offset in the 4-pixel block
|
||||
uint r, g, b;
|
||||
int block_offset = xoff & 3;
|
||||
if (block_offset == 0)
|
||||
{
|
||||
r = s0 & 0xFFu;
|
||||
g = s0 >> 8;
|
||||
b = s1 & 0xFFu;
|
||||
}
|
||||
else if (block_offset == 1)
|
||||
{
|
||||
r = s1 >> 8;
|
||||
g = s2 & 0xFFu;
|
||||
b = s2 >> 8;
|
||||
}
|
||||
else if (block_offset == 2)
|
||||
{
|
||||
r = s1 & 0xFFu;
|
||||
g = s1 >> 8;
|
||||
b = s2 & 0xFFu;
|
||||
}
|
||||
else
|
||||
{
|
||||
r = s2 >> 8;
|
||||
g = s3 & 0xFFu;
|
||||
b = s3 >> 8;
|
||||
}
|
||||
|
||||
// and normalize
|
||||
o_col0 = float4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
|
||||
// load adjacent 16-bit texels
|
||||
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords, 0));
|
||||
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords + int2(RESOLUTION_SCALE, 0), 0));
|
||||
|
||||
// select which part of the combined 16-bit texels we are currently shading
|
||||
uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1) * 8);
|
||||
|
||||
// extract components and normalize
|
||||
o_col0 = float4(float(s1s0 & 0xFFu) / 255.0, float((s1s0 >> 8u) & 0xFFu) / 255.0,
|
||||
float((s1s0 >> 16u) & 0xFFu) / 255.0, 1.0);
|
||||
#else
|
||||
// load and return
|
||||
o_col0 = LOAD_TEXTURE(samp0, u_base_coords.xy + icoords, 0);
|
||||
o_col0 = LOAD_TEXTURE(samp0, icoords, 0);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
|
Loading…
Reference in a new issue