GPU: Implement weave deinterlacing

This commit is contained in:
Connor McLaughlin 2019-10-23 15:36:37 +10:00
parent 87f9f99938
commit 2d0dd03705
5 changed files with 101 additions and 74 deletions

View file

@ -342,12 +342,6 @@ void GPU::UpdateCRTCConfig()
cs.display_width = std::max<u32>(cs.visible_ticks_per_scanline / cs.dot_clock_divider, 1);
cs.display_height = cs.visible_scanlines_per_frame;
if (m_GPUSTAT.vertical_interlace)
{
// Force progressive for now.
cs.display_height *= 2;
}
if (cs.display_width != old_horizontal_resolution || cs.display_height != old_vertical_resolution)
Log_InfoPrintf("Visible resolution is now %ux%u", cs.display_width, cs.display_height);

View file

@ -406,61 +406,80 @@ void main()
return ss.str();
}
std::string GPU_HW::GenerateRGB24DecodeFragmentShader()
std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
{
std::stringstream ss;
GenerateShaderHeader(ss);
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
DefineMacro(ss, "INTERLACED", interlaced);
ss << R"(
in vec2 v_tex0;
out vec4 o_col0;
uniform sampler2D samp0;
uniform ivec2 u_base_coords;
uniform ivec3 u_base_coords;
ivec2 GetCoords(vec2 fragcoord)
{
ivec2 icoords = ivec2(fragcoord);
#if INTERLACED
if (((icoords.y - u_base_coords.z) & 1) != 0)
discard;
#endif
return icoords;
}
void main()
{
// compute offset in dwords from the start of the 24-bit values
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + int(gl_FragCoord.y));
int xoff = int(gl_FragCoord.x);
int dword_index = (xoff / 2) + (xoff / 4);
ivec2 icoords = GetCoords(gl_FragCoord.xy);
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
#if DEPTH_24BIT
// compute offset in dwords from the start of the 24-bit values
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
int xoff = int(icoords.x);
int dword_index = (xoff / 2) + (xoff / 4);
// select the bit for this pixel depending on its offset in the 4-pixel block
uint r, g, b;
int block_offset = xoff & 3;
if (block_offset == 0)
{
r = s0 & 0xFFu;
g = s0 >> 8;
b = s1 & 0xFFu;
}
else if (block_offset == 1)
{
r = s1 >> 8;
g = s2 & 0xFFu;
b = s2 >> 8;
}
else if (block_offset == 2)
{
r = s1 & 0xFFu;
g = s1 >> 8;
b = s2 & 0xFFu;
}
else
{
r = s2 >> 8;
g = s3 & 0xFFu;
b = s3 >> 8;
}
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
// and normalize
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255, 1.0);
// select the bit for this pixel depending on its offset in the 4-pixel block
uint r, g, b;
int block_offset = xoff & 3;
if (block_offset == 0)
{
r = s0 & 0xFFu;
g = s0 >> 8;
b = s1 & 0xFFu;
}
else if (block_offset == 1)
{
r = s1 >> 8;
g = s2 & 0xFFu;
b = s2 >> 8;
}
else if (block_offset == 2)
{
r = s1 & 0xFFu;
g = s1 >> 8;
b = s2 & 0xFFu;
}
else
{
r = s2 >> 8;
g = s3 & 0xFFu;
b = s3 >> 8;
}
// and normalize
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255, 1.0);
#else
// load and return
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
#endif
}
)";

View file

@ -97,7 +97,7 @@ protected:
TextureColorMode texture_color_mode, bool blending);
std::string GenerateScreenQuadVertexShader();
std::string GenerateFillFragmentShader();
std::string GenerateRGB24DecodeFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
u32 m_resolution_scale = 1;
HWRenderBatch m_batch = {};

View file

@ -274,19 +274,27 @@ bool GPU_HW_OpenGL::CompilePrograms()
}
// TODO: Use string_view
if (!m_reinterpret_rgb8_program.Compile(GenerateScreenQuadVertexShader().c_str(),
GenerateRGB24DecodeFragmentShader().c_str()))
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
{
return false;
}
m_reinterpret_rgb8_program.BindFragData(0, "o_col0");
if (!m_reinterpret_rgb8_program.Link())
return false;
for (u8 interlaced = 0; interlaced < 2; interlaced++)
{
GL::Program& prog = m_display_programs[depth_24bit][interlaced];
const std::string vs = GenerateScreenQuadVertexShader();
const std::string fs =
GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced));
if (!prog.Compile(vs.c_str(), fs.c_str()))
return false;
m_reinterpret_rgb8_program.Bind();
m_reinterpret_rgb8_program.RegisterUniform("u_base_coords");
m_reinterpret_rgb8_program.RegisterUniform("samp0");
m_reinterpret_rgb8_program.Uniform1i(1, 0);
prog.BindFragData(0, "o_col0");
if (!prog.Link())
return false;
prog.Bind();
prog.RegisterUniform("u_base_coords");
prog.RegisterUniform("samp0");
prog.Uniform1i(1, 0);
}
}
return true;
}
@ -400,24 +408,36 @@ void GPU_HW_OpenGL::UpdateDisplay()
}
else
{
const u32 field_offset = BoolToUInt8(m_GPUSTAT.vertical_interlace && !m_GPUSTAT.drawing_even_line);
const u32 vram_offset_x = m_crtc_state.regs.X;
const u32 vram_offset_y = m_crtc_state.regs.Y;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = std::min<u32>(m_crtc_state.display_width, VRAM_WIDTH - vram_offset_x);
const u32 display_height = std::min<u32>(m_crtc_state.display_height, VRAM_HEIGHT - vram_offset_y);
const u32 display_height = std::min<u32>(m_crtc_state.display_height << BoolToUInt8(m_GPUSTAT.vertical_interlace),
VRAM_HEIGHT - vram_offset_y);
const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale;
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
const u32 scaled_flipped_vram_offset_y = m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height;
if (m_GPUSTAT.display_area_color_depth_24)
// fast path when both interlacing and 24-bit depth are off
if (!m_GPUSTAT.display_area_color_depth_24 && !m_GPUSTAT.vertical_interlace)
{
glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x,
scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0,
scaled_display_width, scaled_display_height, 1);
m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width,
scaled_display_height, m_crtc_state.display_aspect_ratio);
}
else
{
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
// Because of how the reinterpret shader works, we need to use the downscaled version.
if (m_resolution_scale > 1)
if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
{
m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
@ -432,10 +452,13 @@ void GPU_HW_OpenGL::UpdateDisplay()
m_vram_texture->Bind();
}
const GL::Program& prog = m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)]
[BoolToUInt8(m_GPUSTAT.vertical_interlace)];
m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glViewport(0, 0, display_width, display_height);
m_reinterpret_rgb8_program.Bind();
m_reinterpret_rgb8_program.Uniform2i(0, vram_offset_x, flipped_vram_offset_y);
glViewport(0, field_offset, display_width, display_height);
prog.Bind();
prog.Uniform3i(0, vram_offset_x, flipped_vram_offset_y, field_offset);
glDrawArrays(GL_TRIANGLES, 0, 3);
// restore state
@ -446,15 +469,6 @@ void GPU_HW_OpenGL::UpdateDisplay()
m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, display_width, display_height,
m_crtc_state.display_aspect_ratio);
}
else
{
glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x,
scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0,
scaled_display_width, scaled_display_height, 1);
m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width,
scaled_display_height, m_crtc_state.display_aspect_ratio);
}
}
}

View file

@ -73,7 +73,7 @@ private:
bool m_show_renderer_statistics = false;
std::array<std::array<std::array<std::array<GL::Program, 2>, 3>, 2>, 4> m_render_programs;
GL::Program m_reinterpret_rgb8_program;
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
GLStats m_stats = {};
GLStats m_last_stats = {};