diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 2e6ad85cd..0984f3c9c 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -342,12 +342,6 @@ void GPU::UpdateCRTCConfig() cs.display_width = std::max(cs.visible_ticks_per_scanline / cs.dot_clock_divider, 1); cs.display_height = cs.visible_scanlines_per_frame; - if (m_GPUSTAT.vertical_interlace) - { - // Force progressive for now. - cs.display_height *= 2; - } - if (cs.display_width != old_horizontal_resolution || cs.display_height != old_vertical_resolution) Log_InfoPrintf("Visible resolution is now %ux%u", cs.display_width, cs.display_height); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index f85f2d18f..615633d9f 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -406,61 +406,80 @@ void main() return ss.str(); } -std::string GPU_HW::GenerateRGB24DecodeFragmentShader() +std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced) { std::stringstream ss; GenerateShaderHeader(ss); + DefineMacro(ss, "DEPTH_24BIT", depth_24bit); + DefineMacro(ss, "INTERLACED", interlaced); ss << R"( in vec2 v_tex0; out vec4 o_col0; uniform sampler2D samp0; -uniform ivec2 u_base_coords; +uniform ivec3 u_base_coords; + +ivec2 GetCoords(vec2 fragcoord) +{ + ivec2 icoords = ivec2(fragcoord); + #if INTERLACED + if (((icoords.y - u_base_coords.z) & 1) != 0) + discard; + #endif + return icoords; +} void main() { - // compute offset in dwords from the start of the 24-bit values - ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + int(gl_FragCoord.y)); - int xoff = int(gl_FragCoord.x); - int dword_index = (xoff / 2) + (xoff / 4); + ivec2 icoords = GetCoords(gl_FragCoord.xy); - // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these - uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0)); - uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0)); - uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0)); - uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0)); + #if DEPTH_24BIT + // compute offset in dwords from the start of the 24-bit values + ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y); + int xoff = int(icoords.x); + int dword_index = (xoff / 2) + (xoff / 4); - // select the bit for this pixel depending on its offset in the 4-pixel block - uint r, g, b; - int block_offset = xoff & 3; - if (block_offset == 0) - { - r = s0 & 0xFFu; - g = s0 >> 8; - b = s1 & 0xFFu; - } - else if (block_offset == 1) - { - r = s1 >> 8; - g = s2 & 0xFFu; - b = s2 >> 8; - } - else if (block_offset == 2) - { - r = s1 & 0xFFu; - g = s1 >> 8; - b = s2 & 0xFFu; - } - else - { - r = s2 >> 8; - g = s3 & 0xFFu; - b = s3 >> 8; - } + // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these + uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0)); + uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0)); + uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0)); + uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0)); - // and normalize - o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255, 1.0); + // select the bit for this pixel depending on its offset in the 4-pixel block + uint r, g, b; + int block_offset = xoff & 3; + if (block_offset == 0) + { + r = s0 & 0xFFu; + g = s0 >> 8; + b = s1 & 0xFFu; + } + else if (block_offset == 1) + { + r = s1 >> 8; + g = s2 & 0xFFu; + b = s2 >> 8; + } + else if (block_offset == 2) + { + r = s1 & 0xFFu; + g = s1 >> 8; + b = s2 & 0xFFu; + } + else + { + r = s2 >> 8; + g = s3 & 0xFFu; + b = s3 >> 8; + } + + // and normalize + o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255, 1.0); + #else + // load and return + o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0); + #endif } )"; diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 0afffe79d..154e2388d 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -97,7 +97,7 @@ protected: TextureColorMode texture_color_mode, bool blending); std::string GenerateScreenQuadVertexShader(); std::string GenerateFillFragmentShader(); - std::string GenerateRGB24DecodeFragmentShader(); + std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); u32 m_resolution_scale = 1; HWRenderBatch m_batch = {}; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 8e2d62baf..8918c3401 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -274,19 +274,27 @@ bool GPU_HW_OpenGL::CompilePrograms() } // TODO: Use string_view - if (!m_reinterpret_rgb8_program.Compile(GenerateScreenQuadVertexShader().c_str(), - GenerateRGB24DecodeFragmentShader().c_str())) + for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) { - return false; - } - m_reinterpret_rgb8_program.BindFragData(0, "o_col0"); - if (!m_reinterpret_rgb8_program.Link()) - return false; + for (u8 interlaced = 0; interlaced < 2; interlaced++) + { + GL::Program& prog = m_display_programs[depth_24bit][interlaced]; + const std::string vs = GenerateScreenQuadVertexShader(); + const std::string fs = + GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced)); + if (!prog.Compile(vs.c_str(), fs.c_str())) + return false; - m_reinterpret_rgb8_program.Bind(); - m_reinterpret_rgb8_program.RegisterUniform("u_base_coords"); - m_reinterpret_rgb8_program.RegisterUniform("samp0"); - m_reinterpret_rgb8_program.Uniform1i(1, 0); + prog.BindFragData(0, "o_col0"); + if (!prog.Link()) + return false; + + prog.Bind(); + prog.RegisterUniform("u_base_coords"); + prog.RegisterUniform("samp0"); + prog.Uniform1i(1, 0); + } + } return true; } @@ -400,24 +408,36 @@ void GPU_HW_OpenGL::UpdateDisplay() } else { + const u32 field_offset = BoolToUInt8(m_GPUSTAT.vertical_interlace && !m_GPUSTAT.drawing_even_line); const u32 vram_offset_x = m_crtc_state.regs.X; const u32 vram_offset_y = m_crtc_state.regs.Y; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; const u32 display_width = std::min(m_crtc_state.display_width, VRAM_WIDTH - vram_offset_x); - const u32 display_height = std::min(m_crtc_state.display_height, VRAM_HEIGHT - vram_offset_y); + const u32 display_height = std::min(m_crtc_state.display_height << BoolToUInt8(m_GPUSTAT.vertical_interlace), + VRAM_HEIGHT - vram_offset_y); const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height; const u32 scaled_flipped_vram_offset_y = m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height; - if (m_GPUSTAT.display_area_color_depth_24) + // fast path when both interlacing and 24-bit depth is off + if (!m_GPUSTAT.display_area_color_depth_24 && !m_GPUSTAT.vertical_interlace) + { + glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x, + scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, + scaled_display_width, scaled_display_height, 1); + + m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, + scaled_display_height, m_crtc_state.display_aspect_ratio); + } + else { glDisable(GL_BLEND); glDisable(GL_SCISSOR_TEST); // Because of how the reinterpret shader works, we need to use the downscaled version. - if (m_resolution_scale > 1) + if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1) { m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); @@ -432,10 +452,13 @@ void GPU_HW_OpenGL::UpdateDisplay() m_vram_texture->Bind(); } + const GL::Program& prog = m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)] + [BoolToUInt8(m_GPUSTAT.vertical_interlace)]; + m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); - glViewport(0, 0, display_width, display_height); - m_reinterpret_rgb8_program.Bind(); - m_reinterpret_rgb8_program.Uniform2i(0, vram_offset_x, flipped_vram_offset_y); + glViewport(0, field_offset, display_width, display_height); + prog.Bind(); + prog.Uniform3i(0, vram_offset_x, flipped_vram_offset_y, field_offset); glDrawArrays(GL_TRIANGLES, 0, 3); // restore state @@ -446,15 +469,6 @@ void GPU_HW_OpenGL::UpdateDisplay() m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, display_width, display_height, m_crtc_state.display_aspect_ratio); } - else - { - glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x, - scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, - scaled_display_width, scaled_display_height, 1); - - m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, - scaled_display_height, m_crtc_state.display_aspect_ratio); - } } } diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index e4152ff09..1725b3a0b 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -73,7 +73,7 @@ private: bool m_show_renderer_statistics = false; std::array, 3>, 2>, 4> m_render_programs; - GL::Program m_reinterpret_rgb8_program; + std::array, 2> m_display_programs; // [depth_24][interlaced] GLStats m_stats = {}; GLStats m_last_stats = {};