diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 911fa6e3e..c6e22cf78 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -893,7 +893,7 @@ void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { const u16 color16 = RGBA8888ToRGBA5551(color); - if ((x + width) <= VRAM_WIDTH) + if ((x + width) <= VRAM_WIDTH && !IsInterlacedRenderingEnabled()) { for (u32 yoffs = 0; yoffs < height; yoffs++) { @@ -901,6 +901,26 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16); } } + else if (IsInterlacedRenderingEnabled()) + { + // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. + if (IsRasterScanlinePending()) + Synchronize(); + const u32 active_field = GetInterlacedField(); + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + if ((row & u32(1)) == active_field) + continue; + + u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + for (u32 xoffs = 0; xoffs < width; xoffs++) + { + const u32 col = (x + xoffs) % VRAM_WIDTH; + row_ptr[col] = color16; + } + } + } else { for (u32 yoffs = 0; yoffs < height; yoffs++) diff --git a/src/core/gpu.h b/src/core/gpu.h index af5ad0e2d..84497c435 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -330,7 +330,16 @@ protected: void Execute(TickCount ticks); /// Returns true if scanout should be interlaced. - bool IsDisplayInterlaced() const { return !m_force_progressive_scan && m_GPUSTAT.In480iMode(); } + ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const { return (!m_force_progressive_scan) & m_GPUSTAT.In480iMode(); } + + /// Returns true if interlaced rendering is enabled and force progressive scan is disabled. 
+ ALWAYS_INLINE bool IsInterlacedRenderingEnabled() const + { + return (!m_force_progressive_scan) & m_GPUSTAT.SkipDrawingToActiveField(); + } + + /// Returns 0 if the currently-displayed field is even, otherwise 1. + ALWAYS_INLINE u32 GetInterlacedField() const { return BoolToUInt32(m_GPUSTAT.displaying_odd_line); } /// Sets/decodes GP0(E1h) (set draw mode). void SetDrawMode(u16 bits); diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 772fb9f9e..5fa81d504 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -306,7 +306,7 @@ bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size) primitive_names[static_cast(rc.primitive.GetValue())], ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex)); - if (m_GPUSTAT.SkipDrawingToActiveField() && IsRasterScanlinePending()) + if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) Synchronize(); DispatchRenderCommand(rc, num_vertices, command_ptr); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 4833bce9e..2d7b415e7 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -541,10 +541,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 m_batch_ubo_dirty = true; } - m_batch.interlacing = m_GPUSTAT.SkipDrawingToActiveField(); + m_batch.interlacing = IsInterlacedRenderingEnabled(); if (m_batch.interlacing) { - const u32 displayed_field = BoolToUInt32(m_GPUSTAT.displaying_odd_line); + const u32 displayed_field = GetInterlacedField(); m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; } diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index c0599a2c1..27bdd524a 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -377,8 +377,13 @@ bool GPU_HW_D3D11::CompileShaders() if (!m_copy_pixel_shader) return false; - m_fill_pixel_shader = 
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateFillFragmentShader()); - if (!m_fill_pixel_shader) + m_vram_fill_pixel_shader = m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateFillFragmentShader()); + if (!m_vram_fill_pixel_shader) + return false; + + m_vram_interlaced_fill_pixel_shader = + m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateInterlacedFillFragmentShader()); + if (!m_vram_interlaced_fill_pixel_shader) return false; m_vram_read_pixel_shader = m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader()); @@ -542,7 +547,7 @@ void GPU_HW_D3D11::UpdateDisplay() const u32 display_height = m_crtc_state.display_vram_height; const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const bool interlaced = IsDisplayInterlaced(); + const bool interlaced = IsInterlacedDisplayEnabled(); if (m_GPUSTAT.display_disable) { @@ -559,7 +564,7 @@ void GPU_HW_D3D11::UpdateDisplay() m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); - const u32 reinterpret_field_offset = BoolToUInt32(m_GPUSTAT.displaying_odd_line); + const u32 reinterpret_field_offset = GetInterlacedField(); const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X); const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x}; @@ -634,12 +639,21 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) if (!m_true_color) color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); - float uniforms[4]; - std::tie(uniforms[0], uniforms[1], uniforms[2], uniforms[3]) = RGBA8ToFloat(color); + struct Uniforms + { + float u_fill_color[4]; + u32 u_interlaced_displayed_field; + }; + Uniforms uniforms; + 
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = + RGBA8ToFloat(color); + uniforms.u_interlaced_displayed_field = GetInterlacedField(); SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale); - DrawUtilityShader(m_fill_pixel_shader.Get(), uniforms, sizeof(uniforms)); + DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() : + m_vram_fill_pixel_shader.Get(), + &uniforms, sizeof(uniforms)); RestoreGraphicsAPIState(); } diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index 759c1ae3b..9452ec328 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -109,7 +109,8 @@ private: ComPtr m_screen_quad_vertex_shader; ComPtr m_copy_pixel_shader; - ComPtr m_fill_pixel_shader; + ComPtr m_vram_fill_pixel_shader; + ComPtr m_vram_interlaced_fill_pixel_shader; ComPtr m_vram_read_pixel_shader; ComPtr m_vram_write_pixel_shader; std::array, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced] diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 909f2c3e2..c20039245 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -367,23 +367,34 @@ bool GPU_HW_OpenGL::CompilePrograms() prog->Bind(); prog->Uniform1i("samp0", 0); - m_display_programs[depth_24bit][interlaced] = std::move(*prog); } } - std::optional prog = m_shader_cache.GetProgram( - shadergen.GenerateScreenQuadVertexShader(), shadergen.GenerateVRAMReadFragmentShader(), [this](GL::Program& prog) { - if (!m_is_gles) - prog.BindFragData(0, "o_col0"); - }); + std::optional prog = + m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), + shadergen.GenerateInterlacedFillFragmentShader(), [this](GL::Program& prog) { + if (!m_is_gles) + prog.BindFragData(0, "o_col0"); + }); + if (!prog) + return false; + + prog->BindUniformBlock("UBOBlock", 1); + prog->Bind(); + 
m_vram_interlaced_fill_program = std::move(*prog); + + prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), + shadergen.GenerateVRAMReadFragmentShader(), [this](GL::Program& prog) { + if (!m_is_gles) + prog.BindFragData(0, "o_col0"); + }); if (!prog) return false; prog->BindUniformBlock("UBOBlock", 1); prog->Bind(); prog->Uniform1i("samp0", 0); - m_vram_read_program = std::move(*prog); if (m_supports_texture_buffer) @@ -399,7 +410,6 @@ bool GPU_HW_OpenGL::CompilePrograms() prog->BindUniformBlock("UBOBlock", 1); prog->Bind(); prog->Uniform1i("samp0", 0); - m_vram_write_program = std::move(*prog); } @@ -490,7 +500,7 @@ void GPU_HW_OpenGL::UpdateDisplay() const u32 display_height = m_crtc_state.display_vram_height; const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const bool interlaced = IsDisplayInterlaced(); + const bool interlaced = IsInterlacedDisplayEnabled(); if (m_GPUSTAT.display_disable) { @@ -516,7 +526,7 @@ void GPU_HW_OpenGL::UpdateDisplay() const u32 scaled_flipped_vram_offset_y = m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height; - const u32 reinterpret_field_offset = BoolToUInt32(m_GPUSTAT.displaying_odd_line); + const u32 reinterpret_field_offset = GetInterlacedField(); const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X); const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x}; @@ -600,11 +610,32 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) if (!m_true_color) color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); - const auto [r, g, b, a] = RGBA8ToFloat(color); - glClearColor(r, g, b, a); - glClear(GL_COLOR_BUFFER_BIT); + // fast path when not using interlaced rendering + if (!IsInterlacedRenderingEnabled()) + { + const auto [r, g, b, a] 
= RGBA8ToFloat(color); + glClearColor(r, g, b, a); + glClear(GL_COLOR_BUFFER_BIT); + SetScissorFromDrawingArea(); + } + else + { + struct Uniforms + { + float u_fill_color[4]; + u32 u_interlaced_displayed_field; + }; + Uniforms uniforms; + std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = + RGBA8ToFloat(color); + uniforms.u_interlaced_displayed_field = GetInterlacedField(); - SetScissorFromDrawingArea(); + m_vram_interlaced_fill_program.Bind(); + UploadUniformBlock(&uniforms, sizeof(uniforms)); + glDrawArrays(GL_TRIANGLES, 0, 3); + + RestoreGraphicsAPIState(); + } } void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index df68fd46d..4827c3fbe 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -79,6 +79,7 @@ private: std::array, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering][interlacing] std::array, 2> m_display_programs; // [depth_24][interlaced] + GL::Program m_vram_interlaced_fill_program; GL::Program m_vram_read_program; GL::Program m_vram_write_program; diff --git a/src/core/gpu_hw_opengl_es.cpp b/src/core/gpu_hw_opengl_es.cpp index 32380862b..7b45d893d 100644 --- a/src/core/gpu_hw_opengl_es.cpp +++ b/src/core/gpu_hw_opengl_es.cpp @@ -253,7 +253,17 @@ bool GPU_HW_OpenGL_ES::CompilePrograms() } } - std::optional prog = + std::optional prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), + shadergen.GenerateInterlacedFillFragmentShader()); + if (!prog) + return false; + + prog->Bind(); + prog->RegisterUniform("u_fill_color"); + prog->RegisterUniform("u_interlaced_displayed_field"); + m_vram_interlaced_fill_program = std::move(*prog); + + prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), shadergen.GenerateVRAMReadFragmentShader()); if (!prog) return false; @@ -357,7 +367,7 @@ void 
GPU_HW_OpenGL_ES::UpdateDisplay() const u32 display_height = m_crtc_state.display_vram_height; const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const bool interlaced = IsDisplayInterlaced(); + const bool interlaced = IsInterlacedDisplayEnabled(); if (m_GPUSTAT.display_disable) { @@ -385,7 +395,7 @@ void GPU_HW_OpenGL_ES::UpdateDisplay() const u32 scaled_flipped_vram_offset_y = m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height; - const u32 reinterpret_field_offset = BoolToUInt32(m_GPUSTAT.displaying_odd_line); + const u32 reinterpret_field_offset = GetInterlacedField(); const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X); @@ -469,10 +479,21 @@ void GPU_HW_OpenGL_ES::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); const auto [r, g, b, a] = RGBA8ToFloat(color); - glClearColor(r, g, b, a); - glClear(GL_COLOR_BUFFER_BIT); - SetScissorFromDrawingArea(); + if (!IsInterlacedRenderingEnabled()) + { + glClearColor(r, g, b, a); + glClear(GL_COLOR_BUFFER_BIT); + SetScissorFromDrawingArea(); + } + else + { + m_vram_interlaced_fill_program.Bind(); + m_vram_interlaced_fill_program.Uniform4f(0, r, g, b, a); + m_vram_interlaced_fill_program.Uniform1i(1, GetInterlacedField()); + glDrawArrays(GL_TRIANGLES, 0, 3); + RestoreGraphicsAPIState(); + } } void GPU_HW_OpenGL_ES::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) diff --git a/src/core/gpu_hw_opengl_es.h b/src/core/gpu_hw_opengl_es.h index d931abdd7..1d71442ab 100644 --- a/src/core/gpu_hw_opengl_es.h +++ b/src/core/gpu_hw_opengl_es.h @@ -67,5 +67,6 @@ private: std::array, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering][interlacing] std::array, 2> m_display_programs; // 
[depth_24][interlaced] + GL::Program m_vram_interlaced_fill_program; GL::Program m_vram_read_program; }; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 41b07af20..c51c9f205 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -523,7 +523,7 @@ float4 SampleFromVRAM(int4 texpage, int2 icoord) float oalpha; #if INTERLACING - if (((int(v_pos.y) / RESOLUTION_SCALE) & 1) == u_interlaced_displayed_field) + if (((fixYCoord(int(v_pos.y)) / RESOLUTION_SCALE) & 1) == u_interlaced_displayed_field) discard; #endif @@ -732,6 +732,26 @@ std::string GPU_HW_ShaderGen::GenerateFillFragmentShader() return ss.str(); } +std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + WriteCommonFunctions(ss); + DeclareUniformBuffer(ss, {"float4 u_fill_color", "int u_interlaced_displayed_field"}); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false); + + ss << R"( +{ + if (((fixYCoord(int(v_pos.y)) / RESOLUTION_SCALE) & 1) == u_interlaced_displayed_field) + discard; + + o_col0 = u_fill_color; +} +)"; + + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateCopyFragmentShader() { std::stringstream ss; @@ -767,7 +787,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo int2 icoords = int2(v_pos.xy); #if INTERLACED - if (((icoords.y / RESOLUTION_SCALE) & 1) != u_field_offset) + if (((fixYCoord(icoords.y) / RESOLUTION_SCALE) & 1) != u_field_offset) discard; #endif diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 0f396c137..974cd1997 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -17,6 +17,7 @@ public: std::string GenerateBatchLineExpandGeometryShader(); std::string GenerateScreenQuadVertexShader(); std::string GenerateFillFragmentShader(); + std::string GenerateInterlacedFillFragmentShader(); std::string GenerateCopyFragmentShader(); std::string 
GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); std::string GenerateVRAMReadFragmentShader(); diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index f31d44dba..909b8af70 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -574,11 +574,8 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex if ((bg_color.bits & mask_and) != 0) return; - if (m_GPUSTAT.SkipDrawingToActiveField() && - BoolToUInt32(m_GPUSTAT.displaying_odd_line) == (static_cast(y) & 1u)) - { + if (IsInterlacedRenderingEnabled() && GetInterlacedField() == (static_cast(y) & 1u)) return; - } SetPixel(static_cast(x), static_cast(y), color.bits | m_GPUSTAT.GetMaskOR()); }