From a397979d72f38af20468697ed84026c5261b97e1 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Mon, 4 Nov 2019 01:45:03 +1000 Subject: [PATCH] GPU/HW: Elide buffer copy in scanout and fix flipped display --- src/core/gpu_hw_d3d11.cpp | 12 ++------ src/core/gpu_hw_opengl.cpp | 37 +++++++++++++------------ src/core/host_display.h | 2 +- src/core/settings.h | 2 +- src/duckstation/d3d11_host_display.cpp | 28 +++++++++---------- src/duckstation/d3d11_host_display.h | 10 +++---- src/duckstation/opengl_host_display.cpp | 15 +++++----- src/duckstation/opengl_host_display.h | 10 +++---- 8 files changed, 55 insertions(+), 61 deletions(-) diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 053d87a37..f1b414ec8 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -567,15 +567,9 @@ void GPU_HW_D3D11::UpdateDisplay() } else if (!m_GPUSTAT.display_area_color_depth_24 && !m_GPUSTAT.vertical_interlace) { - const CD3D11_BOX src_box(scaled_vram_offset_x, scaled_vram_offset_y, 0, - scaled_vram_offset_x + scaled_display_width, - scaled_vram_offset_y + scaled_display_height, 1); - m_context->CopySubresourceRegion(m_display_texture.GetD3DTexture(), 0, 0, 0, 0, m_vram_texture.GetD3DTexture(), 0, - &src_box); - - m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, scaled_display_width, - scaled_display_height, m_display_texture.GetWidth(), - m_display_texture.GetHeight(), m_crtc_state.display_aspect_ratio); + m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, scaled_display_height, m_vram_texture.GetWidth(), + m_vram_texture.GetHeight(), m_crtc_state.display_aspect_ratio); } else { diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index bd8e5393c..bba174135 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -439,9 +439,10 @@ void GPU_HW_OpenGL::UpdateDisplay() if (m_system->GetSettings().debugging.show_vram) { - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, 0, - m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), - m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1.0f); + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, + m_vram_texture->GetHeight(), m_vram_texture->GetWidth(), + -static_cast(m_vram_texture->GetHeight()), m_vram_texture->GetWidth(), + m_vram_texture->GetHeight(), 1.0f); } else { @@ -454,8 +455,6 @@ void GPU_HW_OpenGL::UpdateDisplay() VRAM_HEIGHT - vram_offset_y); const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height; - const u32 scaled_flipped_vram_offset_y = m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height; if (m_GPUSTAT.display_disable) { @@ -463,17 +462,17 @@ void GPU_HW_OpenGL::UpdateDisplay() } else if (!m_GPUSTAT.display_area_color_depth_24 && !m_GPUSTAT.vertical_interlace) { - // fast path when both interlacing and 24-bit depth is off - glCopyImageSubData(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, scaled_vram_offset_x, - scaled_flipped_vram_offset_y, 0, m_display_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, - scaled_display_width, scaled_display_height, 1); - - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, - 0, scaled_display_width, scaled_display_height, m_display_texture->GetWidth(), - m_display_texture->GetHeight(), m_crtc_state.display_aspect_ratio); + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), + scaled_vram_offset_x, m_vram_texture->GetHeight() - scaled_vram_offset_y, + scaled_display_width, -static_cast(scaled_display_height), + m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), + m_crtc_state.display_aspect_ratio); } else { + const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height; + const u32 scaled_flipped_vram_offset_y = + m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height; const u32 field_offset = BoolToUInt8(m_GPUSTAT.vertical_interlace && !m_GPUSTAT.drawing_even_line); const u32 scaled_field_offset = field_offset * m_resolution_scale; @@ -507,9 +506,10 @@ void GPU_HW_OpenGL::UpdateDisplay() glDrawArrays(GL_TRIANGLES, 0, 3); - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, - 0, display_width, display_height, m_display_texture->GetWidth(), - m_display_texture->GetHeight(), m_crtc_state.display_aspect_ratio); + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture->GetGLId())), + 0, display_height, display_width, -static_cast(display_height), + m_display_texture->GetWidth(), m_display_texture->GetHeight(), + m_crtc_state.display_aspect_ratio); } else { @@ -524,8 +524,9 @@ void GPU_HW_OpenGL::UpdateDisplay() glDrawArrays(GL_TRIANGLES, 0, 3); - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, - 0, scaled_display_width, scaled_display_height, m_display_texture->GetWidth(), + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture->GetGLId())), + 0, scaled_display_height, scaled_display_width, + -static_cast(scaled_display_height), m_display_texture->GetWidth(), m_display_texture->GetHeight(), m_crtc_state.display_aspect_ratio); } diff --git a/src/core/host_display.h b/src/core/host_display.h index 60c8a5854..f0859929d 100644 --- a/src/core/host_display.h +++ b/src/core/host_display.h @@ -35,7 +35,7 @@ public: virtual void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) = 0; - virtual void SetDisplayTexture(void* texture_handle, u32 offset_x, u32 offset_y, u32 width, u32 height, + virtual void SetDisplayTexture(void* texture_handle, s32 offset_x, s32 offset_y, s32 width, s32 height, u32 texture_width, u32 texture_height, float aspect_ratio) = 0; virtual void SetDisplayLinearFiltering(bool enabled) = 0; diff --git a/src/core/settings.h b/src/core/settings.h index 7d2c7297b..8b0db77de 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -14,7 +14,7 @@ struct Settings bool start_paused = false; - GPURenderer gpu_renderer = GPURenderer::HardwareOpenGL; + GPURenderer gpu_renderer = GPURenderer::Software; u32 gpu_resolution_scale = 1; u32 max_gpu_resolution_scale = 1; bool gpu_vsync = true; diff --git a/src/duckstation/d3d11_host_display.cpp b/src/duckstation/d3d11_host_display.cpp index 0de26d160..f659f154f 100644 --- a/src/duckstation/d3d11_host_display.cpp +++ b/src/duckstation/d3d11_host_display.cpp @@ -130,7 +130,7 @@ void D3D11HostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, } } -void D3D11HostDisplay::SetDisplayTexture(void* texture, u32 offset_x, u32 offset_y, u32 width, u32 height, +void D3D11HostDisplay::SetDisplayTexture(void* texture, s32 offset_x, s32 offset_y, s32 width, s32 height, u32 texture_width, u32 texture_height, float aspect_ratio) { m_display_srv = static_cast(texture); @@ -249,29 +249,29 @@ bool D3D11HostDisplay::CreateSwapChainRTV() bool D3D11HostDisplay::CreateD3DResources() { static constexpr char fullscreen_quad_vertex_shader[] = R"( -void main(in uint vertex_id : SV_VertexID, - out float2 v_tex0 : TEXCOORD0, - out float4 o_pos : SV_Position) -{ - v_tex0 = float2(float((vertex_id << 1) & 2u), float(vertex_id & 2u)); - o_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); -} -)"; - - static constexpr char display_pixel_shader[] = R"( cbuffer UBOBlock : register(b0) { float4 u_src_rect; }; +void main(in uint vertex_id : SV_VertexID, + out float2 v_tex0 : TEXCOORD0, + out float4 o_pos : SV_Position) +{ + float2 pos = float2(float((vertex_id << 1) & 2u), float(vertex_id & 2u)); + v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; + o_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); +} +)"; + + static constexpr char display_pixel_shader[] = R"( Texture2D samp0 : register(t0); SamplerState samp0_ss : register(s0); void main(in float2 v_tex0 : TEXCOORD0, out float4 o_col0 : SV_Target) { - float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw; - o_col0 = samp0.Sample(samp0_ss, coords); + o_col0 = samp0.Sample(samp0_ss, v_tex0); } )"; @@ -381,7 +381,7 @@ void D3D11HostDisplay::RenderDisplay() const auto map = m_display_uniform_buffer.Map(m_context.Get(), sizeof(uniforms), sizeof(uniforms)); std::memcpy(map.pointer, uniforms, sizeof(uniforms)); m_display_uniform_buffer.Unmap(m_context.Get(), sizeof(uniforms)); - m_context->PSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); + m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); const CD3D11_VIEWPORT vp(static_cast(vp_left), static_cast(vp_top), static_cast(vp_width), static_cast(vp_height)); diff --git a/src/duckstation/d3d11_host_display.h b/src/duckstation/d3d11_host_display.h index 7fd4fdb30..712dfcc7a 100644 --- a/src/duckstation/d3d11_host_display.h +++ b/src/duckstation/d3d11_host_display.h @@ -28,7 +28,7 @@ public: void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) override; - void SetDisplayTexture(void* texture, u32 offset_x, u32 offset_y, u32 width, u32 height, u32 texture_width, + void SetDisplayTexture(void* texture, s32 offset_x, s32 offset_y, s32 width, s32 height, u32 texture_width, u32 texture_height, float aspect_ratio) override; void SetDisplayLinearFiltering(bool enabled) override; @@ -70,10 +70,10 @@ private: D3D11::StreamBuffer m_display_uniform_buffer; ID3D11ShaderResourceView* m_display_srv = nullptr; - u32 m_display_offset_x = 0; - u32 m_display_offset_y = 0; - u32 m_display_width = 0; - u32 m_display_height = 0; + s32 m_display_offset_x = 0; + s32 m_display_offset_y = 0; + s32 m_display_width = 0; + s32 m_display_height = 0; u32 m_display_texture_width = 0; u32 m_display_texture_height = 0; float m_display_aspect_ratio = 1.0f; diff --git a/src/duckstation/opengl_host_display.cpp b/src/duckstation/opengl_host_display.cpp index ff2dd8134..ab32a09fa 100644 --- a/src/duckstation/opengl_host_display.cpp +++ b/src/duckstation/opengl_host_display.cpp @@ -28,7 +28,7 @@ public: glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_texture_binding); // TODO: Set pack width - Assert(initial_data_stride == (width * sizeof(u32))); + Assert(!initial_data || initial_data_stride == (width * sizeof(u32))); glBindTexture(GL_TEXTURE_2D, id); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, initial_data); @@ -109,7 +109,7 @@ void OpenGLHostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, glBindTexture(GL_TEXTURE_2D, old_texture_binding); } -void OpenGLHostDisplay::SetDisplayTexture(void* texture, u32 offset_x, u32 offset_y, u32 width, u32 height, +void OpenGLHostDisplay::SetDisplayTexture(void* texture, s32 offset_x, s32 offset_y, s32 width, s32 height, u32 texture_width, u32 texture_height, float aspect_ratio) { m_display_texture_id = static_cast(reinterpret_cast(texture)); @@ -210,13 +210,14 @@ bool OpenGLHostDisplay::CreateGLResources() static constexpr char fullscreen_quad_vertex_shader[] = R"( #version 330 core +uniform vec4 u_src_rect; out vec2 v_tex0; void main() { - v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2)); - gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); - gl_Position.y = -gl_Position.y; + vec2 pos = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2)); + v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; + gl_Position = vec4(pos * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); } )"; @@ -224,15 +225,13 @@ void main() #version 330 core uniform sampler2D samp0; -uniform vec4 u_src_rect; in vec2 v_tex0; out vec4 o_col0; void main() { - vec2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw; - o_col0 = texture(samp0, coords); + o_col0 = texture(samp0, v_tex0); } )"; diff --git a/src/duckstation/opengl_host_display.h b/src/duckstation/opengl_host_display.h index b1fc121d6..c5d10ac06 100644 --- a/src/duckstation/opengl_host_display.h +++ b/src/duckstation/opengl_host_display.h @@ -22,7 +22,7 @@ public: void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) override; - void SetDisplayTexture(void* texture, u32 offset_x, u32 offset_y, u32 width, u32 height, u32 texture_width, + void SetDisplayTexture(void* texture, s32 offset_x, s32 offset_y, s32 width, s32 height, u32 texture_width, u32 texture_height, float aspect_ratio) override; void SetDisplayLinearFiltering(bool enabled) override; @@ -49,10 +49,10 @@ private: GL::Program m_display_program; GLuint m_display_vao = 0; GLuint m_display_texture_id = 0; - u32 m_display_offset_x = 0; - u32 m_display_offset_y = 0; - u32 m_display_width = 0; - u32 m_display_height = 0; + s32 m_display_offset_x = 0; + s32 m_display_offset_y = 0; + s32 m_display_width = 0; + s32 m_display_height = 0; u32 m_display_texture_width = 0; u32 m_display_texture_height = 0; float m_display_aspect_ratio = 1.0f;