GPU/HW: Disable coordinate clamping when unnecessary

This commit is contained in:
Connor McLaughlin 2020-08-10 22:37:30 +10:00
parent b95ce993e0
commit 5c6e92cdfc
10 changed files with 142 additions and 87 deletions

View file

@ -21,6 +21,12 @@ ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
return std::tie(v1, v2);
}
ALWAYS_INLINE static bool ShouldUseUVLimits()
{
  // UV limits are only required by two features: PGXP and texture filtering.
  // If neither is switched on in the global settings, they are not needed.
  if (g_settings.gpu_pgxp_enable)
    return true;

  return g_settings.gpu_texture_filtering;
}
GPU_HW::GPU_HW() : GPU()
{
m_vram_ptr = m_vram_shadow.data();
@ -43,6 +49,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filtering;
m_using_uv_limits = ShouldUseUVLimits();
PrintSettingsToLog();
return true;
}
@ -79,14 +86,21 @@ bool GPU_HW::DoState(StateWrapper& sw)
return true;
}
void GPU_HW::UpdateSettings()
void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
{
GPU::UpdateSettings();
const u32 resolution_scale = CalculateResolutionScale();
const bool use_uv_limits = ShouldUseUVLimits();
m_resolution_scale = CalculateResolutionScale();
*framebuffer_changed = (m_resolution_scale != resolution_scale);
*shaders_changed = (m_resolution_scale != resolution_scale || m_true_color != g_settings.gpu_true_color ||
m_scaled_dithering != g_settings.gpu_scaled_dithering ||
m_texture_filtering != g_settings.gpu_texture_filtering || m_using_uv_limits != use_uv_limits);
m_resolution_scale = resolution_scale;
m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filtering;
m_using_uv_limits = use_uv_limits;
PrintSettingsToLog();
}
@ -120,6 +134,7 @@ void GPU_HW::PrintSettingsToLog()
(!m_true_color && m_scaled_dithering) ? " (Scaled)" : "");
Log_InfoPrintf("Texture Filtering: %s", m_texture_filtering ? "Enabled" : "Disabled");
Log_InfoPrintf("Dual-source blending: %s", m_supports_dual_source_blend ? "Supported" : "Not supported");
Log_InfoPrintf("Using UV limits: %s", m_using_uv_limits ? "YES" : "NO");
}
void GPU_HW::UpdateVRAMReadTexture()
@ -215,12 +230,6 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
}
}
bool GPU_HW::AreUVLimitsNeeded()
{
  // Either PGXP or texture filtering being enabled is enough to require UV limits.
  const bool pgxp_enabled = g_settings.gpu_pgxp_enable;
  const bool filtering_enabled = g_settings.gpu_texture_filtering;
  return pgxp_enabled || filtering_enabled;
}
void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
{
u16 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v;
@ -383,7 +392,7 @@ void GPU_HW::LoadVertices()
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (AreUVLimitsNeeded())
if (m_using_uv_limits && textured)
ComputePolygonUVLimits(vertices.data(), num_vertices);
if (!IsDrawingAreaIsValid())
@ -658,8 +667,6 @@ void GPU_HW::LoadVertices()
UnreachableCode();
break;
}
FlushRender();
}
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)

View file

@ -34,7 +34,6 @@ public:
virtual bool Initialize(HostDisplay* host_display) override;
virtual void Reset() override;
virtual bool DoState(StateWrapper& sw) override;
virtual void UpdateSettings() override;
virtual void UpdateResolutionScale() override;
protected:
@ -174,6 +173,8 @@ protected:
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
}
void UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed);
virtual void UpdateVRAMReadTexture();
virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void SetScissorFromDrawingArea() = 0;
@ -269,6 +270,7 @@ protected:
bool m_scaled_dithering = false;
bool m_texture_filtering = false;
bool m_supports_dual_source_blend = false;
bool m_using_uv_limits = false;
BatchConfig m_batch = {};
BatchUBOData m_batch_ubo_data = {};

View file

@ -70,12 +70,6 @@ bool GPU_HW_D3D11::Initialize(HostDisplay* host_display)
return false;
}
if (!CreateBatchInputLayout())
{
Log_ErrorPrintf("Failed to create batch input layout");
return false;
}
if (!CompileShaders())
{
Log_ErrorPrintf("Failed to compile shaders");
@ -124,9 +118,14 @@ void GPU_HW_D3D11::UpdateSettings()
{
GPU_HW::UpdateSettings();
CreateFramebuffer();
CreateStateObjects();
CompileShaders();
bool needs_new_framebuffer, needs_new_shaders;
UpdateHWSettings(&needs_new_framebuffer, &needs_new_shaders);
if (needs_new_framebuffer)
CreateFramebuffer();
if (needs_new_shaders)
CompileShaders();
RestoreGraphicsAPIState();
UpdateDisplay();
}
@ -263,35 +262,6 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
return true;
}
// Creates the D3D11 input layout that describes BatchVertex and stores it in m_batch_input_layout.
// Returns false when the vertex shader blob cannot be obtained, or when CreateInputLayout() fails
// (the failing HRESULT is logged); returns true otherwise.
bool GPU_HW_D3D11::CreateBatchInputLayout()
{
// Five "ATTR" elements (semantic indices 0-4), each per-vertex data located at the offset of the
// named BatchVertex member: x, color, u, texpage and uv_limits.
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 5> attributes = {
{{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
// we need a vertex shader...
// CreateInputLayout() below is handed compiled vertex shader bytecode, so a batch vertex shader is
// generated here (first argument true, second false) purely to obtain that bytecode.
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_supports_dual_source_blend);
ComPtr<ID3DBlob> vs_bytecode =
m_shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true, false));
// A null blob means the shader could not be obtained; fail without attempting to create the layout.
if (!vs_bytecode)
return false;
const HRESULT hr = m_device->CreateInputLayout(attributes.data(), static_cast<UINT>(attributes.size()),
vs_bytecode->GetBufferPointer(), vs_bytecode->GetBufferSize(),
m_batch_input_layout.GetAddressOf());
if (FAILED(hr))
{
Log_ErrorPrintf("CreateInputLayout failed: 0x%08X", hr);
return false;
}
return true;
}
bool GPU_HW_D3D11::CreateStateObjects()
{
HRESULT hr;
@ -373,10 +343,35 @@ bool GPU_HW_D3D11::CreateStateObjects()
bool GPU_HW_D3D11::CompileShaders()
{
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_supports_dual_source_blend);
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
g_host_interface->DisplayLoadingScreen("Compiling shaders...");
// input layout
{
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 5> attributes = {
{{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
// we need a vertex shader...
ComPtr<ID3DBlob> vs_bytecode =
m_shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true));
if (!vs_bytecode)
return false;
const HRESULT hr = m_device->CreateInputLayout(attributes.data(), static_cast<UINT>(attributes.size()),
vs_bytecode->GetBufferPointer(), vs_bytecode->GetBufferSize(),
m_batch_input_layout.GetAddressOf());
if (FAILED(hr))
{
Log_ErrorPrintf("CreateInputLayout failed: 0x%08X", hr);
return false;
}
}
m_screen_quad_vertex_shader =
m_shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateScreenQuadVertexShader());
if (!m_screen_quad_vertex_shader)
@ -384,7 +379,7 @@ bool GPU_HW_D3D11::CompileShaders()
for (u8 textured = 0; textured < 2; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured), false);
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured));
m_batch_vertex_shaders[textured] = m_shader_cache.GetVertexShader(m_device.Get(), vs);
if (!m_batch_vertex_shaders[textured])
return false;

View file

@ -56,7 +56,6 @@ private:
bool CreateVertexBuffer();
bool CreateUniformBuffer();
bool CreateTextureBuffer();
bool CreateBatchInputLayout();
bool CreateStateObjects();
bool CompileShaders();
@ -95,7 +94,6 @@ private:
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
ComPtr<ID3D11RasterizerState> m_wireframe_rasterizer_state;
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;

View file

@ -122,8 +122,14 @@ void GPU_HW_OpenGL::UpdateSettings()
{
GPU_HW::UpdateSettings();
CreateFramebuffer();
CompilePrograms();
bool framebuffer_changed, shaders_changed;
UpdateHWSettings(&framebuffer_changed, &shaders_changed);
if (framebuffer_changed)
CreateFramebuffer();
if (shaders_changed)
CompilePrograms();
UpdateDisplay();
}
@ -343,7 +349,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
{
const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_supports_dual_source_blend);
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
g_host_interface->DisplayLoadingScreen("Compiling Shaders...");
@ -356,7 +362,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured, false);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured);
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));

View file

@ -6,9 +6,10 @@
Log_SetChannel(GPU_HW_ShaderGen);
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color,
bool scaled_dithering, bool texture_filtering, bool supports_dual_source_blend)
bool scaled_dithering, bool texture_filtering, bool uv_limits,
bool supports_dual_source_blend)
: m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color),
m_scaled_dithering(scaled_dithering), m_texture_filering(texture_filtering),
m_scaled_dithering(scaled_dithering), m_texture_filering(texture_filtering), m_uv_limits(uv_limits),
m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend),
m_use_glsl_interface_blocks(false)
{
@ -494,27 +495,35 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
false);
}
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upscaled_lines)
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
{
std::stringstream ss;
WriteHeader(ss);
DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "UV_LIMITS", m_uv_limits);
WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss);
ss << "CONSTANT float EPSILON = 0.00001;\n";
const char* output_block_suffix = upscaled_lines ? "VS" : "";
if (textured)
{
DeclareVertexEntryPoint(
ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false, output_block_suffix);
if (m_uv_limits)
{
DeclareVertexEntryPoint(
ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false);
}
else
{
DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}}, false);
}
}
else
{
DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0"}, 1, 0, {}, false, output_block_suffix);
DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0"}, 1, 0, {}, false);
}
ss << R"(
@ -559,7 +568,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u * RESOLUTION_SCALE;
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE;
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
#if UV_LIMITS
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0);
#endif
#endif
}
)";
@ -596,6 +607,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "INTERLACING", interlacing);
DefineMacro(ss, "TRUE_COLOR", m_true_color);
DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filering);
DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
WriteCommonFunctions(ss);
@ -729,9 +741,17 @@ void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
if (textured)
{
DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, true,
use_dual_source ? 2 : 1, true);
if (m_uv_limits)
{
DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}},
true, use_dual_source ? 2 : 1, true);
}
else
{
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1,
true);
}
}
else
{
@ -753,18 +773,22 @@ void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
#endif
#if TEXTURED
float2 coords = v_tex0;
float4 uv_limits = v_uv_limits;
float4 texcol;
// We can't currently use upscaled coordinate for palettes because of how they're packed.
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
float2 coords = v_tex0;
#if PALETTE
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
#else
uv_limits *= float4(RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE);
#endif
#if UV_LIMITS
float4 uv_limits = v_uv_limits;
#if !PALETTE
uv_limits *= float4(RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE, RESOLUTION_SCALE);
#endif
#endif
float4 texcol;
#if TEXTURE_FILTERING
BilinearSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha);
if (ialpha < 0.5)
@ -773,7 +797,11 @@ void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
texcol.rgb /= float3(ialpha, ialpha, ialpha);
semitransparent = (texcol.a != 0.0);
#else
texcol = SampleFromVRAM(v_texpage, clamp(coords, uv_limits.xy, uv_limits.zw));
#if UV_LIMITS
texcol = SampleFromVRAM(v_texpage, clamp(coords, uv_limits.xy, uv_limits.zw));
#else
texcol = SampleFromVRAM(v_texpage, coords);
#endif
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
discard;

View file

@ -8,12 +8,12 @@ class GPU_HW_ShaderGen
{
public:
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, bool scaled_dithering,
bool texture_filtering, bool supports_dual_source_blend);
bool texture_filtering, bool uv_limits, bool supports_dual_source_blend);
~GPU_HW_ShaderGen();
static bool UseGLSLBindingLayout();
std::string GenerateBatchVertexShader(bool textured, bool upscaled_lines);
std::string GenerateBatchVertexShader(bool textured);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode,
bool dithering, bool interlacing);
std::string GenerateScreenQuadVertexShader();
@ -51,6 +51,7 @@ private:
bool m_true_color;
bool m_scaled_dithering;
bool m_texture_filering;
bool m_uv_limits;
bool m_glsl;
bool m_supports_dual_source_blend;
bool m_use_glsl_interface_blocks;

View file

@ -128,11 +128,27 @@ void GPU_HW_Vulkan::UpdateSettings()
// Everything should be finished executing before recreating resources.
g_vulkan_context->ExecuteCommandBuffer(true);
CreateFramebuffer();
DestroyPipelines();
CompilePipelines();
UpdateDepthBufferFromMaskBit();
UpdateDisplay();
bool framebuffer_changed, shaders_changed;
UpdateHWSettings(&framebuffer_changed, &shaders_changed);
if (framebuffer_changed)
CreateFramebuffer();
if (shaders_changed)
{
// clear it since we draw a loading screen and it's not in the correct state
m_host_display->ClearDisplayTexture();
DestroyPipelines();
CompilePipelines();
}
// this has to be done here, because otherwise we're using destroyed pipelines in the same cmdbuffer
if (framebuffer_changed)
{
UpdateDepthBufferFromMaskBit();
UpdateDisplay();
}
RestoreGraphicsAPIState();
}
@ -576,7 +592,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_supports_dual_source_blend);
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
// vertex shaders - [textured]
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
@ -589,7 +605,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
for (u8 textured = 0; textured < 2; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured), false);
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured));
VkShaderModule shader = g_vulkan_shader_cache->GetVertexShader(vs);
if (shader == VK_NULL_HANDLE)
return false;

View file

@ -494,7 +494,8 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_disable_interlacing != old_settings.gpu_disable_interlacing ||
g_settings.gpu_force_ntsc_timings != old_settings.gpu_force_ntsc_timings ||
g_settings.display_crop_mode != old_settings.display_crop_mode ||
g_settings.display_aspect_ratio != old_settings.display_aspect_ratio)
g_settings.display_aspect_ratio != old_settings.display_aspect_ratio ||
g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable)
{
g_gpu->RestoreGraphicsAPIState();
g_gpu->UpdateSettings();

View file

@ -1302,6 +1302,7 @@ void CommonHostInterface::RegisterGraphicsHotkeys()
if (!pressed)
{
g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable;
g_gpu->UpdateSettings();
ReportFormattedMessage("PGXP is now %s.", g_settings.gpu_pgxp_enable ? "enabled" : "disabled");
if (g_settings.gpu_pgxp_enable)