GPU/HW: Enable feedback loops for rendering

This commit is contained in:
Stenzek 2024-03-08 21:14:35 +10:00
parent 72ab669e70
commit 74f3c8d1aa
No known key found for this signature in database
9 changed files with 322 additions and 218 deletions

View file

@ -304,7 +304,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
void GPU_HW::RestoreDeviceContext() void GPU_HW::RestoreDeviceContext()
{ {
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); SetVRAMRenderTarget();
g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight());
SetScissor(); SetScissor();
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
@ -328,6 +328,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
// TODO: Use old_settings // TODO: Use old_settings
const bool framebuffer_changed = const bool framebuffer_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_downsample_mode != downsample_mode || (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_downsample_mode != downsample_mode ||
(static_cast<bool>(m_vram_depth_texture) !=
(g_settings.UsingPGXPDepthBuffer() || !m_supports_framebuffer_fetch)) ||
(m_downsample_mode == GPUDownsampleMode::Box && (m_downsample_mode == GPUDownsampleMode::Box &&
g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)); g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale));
const bool shaders_changed = const bool shaders_changed =
@ -400,7 +402,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
m_batch.use_depth_buffer = false; m_batch.use_depth_buffer = false;
// might be null when resizing // might be null when resizing
if (m_vram_texture) if (m_vram_depth_texture)
{ {
if (m_pgxp_depth_buffer) if (m_pgxp_depth_buffer)
ClearDepthBuffer(); ClearDepthBuffer();
@ -422,7 +424,6 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
if (framebuffer_changed) if (framebuffer_changed)
{ {
// TODO: weird vram loss when rapidly changing resolutions
if (!CreateBuffers()) if (!CreateBuffers())
Panic("Failed to recreate buffers."); Panic("Failed to recreate buffers.");
@ -620,6 +621,12 @@ void GPU_HW::PrintSettingsToLog()
Log_InfoFmt("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); Log_InfoFmt("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO");
} }
bool GPU_HW::NeedsDepthBuffer() const
{
  // The PGXP depth buffer always requires a depth attachment.
  if (m_pgxp_depth_buffer)
    return true;

  // Without framebuffer fetch, depth is still needed for the mask bit.
  return !m_supports_framebuffer_fetch;
}
bool GPU_HW::CreateBuffers() bool GPU_HW::CreateBuffers()
{ {
DestroyBuffers(); DestroyBuffers();
@ -628,6 +635,8 @@ bool GPU_HW::CreateBuffers()
const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const u8 samples = static_cast<u8>(m_multisamples); const u8 samples = static_cast<u8>(m_multisamples);
const bool needs_depth_buffer = NeedsDepthBuffer();
Log_DevFmt("Depth buffer is {}needed", needs_depth_buffer ? "" : "NOT ");
// Needed for Metal resolve. // Needed for Metal resolve.
const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ? const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ?
@ -636,8 +645,9 @@ bool GPU_HW::CreateBuffers()
if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
(needs_depth_buffer &&
!(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples,
GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) || GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT))) ||
!(m_vram_read_texture = !(m_vram_read_texture =
g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) ||
!(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1,
@ -647,6 +657,7 @@ bool GPU_HW::CreateBuffers()
} }
GL_OBJECT_NAME(m_vram_texture, "VRAM Texture"); GL_OBJECT_NAME(m_vram_texture, "VRAM Texture");
if (m_vram_depth_texture)
GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture");
GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture"); GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture");
GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture");
@ -689,7 +700,7 @@ bool GPU_HW::CreateBuffers()
else if (m_downsample_mode == GPUDownsampleMode::Box) else if (m_downsample_mode == GPUDownsampleMode::Box)
m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale); m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale);
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); SetVRAMRenderTarget();
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
return true; return true;
} }
@ -697,11 +708,18 @@ bool GPU_HW::CreateBuffers()
void GPU_HW::ClearFramebuffer() void GPU_HW::ClearFramebuffer()
{ {
g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0);
if (m_vram_depth_texture)
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f);
ClearVRAMDirtyRectangle(); ClearVRAMDirtyRectangle();
m_last_depth_z = 1.0f; m_last_depth_z = 1.0f;
} }
void GPU_HW::SetVRAMRenderTarget()
{
  // Request a colour feedback loop only when shader blending is allowed; otherwise pass no render pass flags.
  const auto render_pass_flags =
    m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags;

  // Bind the VRAM colour texture together with the depth texture (which may be null).
  g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get(), render_pass_flags);
}
void GPU_HW::DestroyBuffers() void GPU_HW::DestroyBuffers()
{ {
ClearDisplayTexture(); ClearDisplayTexture();
@ -723,19 +741,36 @@ void GPU_HW::DestroyBuffers()
bool GPU_HW::CompilePipelines() bool GPU_HW::CompilePipelines()
{ {
const GPUDevice::Features features = g_gpu_device->GetFeatures(); const GPUDevice::Features features = g_gpu_device->GetFeatures();
const bool needs_depth_buffer = NeedsDepthBuffer();
const bool write_mask_as_depth = (!m_pgxp_depth_buffer && needs_depth_buffer);
m_allow_shader_blend = (features.feedback_loops && (m_pgxp_depth_buffer || !needs_depth_buffer));
GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, m_pgxp_depth_buffer, m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, write_mask_as_depth,
m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch, m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch,
m_debanding); m_debanding);
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + const u32 total_pipelines = 2 + // vertex shaders
2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); (5 * 5 * 9 * 2 * 2 * 2) + // fragment shaders
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * 9 * 2 * 2 * 2) + // batch pipelines
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
1 + // fullscreen quad VS
(2 * 2) + // vram fill
(1 + BoolToUInt32(write_mask_as_depth)) + // vram copy
(1 + BoolToUInt32(write_mask_as_depth)) + // vram write
1 + // vram write replacement
(needs_depth_buffer ? 1 : 0) + // mask -> depth
1 + // vram read
2 + // extract/display
((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample
ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);
// vertex shaders - [textured] // vertex shaders - [textured]
// fragment shaders - [render_mode][texture_mode][dithering][interlacing] // fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); }; static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{}; DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{};
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 9, 5, 4> batch_fragment_shaders{}; DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, 9, 5, 5> batch_fragment_shaders{};
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(destroy_shader); batch_vertex_shaders.enumerate(destroy_shader);
batch_fragment_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader);
@ -743,39 +778,41 @@ bool GPU_HW::CompilePipelines()
for (u8 textured = 0; textured < 2; textured++) for (u8 textured = 0; textured < 2; textured++)
{ {
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured), m_pgxp_depth_buffer);
if (!(batch_vertex_shaders[textured] = g_gpu_device->CreateShader(GPUShaderStage::Vertex, vs))) if (!(batch_vertex_shaders[textured] = g_gpu_device->CreateShader(GPUShaderStage::Vertex, vs)))
return false; return false;
progress.Increment(); progress.Increment();
} }
for (u8 render_mode = 0; render_mode < 4; render_mode++) for (u8 render_mode = 0; render_mode < 5; render_mode++)
{ {
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{ {
if (m_supports_framebuffer_fetch) if (
{
// Don't need multipass shaders.
if (render_mode != static_cast<u8>(BatchRenderMode::TransparencyDisabled) &&
render_mode != static_cast<u8>(BatchRenderMode::TransparentAndOpaque))
{
progress.Increment(2 * 2 * 9);
continue;
}
}
else
{
// Can't generate shader blending. // Can't generate shader blending.
if (transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled)) ((render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !features.feedback_loops) ||
(render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend) &&
transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled))) ||
// Don't need multipass shaders.
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
{ {
progress.Increment(2 * 2 * 9); progress.Increment(9 * 2 * 2 * 2);
continue; continue;
} }
}
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
{ {
for (u8 check_mask = 0; check_mask < 2; check_mask++)
{
if (check_mask && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend))
{
// mask bit testing is only valid with shader blending.
progress.Increment(2 * 2);
continue;
}
for (u8 dithering = 0; dithering < 2; dithering++) for (u8 dithering = 0; dithering < 2; dithering++)
{ {
for (u8 interlacing = 0; interlacing < 2; interlacing++) for (u8 interlacing = 0; interlacing < 2; interlacing++)
@ -783,10 +820,10 @@ bool GPU_HW::CompilePipelines()
const std::string fs = shadergen.GenerateBatchFragmentShader( const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTransparencyMode>(transparency_mode), static_cast<BatchRenderMode>(render_mode), static_cast<GPUTransparencyMode>(transparency_mode),
static_cast<GPUTextureMode>(texture_mode), ConvertToBoolUnchecked(dithering), static_cast<GPUTextureMode>(texture_mode), ConvertToBoolUnchecked(dithering),
ConvertToBoolUnchecked(interlacing)); ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask));
if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][dithering][interlacing] = if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][check_mask][dithering]
g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) [interlacing] = g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs)))
{ {
return false; return false;
} }
@ -797,6 +834,7 @@ bool GPU_HW::CompilePipelines()
} }
} }
} }
}
static constexpr GPUPipeline::VertexAttribute vertex_attributes[] = { static constexpr GPUPipeline::VertexAttribute vertex_attributes[] = {
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0,
@ -820,48 +858,57 @@ bool GPU_HW::CompilePipelines()
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.geometry_shader = nullptr; plconfig.geometry_shader = nullptr;
plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT); plconfig.SetTargetFormats(VRAM_RT_FORMAT, needs_depth_buffer ? VRAM_DS_FORMAT : GPUTexture::Format::Unknown);
plconfig.samples = m_multisamples; plconfig.samples = m_multisamples;
plconfig.per_sample_shading = m_per_sample_shading; plconfig.per_sample_shading = m_per_sample_shading;
plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; plconfig.render_pass_flags = m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask]
for (u8 depth_test = 0; depth_test < 3; depth_test++) for (u8 depth_test = 0; depth_test < 2; depth_test++)
{ {
for (u8 render_mode = 0; render_mode < 4; render_mode++) if (depth_test && !m_pgxp_depth_buffer)
{ {
if (m_supports_framebuffer_fetch) // Not used.
{
// Don't need multipass shaders.
if (render_mode != static_cast<u8>(BatchRenderMode::TransparencyDisabled) &&
render_mode != static_cast<u8>(BatchRenderMode::TransparentAndOpaque))
{
progress.Increment(2 * 2 * 9 * 5);
continue; continue;
} }
}
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{ {
for (u8 render_mode = 0; render_mode < 5; render_mode++)
{
if (
// Can't generate shader blending.
(render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !features.feedback_loops) ||
// Don't need multipass shaders.
(m_supports_framebuffer_fetch && (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))))
{
progress.Increment(9 * 2 * 2 * 2);
continue;
}
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
{ {
for (u8 dithering = 0; dithering < 2; dithering++) for (u8 dithering = 0; dithering < 2; dithering++)
{ {
for (u8 interlacing = 0; interlacing < 2; interlacing++) for (u8 interlacing = 0; interlacing < 2; interlacing++)
{ {
static constexpr std::array<GPUPipeline::DepthFunc, 3> depth_test_values = { for (u8 check_mask = 0; check_mask < 2; check_mask++)
GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, {
GPUPipeline::DepthFunc::LessEqual};
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled); const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
const bool use_shader_blending = const bool use_shader_blending =
(textured && NeedsShaderBlending(static_cast<GPUTransparencyMode>(transparency_mode))); (render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) &&
((textured &&
NeedsShaderBlending(static_cast<GPUTransparencyMode>(transparency_mode), (check_mask != 0))) ||
check_mask));
plconfig.input_layout.vertex_attributes = plconfig.input_layout.vertex_attributes =
textured ? textured ?
(m_clamp_uvs ? std::span<const GPUPipeline::VertexAttribute>( (m_clamp_uvs ? std::span<const GPUPipeline::VertexAttribute>(
vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) :
std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, std::span<const GPUPipeline::VertexAttribute>(
NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : vertex_attributes, NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) :
std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES);
plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get();
@ -869,11 +916,23 @@ bool GPU_HW::CompilePipelines()
batch_fragment_shaders[render_mode] batch_fragment_shaders[render_mode]
[use_shader_blending ? transparency_mode : [use_shader_blending ? transparency_mode :
static_cast<u8>(GPUTransparencyMode::Disabled)] static_cast<u8>(GPUTransparencyMode::Disabled)]
[texture_mode][dithering][interlacing] [texture_mode][use_shader_blending ? check_mask : 0][dithering][interlacing]
.get(); .get();
Assert(plconfig.vertex_shader && plconfig.fragment_shader);
if (needs_depth_buffer)
{
plconfig.depth.depth_test =
m_pgxp_depth_buffer ?
(depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) :
(check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always);
// Don't write for transparent, but still test.
plconfig.depth.depth_write =
!m_pgxp_depth_buffer ||
(depth_test && transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled));
}
plconfig.depth.depth_test = depth_test_values[depth_test];
plconfig.depth.depth_write = !m_pgxp_depth_buffer || depth_test != 0;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
if (!use_shader_blending && if (!use_shader_blending &&
@ -923,8 +982,8 @@ bool GPU_HW::CompilePipelines()
} }
} }
if (!(m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering] if (!(m_batch_pipelines[depth_test][transparency_mode][render_mode][texture_mode][dithering]
[interlacing] = g_gpu_device->CreatePipeline(plconfig))) [interlacing][check_mask] = g_gpu_device->CreatePipeline(plconfig)))
{ {
return false; return false;
} }
@ -936,6 +995,7 @@ bool GPU_HW::CompilePipelines()
} }
} }
} }
}
if (m_wireframe_mode != GPUWireframeMode::Disabled) if (m_wireframe_mode != GPUWireframeMode::Disabled)
{ {
@ -968,12 +1028,15 @@ bool GPU_HW::CompilePipelines()
plconfig.vertex_shader = nullptr; plconfig.vertex_shader = nullptr;
plconfig.geometry_shader = nullptr; plconfig.geometry_shader = nullptr;
plconfig.fragment_shader = nullptr; plconfig.fragment_shader = nullptr;
progress.Increment();
} }
batch_shader_guard.Run(); batch_shader_guard.Run();
// use a depth of 1, that way writes will reset the depth
std::unique_ptr<GPUShader> fullscreen_quad_vertex_shader = std::unique_ptr<GPUShader> fullscreen_quad_vertex_shader =
g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader(1.0f));
if (!fullscreen_quad_vertex_shader) if (!fullscreen_quad_vertex_shader)
return false; return false;
@ -1018,7 +1081,10 @@ bool GPU_HW::CompilePipelines()
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();
for (u8 depth_test = 0; depth_test < 2; depth_test++) for (u8 depth_test = 0; depth_test < 2; depth_test++)
{ {
plconfig.depth.depth_write = true; if (depth_test && write_mask_as_depth)
continue;
plconfig.depth.depth_write = needs_depth_buffer;
plconfig.depth.depth_test = plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
@ -1045,7 +1111,10 @@ bool GPU_HW::CompilePipelines()
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();
for (u8 depth_test = 0; depth_test < 2; depth_test++) for (u8 depth_test = 0; depth_test < 2; depth_test++)
{ {
plconfig.depth.depth_write = true; if (depth_test && write_mask_as_depth)
continue;
plconfig.depth.depth_write = needs_depth_buffer;
plconfig.depth.depth_test = plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
@ -1071,9 +1140,14 @@ bool GPU_HW::CompilePipelines()
plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig))) if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false; return false;
progress.Increment();
} }
plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
// VRAM update depth // VRAM update depth
if (needs_depth_buffer)
{ {
std::unique_ptr<GPUShader> fs = std::unique_ptr<GPUShader> fs =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMUpdateDepthFragmentShader()); g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMUpdateDepthFragmentShader());
@ -1191,6 +1265,7 @@ bool GPU_HW::CompilePipelines()
if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config))) if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config)))
return false; return false;
GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler"); GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler");
progress.Increment();
} }
else if (m_downsample_mode == GPUDownsampleMode::Box) else if (m_downsample_mode == GPUDownsampleMode::Box)
{ {
@ -1207,9 +1282,8 @@ bool GPU_HW::CompilePipelines()
return false; return false;
GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline");
}
progress.Increment(); progress.Increment();
}
#undef UPDATE_PROGRESS #undef UPDATE_PROGRESS
@ -1323,7 +1397,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
void GPU_HW::UpdateDepthBufferFromMaskBit() void GPU_HW::UpdateDepthBufferFromMaskBit()
{ {
if (m_pgxp_depth_buffer) if (m_pgxp_depth_buffer || !m_vram_depth_texture)
return; return;
// Viewport should already be set full, only need to fudge the scissor. // Viewport should already be set full, only need to fudge the scissor.
@ -1336,7 +1410,7 @@ void GPU_HW::UpdateDepthBufferFromMaskBit()
// Restore. // Restore.
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); SetVRAMRenderTarget();
SetScissor(); SetScissor();
} }
@ -1389,13 +1463,18 @@ void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices)
ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index,
u32 base_vertex) u32 base_vertex)
{ {
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask]
const u8 depth_test = m_batch.use_depth_buffer ? static_cast<u8>(2) : BoolToUInt8(m_batch.check_mask_before_draw); const u8 depth_test = BoolToUInt8(m_batch.use_depth_buffer);
g_gpu_device->SetPipeline( const u8 check_mask = BoolToUInt8(m_batch.check_mask_before_draw);
m_batch_pipelines[depth_test][static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)][static_cast<u8>( g_gpu_device->SetPipeline(m_batch_pipelines[depth_test][static_cast<u8>(m_batch.transparency_mode)][static_cast<u8>(
m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] render_mode)][static_cast<u8>(m_batch.texture_mode)][BoolToUInt8(m_batch.dithering)]
[BoolToUInt8(m_batch.interlacing)][check_mask]
.get()); .get());
if (render_mode != BatchRenderMode::ShaderBlend || m_supports_framebuffer_fetch)
g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
else
g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full);
} }
ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
@ -1873,9 +1952,7 @@ void GPU_HW::LoadVertices()
} }
else if (m_pgxp_depth_buffer) else if (m_pgxp_depth_buffer)
{ {
const bool use_depth = (m_batch.transparency_mode == GPUTransparencyMode::Disabled); SetBatchDepthBuffer(true);
SetBatchDepthBuffer(use_depth);
if (use_depth)
CheckForDepthClear(vertices.data(), num_vertices); CheckForDepthClear(vertices.data(), num_vertices);
} }
} }
@ -2410,10 +2487,11 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const
(!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
} }
ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency) const ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const
{ {
return (m_supports_framebuffer_fetch && return (m_allow_shader_blend &&
(transparency == GPUTransparencyMode::BackgroundMinusForeground || ((check_mask && (m_pgxp_depth_buffer || !m_vram_depth_texture)) ||
transparency == GPUTransparencyMode::BackgroundMinusForeground ||
(!m_supports_dual_source_blend && (!m_supports_dual_source_blend &&
(transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering))))); (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering)))));
} }
@ -2472,7 +2550,7 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
void GPU_HW::ResetBatchVertexDepth() void GPU_HW::ResetBatchVertexDepth()
{ {
if (m_pgxp_depth_buffer) if (m_pgxp_depth_buffer || !m_vram_depth_texture)
return; return;
Log_PerfPrint("Resetting batch vertex depth"); Log_PerfPrint("Resetting batch vertex depth");
@ -2736,7 +2814,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b
// the viewport should already be set to the full vram, so just adjust the scissor // the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale; const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)].get()); g_gpu_device->SetPipeline(
m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer && NeedsDepthBuffer())].get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (upload_texture) if (upload_texture)
{ {
@ -2815,7 +2894,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), g_gpu_device->SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
dst_bounds_scaled.GetHeight()); dst_bounds_scaled.GetHeight());
g_gpu_device->SetPipeline( g_gpu_device->SetPipeline(
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)].get()); m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())]
.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0); g_gpu_device->Draw(3, 0);
RestoreDeviceContext(); RestoreDeviceContext();
@ -2945,7 +3025,7 @@ void GPU_HW::DispatchRenderCommand()
rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled;
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode ||
(transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_supports_framebuffer_fetch) || (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) ||
dithering_enable != m_batch.dithering) dithering_enable != m_batch.dithering)
{ {
FlushRender(); FlushRender();
@ -2956,8 +3036,9 @@ void GPU_HW::DispatchRenderCommand()
if (m_batch_index_count == 0) if (m_batch_index_count == 0)
{ {
// transparency mode change // transparency mode change
const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
if (transparency_mode != GPUTransparencyMode::Disabled && if (transparency_mode != GPUTransparencyMode::Disabled &&
(texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode))) (texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode, check_mask_before_draw)))
{ {
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
@ -2969,7 +3050,6 @@ void GPU_HW::DispatchRenderCommand()
m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor;
} }
const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
if (m_batch.check_mask_before_draw != check_mask_before_draw || if (m_batch.check_mask_before_draw != check_mask_before_draw ||
m_batch.set_mask_while_drawing != set_mask_while_drawing) m_batch.set_mask_while_drawing != set_mask_while_drawing)
@ -3052,7 +3132,11 @@ void GPU_HW::FlushRender()
if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe)
{ {
if (NeedsTwoPassRendering()) if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.check_mask_before_draw))
{
DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex);
}
else if (NeedsTwoPassRendering())
{ {
DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex); DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex);
DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex); DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex);

View file

@ -29,7 +29,8 @@ public:
TransparencyDisabled, TransparencyDisabled,
TransparentAndOpaque, TransparentAndOpaque,
OnlyOpaque, OnlyOpaque,
OnlyTransparent OnlyTransparent,
ShaderBlend
}; };
GPU_HW(); GPU_HW();
@ -115,6 +116,9 @@ private:
u32 num_uniform_buffer_updates; u32 num_uniform_buffer_updates;
}; };
/// Returns true if a depth buffer should be created.
bool NeedsDepthBuffer() const;
bool CreateBuffers(); bool CreateBuffers();
void ClearFramebuffer(); void ClearFramebuffer();
void DestroyBuffers(); void DestroyBuffers();
@ -131,6 +135,7 @@ private:
void UpdateDepthBufferFromMaskBit(); void UpdateDepthBufferFromMaskBit();
void ClearDepthBuffer(); void ClearDepthBuffer();
void SetScissor(); void SetScissor();
void SetVRAMRenderTarget();
void MapGPUBuffer(u32 required_vertices, u32 required_indices); void MapGPUBuffer(u32 required_vertices, u32 required_indices);
void UnmapGPUBuffer(u32 used_vertices, u32 used_indices); void UnmapGPUBuffer(u32 used_vertices, u32 used_indices);
void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex); void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex);
@ -158,7 +163,7 @@ private:
bool NeedsTwoPassRendering() const; bool NeedsTwoPassRendering() const;
/// Returns true if the draw is going to use shader blending/framebuffer fetch. /// Returns true if the draw is going to use shader blending/framebuffer fetch.
bool NeedsShaderBlending(GPUTransparencyMode transparency) const; bool NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const;
void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
@ -236,6 +241,8 @@ private:
bool m_clamp_uvs : 1 = false; bool m_clamp_uvs : 1 = false;
bool m_compute_uv_range : 1 = false; bool m_compute_uv_range : 1 = false;
bool m_pgxp_depth_buffer : 1 = false; bool m_pgxp_depth_buffer : 1 = false;
bool m_allow_shader_blend : 1 = false;
bool m_prefer_shader_blend : 1 = false;
u8 m_texpage_dirty = 0; u8 m_texpage_dirty = 0;
BatchConfig m_batch; BatchConfig m_batch;
@ -249,8 +256,8 @@ private:
Common::Rectangle<u32> m_vram_dirty_write_rect; Common::Rectangle<u32> m_vram_dirty_write_rect;
Common::Rectangle<u32> m_current_uv_range; Common::Rectangle<u32> m_current_uv_range;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask]
DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2, 2, 9, 5, 5, 2> m_batch_pipelines{};
std::unique_ptr<GPUPipeline> m_wireframe_pipeline; std::unique_ptr<GPUPipeline> m_wireframe_pipeline;
// [wrapped][interlaced] // [wrapped][interlaced]

View file

@ -7,13 +7,14 @@
GPU_HW_ShaderGen::GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, GPU_HW_ShaderGen::GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples,
bool per_sample_shading, bool true_color, bool scaled_dithering, bool per_sample_shading, bool true_color, bool scaled_dithering,
GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth, GPUTextureFilter texture_filtering, bool uv_limits, bool write_mask_as_depth,
bool disable_color_perspective, bool supports_dual_source_blend, bool disable_color_perspective, bool supports_dual_source_blend,
bool supports_framebuffer_fetch, bool debanding) bool supports_framebuffer_fetch, bool debanding)
: ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch), m_resolution_scale(resolution_scale), : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch), m_resolution_scale(resolution_scale),
m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color), m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color),
m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits), m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits),
m_pgxp_depth(pgxp_depth), m_disable_color_perspective(disable_color_perspective), m_debanding(debanding) m_write_mask_as_depth(write_mask_as_depth), m_disable_color_perspective(disable_color_perspective),
m_debanding(debanding)
{ {
} }
@ -58,13 +59,13 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
false); false);
} }
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pgxp_depth)
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "UV_LIMITS", m_uv_limits); DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DefineMacro(ss, "PGXP_DEPTH", pgxp_depth);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
@ -632,19 +633,20 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode,
GPUTransparencyMode transparency, GPUTextureMode texture_mode, GPUTransparencyMode transparency, GPUTextureMode texture_mode,
bool dithering, bool interlacing) bool dithering, bool interlacing, bool check_mask)
{ {
// Shouldn't be using shader blending without fbfetch. // TODO: don't write depth for shader blend
DebugAssert(m_supports_framebuffer_fetch || transparency == GPUTransparencyMode::Disabled); DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit; const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit;
const bool textured = (texture_mode != GPUTextureMode::Disabled); const bool textured = (texture_mode != GPUTextureMode::Disabled);
const bool use_framebuffer_fetch = (m_supports_framebuffer_fetch && transparency != GPUTransparencyMode::Disabled); const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend &&
const bool use_dual_source = !use_framebuffer_fetch && m_supports_dual_source_blend && (transparency != GPUTransparencyMode::Disabled || check_mask));
const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend &&
((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled &&
render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) ||
m_texture_filter != GPUTextureFilter::Nearest); m_texture_filter != GPUTextureFilter::Nearest));
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
@ -652,7 +654,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque); DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque);
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent); DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent);
DefineMacro(ss, "TRANSPARENCY_MODE", static_cast<s32>(transparency)); DefineMacro(ss, "TRANSPARENCY_MODE", static_cast<s32>(transparency));
DefineMacro(ss, "SHADER_BLENDING", use_framebuffer_fetch); DefineMacro(ss, "SHADER_BLENDING", shader_blending);
DefineMacro(ss, "CHECK_MASK_BIT", check_mask);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE", DefineMacro(ss, "PALETTE",
actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit);
@ -668,7 +671,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filter != GPUTextureFilter::Nearest); DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filter != GPUTextureFilter::Nearest);
DefineMacro(ss, "UV_LIMITS", m_uv_limits); DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source); DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
@ -799,20 +802,20 @@ float3 ApplyDebanding(float2 frag_coord)
{ {
DeclareFragmentEntryPoint(ss, 1, 1, DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}},
true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), true, use_dual_source ? 2 : 1, m_write_mask_as_depth, UsingMSAA(),
false, m_disable_color_perspective, use_framebuffer_fetch); UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending);
} }
else else
{ {
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1,
!m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, m_write_mask_as_depth, UsingMSAA(), UsingPerSampleShading(), false,
use_framebuffer_fetch); m_disable_color_perspective, shader_blending);
} }
} }
else else
{ {
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, m_write_mask_as_depth, UsingMSAA(),
UsingPerSampleShading(), false, m_disable_color_perspective, use_framebuffer_fetch); UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending);
} }
ss << R"( ss << R"(
@ -930,6 +933,11 @@ float3 ApplyDebanding(float2 frag_coord)
float4 bg_col = LAST_FRAG_COLOR; float4 bg_col = LAST_FRAG_COLOR;
float4 fg_col = float4(color, oalpha); float4 fg_col = float4(color, oalpha);
#if CHECK_MASK_BIT
if (bg_col.a != 0.0)
discard;
#endif
#if TEXTURE_FILTERING #if TEXTURE_FILTERING
#if TRANSPARENCY_MODE == 0 || TRANSPARENCY_MODE == 3 #if TRANSPARENCY_MODE == 0 || TRANSPARENCY_MODE == 3
bg_col.rgb /= ialpha; bg_col.rgb /= ialpha;
@ -964,7 +972,7 @@ float3 ApplyDebanding(float2 frag_coord)
o_col0 = float4(color, oalpha); o_col0 = float4(color, oalpha);
#endif #endif
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z; o_depth = oalpha * v_pos.z;
#endif #endif
@ -981,7 +989,7 @@ float3 ApplyDebanding(float2 frag_coord)
o_col0 = float4(color, oalpha); o_col0 = float4(color, oalpha);
#endif #endif
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z; o_depth = oalpha * v_pos.z;
#endif #endif
@ -998,7 +1006,7 @@ float3 ApplyDebanding(float2 frag_coord)
o_col0 = float4(color, oalpha); o_col0 = float4(color, oalpha);
#endif #endif
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z; o_depth = oalpha * v_pos.z;
#endif #endif
#else #else
@ -1009,7 +1017,7 @@ float3 ApplyDebanding(float2 frag_coord)
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#endif #endif
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = oalpha * v_pos.z; o_depth = oalpha * v_pos.z;
#endif #endif
#endif #endif
@ -1233,7 +1241,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "USE_BUFFER", use_buffer); DefineMacro(ss, "USE_BUFFER", use_buffer);
DeclareUniformBuffer(ss, DeclareUniformBuffer(ss,
{"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset", {"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset",
@ -1266,7 +1274,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n"; ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n";
} }
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, m_write_mask_as_depth);
ss << R"( ss << R"(
{ {
uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE); uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
@ -1291,10 +1299,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
#endif #endif
o_col0 = RGBA5551ToRGBA8(value); o_col0 = RGBA5551ToRGBA8(value);
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0; o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
#else
o_depth = 1.0;
#endif #endif
})"; })";
@ -1309,7 +1315,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DeclareUniformBuffer(ss, DeclareUniformBuffer(ss,
{"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size",
"bool u_set_mask_bit", "float u_depth_value"}, "bool u_set_mask_bit", "float u_depth_value"},
@ -1317,7 +1323,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
DeclareTexture(ss, "samp0", 0, msaa); DeclareTexture(ss, "samp0", 0, msaa);
DefineMacro(ss, "MSAA_COPY", msaa); DefineMacro(ss, "MSAA_COPY", msaa);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true, false, false, msaa); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, m_write_mask_as_depth, false, false, msaa);
ss << R"( ss << R"(
{ {
uint2 dst_coords = uint2(v_pos.xy); uint2 dst_coords = uint2(v_pos.xy);
@ -1344,10 +1350,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0); float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0);
#endif #endif
o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a); o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a);
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0)); o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0));
#else
o_depth = 1.0f;
#endif #endif
})"; })";
@ -1359,14 +1363,14 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "WRAPPED", wrapped); DefineMacro(ss, "WRAPPED", wrapped);
DefineMacro(ss, "INTERLACED", interlaced); DefineMacro(ss, "INTERLACED", interlaced);
DeclareUniformBuffer( DeclareUniformBuffer(
ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true); ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, true, false, false, false); DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, m_write_mask_as_depth, false, false, false);
ss << R"( ss << R"(
{ {
#if INTERLACED || WRAPPED #if INTERLACED || WRAPPED
@ -1388,10 +1392,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool
#endif #endif
o_col0 = u_fill_color; o_col0 = u_fill_color;
#if !PGXP_DEPTH #if WRITE_MASK_AS_DEPTH
o_depth = u_fill_color.a; o_depth = u_fill_color.a;
#else
o_depth = 1.0f;
#endif #endif
})"; })";

View file

@ -10,13 +10,14 @@ class GPU_HW_ShaderGen : public ShaderGen
public: public:
GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading,
bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits,
bool pgxp_depth, bool disable_color_perspective, bool supports_dual_source_blend, bool write_mask_as_depth, bool disable_color_perspective, bool supports_dual_source_blend,
bool supports_framebuffer_fetch, bool debanding); bool supports_framebuffer_fetch, bool debanding);
~GPU_HW_ShaderGen(); ~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured); std::string GenerateBatchVertexShader(bool textured, bool pgxp_depth);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency,
GPUTextureMode texture_mode, bool dithering, bool interlacing); GPUTextureMode texture_mode, bool dithering, bool interlacing,
bool check_mask);
std::string GenerateWireframeGeometryShader(); std::string GenerateWireframeGeometryShader();
std::string GenerateWireframeFragmentShader(); std::string GenerateWireframeFragmentShader();
std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMReadFragmentShader();
@ -48,7 +49,7 @@ private:
bool m_scaled_dithering; bool m_scaled_dithering;
GPUTextureFilter m_texture_filter; GPUTextureFilter m_texture_filter;
bool m_uv_limits; bool m_uv_limits;
bool m_pgxp_depth; bool m_write_mask_as_depth;
bool m_disable_color_perspective; bool m_disable_color_perspective;
bool m_debanding; bool m_debanding;
}; };

View file

@ -4,4 +4,4 @@
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 13; static constexpr u32 SHADER_CACHE_VERSION = 14;

View file

@ -249,7 +249,8 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override; void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override; void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop) override;
void SetPipeline(GPUPipeline* pipeline) override; void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -257,6 +258,7 @@ public:
void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void SetScissor(s32 x, s32 y, s32 width, s32 height) override;
void Draw(u32 vertex_count, u32 base_vertex) override; void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
bool GetHostRefreshRate(float* refresh_rate) override; bool GetHostRefreshRate(float* refresh_rate) override;

View file

@ -1721,8 +1721,10 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
} }
} }
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop)
{ {
DebugAssert(!feedback_loop);
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false; bool needs_rt_clear = false;
@ -1843,7 +1845,7 @@ void MetalDevice::UnbindTexture(MetalTexture* tex)
if (m_current_render_targets[i] == tex) if (m_current_render_targets[i] == tex)
{ {
Log_WarningPrint("Unbinding current RT"); Log_WarningPrint("Unbinding current RT");
SetRenderTargets(nullptr, 0, m_current_depth_target); SetRenderTargets(nullptr, 0, m_current_depth_target, GPUPipeline::NoRenderPassFlags); // TODO: Wrong
break; break;
} }
} }
@ -1853,7 +1855,7 @@ void MetalDevice::UnbindTexture(MetalTexture* tex)
if (m_current_depth_target == tex) if (m_current_depth_target == tex)
{ {
Log_WarningPrint("Unbinding current DS"); Log_WarningPrint("Unbinding current DS");
SetRenderTargets(nullptr, 0, nullptr); SetRenderTargets(nullptr, 0, nullptr, GPUPipeline::NoRenderPassFlags);
} }
} }
} }
@ -2094,6 +2096,11 @@ void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
baseInstance:0]; baseInstance:0];
} }
void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
{
Panic("Barriers are not supported");
}
id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline) id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
{ {
@autoreleasepool @autoreleasepool

View file

@ -1,12 +1,15 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "shadergen.h" #include "shadergen.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/bitutils.h" #include "common/bitutils.h"
#include "common/log.h" #include "common/log.h"
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <iomanip>
#ifdef ENABLE_OPENGL #ifdef ENABLE_OPENGL
#include "opengl_loader.h" #include "opengl_loader.h"
@ -660,20 +663,18 @@ void ShaderGen::DeclareFragmentEntryPoint(
} }
} }
std::string ShaderGen::GenerateScreenQuadVertexShader() std::string ShaderGen::GenerateScreenQuadVertexShader(float z /* = 0.0f */)
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true);
ss << R"( ss << "{\n";
{ ss << " v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));\n";
v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u)); ss << " v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), " << std::fixed << z << "f, 1.0f);\n";
v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); ss << " #if API_OPENGL || API_OPENGL_ES || API_VULKAN\n";
#if API_OPENGL || API_OPENGL_ES || API_VULKAN ss << " v_pos.y = -v_pos.y;\n";
v_pos.y = -v_pos.y; ss << " #endif\n";
#endif ss << "}\n";
}
)";
return ss.str(); return ss.str();
} }

View file

@ -16,7 +16,7 @@ public:
static bool UseGLSLBindingLayout(); static bool UseGLSLBindingLayout();
std::string GenerateScreenQuadVertexShader(); std::string GenerateScreenQuadVertexShader(float z = 0.0f);
std::string GenerateUVQuadVertexShader(); std::string GenerateUVQuadVertexShader();
std::string GenerateFillFragmentShader(); std::string GenerateFillFragmentShader();
std::string GenerateCopyFragmentShader(); std::string GenerateCopyFragmentShader();