mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-02-16 19:05:39 +00:00
MetalDevice: Add support for framebuffer fetch
This commit is contained in:
parent
42ac79d42a
commit
e5a024ba85
|
@ -1062,7 +1062,8 @@ bool GPU_HW::CompilePipelines()
|
|||
return false;
|
||||
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
|
||||
plconfig.depth = needs_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
|
||||
GPUPipeline::DepthState::GetNoTestsState();
|
||||
|
||||
if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig)))
|
||||
return false;
|
||||
|
@ -1137,7 +1138,7 @@ bool GPU_HW::CompilePipelines()
|
|||
return false;
|
||||
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
|
||||
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig)))
|
||||
return false;
|
||||
|
||||
|
|
|
@ -376,8 +376,9 @@ private:
|
|||
id<MTLCommandBuffer> m_render_cmdbuf = nil;
|
||||
id<MTLRenderCommandEncoder> m_render_encoder = nil;
|
||||
|
||||
u8 m_num_current_render_targets = 0;
|
||||
GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
|
||||
std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
|
||||
u32 m_num_current_render_targets = 0;
|
||||
MetalTexture* m_current_depth_target = nullptr;
|
||||
|
||||
MetalPipeline* m_current_pipeline = nullptr;
|
||||
|
|
|
@ -222,6 +222,13 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
|
|||
m_max_texture_size = 8192;
|
||||
}
|
||||
|
||||
// Framebuffer fetch requires MSL 2.3 and an Apple GPU family.
|
||||
const bool supports_fbfetch = [m_device supportsFamily:MTLGPUFamilyApple1];
|
||||
|
||||
// If fbfetch is disabled, barriers aren't supported on Apple GPUs.
|
||||
const bool supports_barriers =
|
||||
([m_device supportsFamily:MTLGPUFamilyMac1] && ![m_device supportsFamily:MTLGPUFamilyApple3]);
|
||||
|
||||
m_max_multisamples = 0;
|
||||
for (u32 multisamples = 1; multisamples < 16; multisamples *= 2)
|
||||
{
|
||||
|
@ -231,13 +238,13 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
|
|||
}
|
||||
|
||||
m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND);
|
||||
m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && false; // TODO
|
||||
m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && supports_fbfetch;
|
||||
m_features.per_sample_shading = true;
|
||||
m_features.noperspective_interpolation = true;
|
||||
m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
|
||||
m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
|
||||
m_features.texture_buffers_emulated_with_ssbo = true;
|
||||
m_features.feedback_loops = false;
|
||||
m_features.feedback_loops = (m_features.framebuffer_fetch || supports_barriers);
|
||||
m_features.geometry_shaders = false;
|
||||
m_features.partial_msaa_resolve = false;
|
||||
m_features.memory_import = true;
|
||||
|
@ -687,6 +694,9 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
|
|||
spirv_cross::CompilerMSL compiler(result.cbegin(), std::distance(result.cbegin(), result.cend()));
|
||||
spirv_cross::CompilerMSL::Options msl_options = compiler.get_msl_options();
|
||||
msl_options.pad_fragment_output_components = true;
|
||||
msl_options.use_framebuffer_fetch_subpasses = m_features.framebuffer_fetch;
|
||||
if (m_features.framebuffer_fetch)
|
||||
msl_options.set_msl_version(2, 3);
|
||||
|
||||
if (stage == GPUShaderStage::Fragment)
|
||||
{
|
||||
|
@ -702,6 +712,16 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
|
|||
rb.msl_buffer = i;
|
||||
compiler.add_msl_resource_binding(rb);
|
||||
}
|
||||
|
||||
if (!m_features.framebuffer_fetch)
|
||||
{
|
||||
spirv_cross::MSLResourceBinding rb;
|
||||
rb.stage = spv::ExecutionModelFragment;
|
||||
rb.desc_set = 2;
|
||||
rb.binding = 0;
|
||||
rb.msl_texture = MAX_TEXTURE_SAMPLERS;
|
||||
compiler.add_msl_resource_binding(rb);
|
||||
}
|
||||
}
|
||||
|
||||
compiler.set_msl_options(msl_options);
|
||||
|
@ -1764,8 +1784,9 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
|
|||
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
||||
GPUPipeline::RenderPassFlag feedback_loop)
|
||||
{
|
||||
DebugAssert(!feedback_loop);
|
||||
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
|
||||
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
|
||||
(!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) !=
|
||||
(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))));
|
||||
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
|
||||
bool needs_rt_clear = false;
|
||||
|
||||
|
@ -1779,7 +1800,8 @@ void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
|
|||
}
|
||||
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
|
||||
m_current_render_targets[i] = nullptr;
|
||||
m_num_current_render_targets = num_rts;
|
||||
m_num_current_render_targets = static_cast<u8>(num_rts);
|
||||
m_current_feedback_loop = feedback_loop;
|
||||
|
||||
if (changed || needs_rt_clear || needs_ds_clear)
|
||||
{
|
||||
|
@ -2077,6 +2099,13 @@ void MetalDevice::SetInitialEncoderState()
|
|||
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
if (m_current_ssbo)
|
||||
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
|
||||
|
||||
if (!m_features.framebuffer_fetch && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))
|
||||
{
|
||||
DebugAssert(m_current_render_targets[0]);
|
||||
[m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS];
|
||||
}
|
||||
|
||||
SetViewportInRenderEncoder();
|
||||
SetScissorInRenderEncoder();
|
||||
}
|
||||
|
@ -2138,7 +2167,118 @@ void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
|
|||
|
||||
void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
|
||||
{
|
||||
Panic("Barriers are not supported");
|
||||
// Shouldn't be using this with framebuffer fetch.
|
||||
DebugAssert(!m_features.framebuffer_fetch);
|
||||
|
||||
const bool skip_first_barrier = !InRenderPass();
|
||||
PreDrawCheck();
|
||||
|
||||
// TODO: The first barrier is unnecessary if we're starting the render pass.
|
||||
|
||||
u32 index_offset = base_index * sizeof(u16);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case GPUDevice::DrawBarrier::None:
|
||||
{
|
||||
s_stats.num_draws++;
|
||||
|
||||
[m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
|
||||
indexCount:index_count
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:m_index_buffer.GetBuffer()
|
||||
indexBufferOffset:index_offset
|
||||
instanceCount:1
|
||||
baseVertex:base_vertex
|
||||
baseInstance:0];
|
||||
}
|
||||
break;
|
||||
|
||||
case GPUDevice::DrawBarrier::One:
|
||||
{
|
||||
DebugAssert(m_num_current_render_targets == 1);
|
||||
s_stats.num_draws++;
|
||||
|
||||
if (!skip_first_barrier)
|
||||
{
|
||||
s_stats.num_barriers++;
|
||||
[m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets
|
||||
afterStages:MTLRenderStageFragment
|
||||
beforeStages:MTLRenderStageFragment];
|
||||
}
|
||||
|
||||
[m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
|
||||
indexCount:index_count
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:m_index_buffer.GetBuffer()
|
||||
indexBufferOffset:index_offset
|
||||
instanceCount:1
|
||||
baseVertex:base_vertex
|
||||
baseInstance:0];
|
||||
}
|
||||
break;
|
||||
|
||||
case GPUDevice::DrawBarrier::Full:
|
||||
{
|
||||
DebugAssert(m_num_current_render_targets == 1);
|
||||
|
||||
static constexpr const u8 vertices_per_primitive[][2] = {
|
||||
{1, 1}, // MTLPrimitiveTypePoint
|
||||
{2, 2}, // MTLPrimitiveTypeLine
|
||||
{2, 1}, // MTLPrimitiveTypeLineStrip
|
||||
{3, 3}, // MTLPrimitiveTypeTriangle
|
||||
{3, 1}, // MTLPrimitiveTypeTriangleStrip
|
||||
};
|
||||
|
||||
const u32 first_step =
|
||||
vertices_per_primitive[static_cast<size_t>(m_current_pipeline->GetPrimitive())][0] * sizeof(u16);
|
||||
const u32 index_step =
|
||||
vertices_per_primitive[static_cast<size_t>(m_current_pipeline->GetPrimitive())][1] * sizeof(u16);
|
||||
const u32 end_offset = (base_index + index_count) * sizeof(u16);
|
||||
|
||||
// first primitive
|
||||
if (!skip_first_barrier)
|
||||
{
|
||||
s_stats.num_barriers++;
|
||||
[m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets
|
||||
afterStages:MTLRenderStageFragment
|
||||
beforeStages:MTLRenderStageFragment];
|
||||
}
|
||||
s_stats.num_draws++;
|
||||
[m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
|
||||
indexCount:index_count
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:m_index_buffer.GetBuffer()
|
||||
indexBufferOffset:index_offset
|
||||
instanceCount:1
|
||||
baseVertex:base_vertex
|
||||
baseInstance:0];
|
||||
|
||||
index_offset += first_step;
|
||||
|
||||
// remaining primitives
|
||||
for (; index_offset < end_offset; index_offset += index_step)
|
||||
{
|
||||
s_stats.num_barriers++;
|
||||
s_stats.num_draws++;
|
||||
|
||||
[m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets
|
||||
afterStages:MTLRenderStageFragment
|
||||
beforeStages:MTLRenderStageFragment];
|
||||
[m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
|
||||
indexCount:index_count
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:m_index_buffer.GetBuffer()
|
||||
indexBufferOffset:index_offset
|
||||
instanceCount:1
|
||||
baseVertex:base_vertex
|
||||
baseInstance:0];
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
DefaultCaseIsUnreachable();
|
||||
}
|
||||
}
|
||||
|
||||
id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
|
||||
|
@ -2199,6 +2339,7 @@ bool MetalDevice::BeginPresent(bool skip_present)
|
|||
s_stats.num_render_passes++;
|
||||
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
|
||||
m_num_current_render_targets = 0;
|
||||
m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
|
||||
m_current_depth_target = nullptr;
|
||||
m_current_pipeline = nullptr;
|
||||
m_current_depth_state = nil;
|
||||
|
|
|
@ -123,6 +123,15 @@ void ShaderGen::WriteHeader(std::stringstream& ss)
|
|||
else if (m_spirv)
|
||||
ss << "#version 450 core\n\n";
|
||||
|
||||
#ifdef __APPLE__
|
||||
// TODO: Do this for Vulkan as well.
|
||||
if (m_render_api == RenderAPI::Metal)
|
||||
{
|
||||
if (!m_supports_framebuffer_fetch)
|
||||
ss << "#extension GL_EXT_samplerless_texture_functions : require\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_OPENGL
|
||||
// Extension enabling for OpenGL.
|
||||
if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES)
|
||||
|
@ -587,6 +596,22 @@ void ShaderGen::DeclareFragmentEntryPoint(
|
|||
ss << "layout(input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput u_input_rt;\n";
|
||||
ss << "#define LAST_FRAG_COLOR subpassLoad(u_input_rt)\n";
|
||||
}
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
if (m_render_api == RenderAPI::Metal)
|
||||
{
|
||||
if (m_supports_framebuffer_fetch)
|
||||
{
|
||||
// Set doesn't matter, because it's transformed to color0.
|
||||
ss << "layout(input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput u_input_rt;\n";
|
||||
ss << "#define LAST_FRAG_COLOR subpassLoad(u_input_rt)\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "layout(set = 2, binding = 0) uniform texture2D u_input_rt;\n";
|
||||
ss << "#define LAST_FRAG_COLOR texelFetch(u_input_rt, int2(gl_FragCoord.xy), 0)\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -3886,5 +3886,7 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
|||
}
|
||||
}
|
||||
break;
|
||||
|
||||
DefaultCaseIsUnreachable();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue