MetalDevice: Use shader to clear current depth target

This commit is contained in:
Stenzek 2024-04-12 18:10:31 +10:00
parent e5a024ba85
commit f4fef36eba
No known key found for this signature in database
3 changed files with 176 additions and 56 deletions

View file

@ -306,6 +306,19 @@ private:
using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>;
// Key describing the attachment layout a clear pipeline is built for: one format per colour
// attachment slot, the depth attachment format and the sample count.
// All comparisons are raw byte comparisons over the whole object, so instances should be
// value-initialised (including pad) before their fields are filled in.
struct ClearPipelineConfig
{
  GPUTexture::Format color_formats[MAX_RENDER_TARGETS];
  GPUTexture::Format depth_format;
  u8 samples;
  u8 pad[2]; // explicit padding; it takes part in the memcmp-based comparisons below

  // Byte-wise equality over the whole struct.
  bool operator==(const ClearPipelineConfig& c) const { return !std::memcmp(this, &c, sizeof(*this)); }

  // Negation of operator==.
  bool operator!=(const ClearPipelineConfig& c) const { return !(*this == c); }

  // Byte-wise ordering (memcmp order, not a field-by-field numeric order).
  bool operator<(const ClearPipelineConfig& c) const { return 0 > std::memcmp(this, &c, sizeof(*this)); }
};
// The struct is expected to pack into exactly 8 bytes.
static_assert(sizeof(ClearPipelineConfig) == 8);
// Returns m_window_info.window_handle reinterpreted as an NSView*. The __bridge cast performs no
// ownership transfer.
ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; }
void SetFeatures(FeatureMask disabled_features);
@ -313,6 +326,8 @@ private:
// NOTE(review): GetClearDepthPipeline() autoreleases the result of this call, so it presumably
// returns an owned (+1) reference - confirm against the definition.
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
// Builds a ClearPipelineConfig from the currently bound colour/depth targets (formats and sample count).
ClearPipelineConfig GetCurrentClearPipelineConfig() const;
// Returns the cached depth-clear pipeline matching config, creating and caching it on first use.
// The result is nil if pipeline creation failed.
id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config);
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source,
const std::string_view& entry_point);
@ -368,6 +383,7 @@ private:
id<MTLLibrary> m_shaders = nil;
// Resolve compute pipelines, keyed by a pair of texture formats.
// NOTE(review): the meaning/order of the two formats in the key is not visible here - confirm.
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
m_resolve_pipelines;
// Depth-clear render pipelines keyed by attachment layout; searched linearly and appended to by
// GetClearDepthPipeline(). An entry's pipeline may be nil when creation failed.
std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines;
id<MTLCommandBuffer> m_upload_cmdbuf = nil;
id<MTLBlitCommandEncoder> m_upload_encoder = nil;

View file

@ -341,12 +341,24 @@ void MetalDevice::DestroyDevice()
[it.second release];
m_cleanup_objects.clear();
for (auto& it : m_depth_states)
{
if (it.second != nil)
[it.second release];
}
m_depth_states.clear();
for (auto& it : m_resolve_pipelines)
{
if (it.second != nil)
[it.second release];
}
m_resolve_pipelines.clear();
for (auto& it : m_clear_pipelines)
{
if (it.second != nil)
[it.second release];
}
m_clear_pipelines.clear();
if (m_shaders != nil)
{
[m_shaders release];
@ -518,13 +530,6 @@ void MetalDevice::DestroyBuffers()
m_uniform_buffer.Destroy();
m_vertex_buffer.Destroy();
m_index_buffer.Destroy();
for (auto& it : m_depth_states)
{
if (it.second != nil)
[it.second release];
}
m_depth_states.clear();
}
bool MetalDevice::IsRenderTargetBound(const GPUTexture* tex) const
@ -870,7 +875,23 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
{
if (config.color_formats[i] == GPUTexture::Format::Unknown)
break;
desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])];
MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0];
ca.pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])];
ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) |
(config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) |
(config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) |
(config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone);
ca.blendingEnabled = config.blend.enable;
if (config.blend.enable)
{
ca.sourceRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_blend.GetValue())];
ca.destinationRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_blend.GetValue())];
ca.rgbBlendOperation = op_mapping[static_cast<u8>(config.blend.blend_op.GetValue())];
ca.sourceAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_alpha_blend.GetValue())];
ca.destinationAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_alpha_blend.GetValue())];
ca.alphaBlendOperation = op_mapping[static_cast<u8>(config.blend.alpha_blend_op.GetValue())];
}
}
desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)];
@ -907,13 +928,6 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
if (depth == nil)
return {};
// Blending state
MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0];
ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) |
(config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) |
(config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) |
(config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone);
// General
const MTLPrimitiveType primitive = primitives[static_cast<u8>(config.primitive)];
desc.rasterSampleCount = config.samples;
@ -926,17 +940,6 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
if (config.layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable;
ca.blendingEnabled = config.blend.enable;
if (config.blend.enable)
{
ca.sourceRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_blend.GetValue())];
ca.destinationRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_blend.GetValue())];
ca.rgbBlendOperation = op_mapping[static_cast<u8>(config.blend.blend_op.GetValue())];
ca.sourceAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_alpha_blend.GetValue())];
ca.destinationAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_alpha_blend.GetValue())];
ca.alphaBlendOperation = op_mapping[static_cast<u8>(config.blend.alpha_blend_op.GetValue())];
}
NSError* error = nullptr;
id<MTLRenderPipelineState> pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error];
if (pipeline == nil)
@ -1585,7 +1588,43 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d)
{
  // Base-class handling of the clear runs first, exactly as before.
  GPUDevice::ClearDepth(t, d);

  // The in-pass shader clear only applies when t is the depth target of the render pass that is
  // currently being encoded. In every other case there is nothing more to do here.
  if (!InRenderPass() || m_current_depth_target != t)
    return;

  const ClearPipelineConfig config = GetCurrentClearPipelineConfig();
  id<MTLRenderPipelineState> pipeline = GetClearDepthPipeline(config);
  if (pipeline == nil)
  {
    // GetClearDepthPipeline() yields nil when pipeline creation fails (the error is logged there).
    // A nil pipeline must not be bound, so end the render pass instead of drawing.
    // NOTE(review): assumes ending the pass lets the pending clear be applied when the next pass
    // begins - confirm against BeginRenderPass/CommitClear.
    EndRenderPass();
    return;
  }

  id<MTLDepthStencilState> depth = GetDepthState(GPUPipeline::DepthState::GetAlwaysWriteState());

  // The clear has to cover the whole texture, so temporarily widen viewport/scissor if needed.
  const Common::Rectangle<s32> rect(0, 0, t->GetWidth(), t->GetHeight());
  const bool set_vp = (m_current_viewport != rect);
  const bool set_scissor = (m_current_scissor != rect);
  if (set_vp)
  {
    [m_render_encoder setViewport:(MTLViewport){0.0, 0.0, static_cast<double>(t->GetWidth()),
                                                static_cast<double>(t->GetHeight()), 0.0, 1.0}];
  }
  if (set_scissor)
    [m_render_encoder setScissorRect:(MTLScissorRect){0u, 0u, t->GetWidth(), t->GetHeight()}];

  // Bind the clear state. Cull mode and depth state are only touched when they differ from what
  // the encoder already has bound.
  [m_render_encoder setRenderPipelineState:pipeline];
  if (m_current_cull_mode != MTLCullModeNone)
    [m_render_encoder setCullMode:MTLCullModeNone];
  if (depth != m_current_depth_state)
    [m_render_encoder setDepthStencilState:depth];

  // The clear depth value is passed to the vertex stage as inline bytes in buffer slot 0.
  [m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:0];

  // The clear pipeline is created for the triangle topology class and its vertex shader generates
  // a single full-screen triangle from three vertex ids, so always draw a triangle here. The
  // primitive type of whatever pipeline is currently bound is irrelevant (and there may be none).
  [m_render_encoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
  s_stats.num_draws++;

  // Restore everything that was overridden above so subsequent draws see the tracked state.
  [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
  if (m_current_pipeline)
    [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
  if (m_current_cull_mode != MTLCullModeNone)
    [m_render_encoder setCullMode:m_current_cull_mode];
  if (depth != m_current_depth_state)
    [m_render_encoder setDepthStencilState:m_current_depth_state];
  if (set_vp)
    SetViewportInRenderEncoder();
  if (set_scissor)
    SetScissorInRenderEncoder();
}
void MetalDevice::InvalidateRenderTarget(GPUTexture* t)
@ -1634,6 +1673,51 @@ void MetalDevice::CommitClear(MetalTexture* tex)
}
}
// Builds the clear-pipeline key for the currently bound targets: the format of every bound colour
// target, the depth target format (Unknown when none is bound) and the sample count.
MetalDevice::ClearPipelineConfig MetalDevice::GetCurrentClearPipelineConfig() const
{
  // Value-initialise so the unused colour slots and the padding bytes are zero; the config is
  // compared byte-wise.
  // NOTE(review): assumes GPUTexture::Format::Unknown is the zero value - confirm.
  ClearPipelineConfig config = {};
  for (u32 i = 0; i < m_num_current_render_targets; i++)
    config.color_formats[i] = m_current_render_targets[i]->GetFormat();

  if (m_current_depth_target)
  {
    // The depth target decides both the depth format and the sample count.
    config.depth_format = m_current_depth_target->GetFormat();
    config.samples = m_current_depth_target->GetSamples();
  }
  else
  {
    config.depth_format = GPUTexture::Format::Unknown;

    // Without a depth target the first colour target supplies the sample count. Only touch it when
    // a colour target is actually bound; otherwise fall back to a single sample rather than
    // dereferencing an unbound slot.
    config.samples = (m_num_current_render_targets > 0) ? m_current_render_targets[0]->GetSamples() : 1;
  }

  return config;
}
// Returns the depth-clear render pipeline for the given attachment layout, building it on first
// use. The result (including a nil result after a failed build) is stored in m_clear_pipelines.
id<MTLRenderPipelineState> MetalDevice::GetClearDepthPipeline(const ClearPipelineConfig& config)
{
  // Linear search of the cache; hand back the stored pipeline on a hit.
  for (const auto& cached : m_clear_pipelines)
  {
    if (cached.first == config)
      return cached.second;
  }

  // Cache miss: describe a pipeline using the depth clear shaders.
  MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor new] autorelease];
  desc.vertexFunction = [GetFunctionFromLibrary(m_shaders, @"depthClearVertex") autorelease];
  desc.fragmentFunction = [GetFunctionFromLibrary(m_shaders, @"depthClearFragment") autorelease];

  // Mirror the colour attachment formats up to the first Unknown slot, with all colour writes
  // masked off so only depth is affected.
  for (u32 i = 0; i < MAX_RENDER_TARGETS && config.color_formats[i] != GPUTexture::Format::Unknown; i++)
  {
    MTLRenderPipelineColorAttachmentDescriptor* attachment = desc.colorAttachments[i];
    attachment.pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])];
    attachment.writeMask = MTLColorWriteMaskNone;
  }

  desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)];
  desc.rasterizationEnabled = TRUE;
  desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle;
  desc.rasterSampleCount = config.samples;
  desc.vertexBuffers[0].mutability = MTLMutabilityImmutable;

  NSError* error = nullptr;
  id<MTLRenderPipelineState> pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error];
  if (pipeline == nil)
    LogNSError(error, "Failed to create clear render pipeline state");

  // The entry is stored even when creation failed, so the same config is not built again.
  m_clear_pipelines.emplace_back(config, pipeline);
  return pipeline;
}
MetalTextureBuffer::MetalTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements)
{
}

View file

@ -3,40 +3,60 @@
using namespace metal;
// https://developer.apple.com/documentation/metal/metal_sample_code_library/improving_edge-rendering_quality_with_multisample_antialiasing_msaa?language=objc
// Resolves a multisampled colour texture: each thread averages all samples of the pixel at gid
// and writes the result to the single-sample destination texture.
kernel void
colorResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
                   texture2d<float, access::write> resolvedTexture [[texture(1)]],
                   uint2 gid [[thread_position_in_grid]])
{
  const uint sampleCount = multisampledTexture.get_num_samples();

  // Sum every sample of this pixel.
  float4 sum = float4(0.0f);
  uint sampleIndex = 0;
  while (sampleIndex < sampleCount)
  {
    sum += multisampledTexture.read(gid, sampleIndex);
    ++sampleIndex;
  }

  // Average and store.
  resolvedTexture.write(sum / float(sampleCount), gid);
}
kernel void
depthResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
texture2d<float, access::write> resolvedTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
// Resolves a multisampled colour texture: each thread averages all samples of the pixel at gid
// and writes the averaged colour to resolvedTexture.
kernel void colorResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
                               texture2d<float, access::write> resolvedTexture [[texture(1)]],
                               uint2 gid [[thread_position_in_grid]])
{
  const uint count = multisampledTexture.get_num_samples();

  // Only the colour accumulator is needed here; a depth accumulator would be an unused local that
  // costs a second texture read per sample.
  float4 resolved_color = 0;
  for (uint i = 0; i < count; ++i)
  {
    resolved_color += multisampledTexture.read(gid, i);
  }

  resolved_color /= count;
  resolvedTexture.write(resolved_color, gid);
}
// Resolves a multisampled depth texture: each thread averages the red channel of all samples of
// the pixel at gid and writes it to the red channel of resolvedTexture (other channels are zero).
kernel void depthResolveKernel(texture2d_ms<float, access::read> multisampledTexture [[texture(0)]],
                               texture2d<float, access::write> resolvedTexture [[texture(1)]],
                               uint2 gid [[thread_position_in_grid]])
{
  const uint sampleCount = multisampledTexture.get_num_samples();

  // Sum the depth (red channel) of every sample of this pixel.
  float depthSum = 0;
  uint sampleIndex = 0;
  while (sampleIndex < sampleCount)
  {
    depthSum += multisampledTexture.read(gid, sampleIndex).r;
    ++sampleIndex;
  }

  // Average and store in the red channel.
  const float averagedDepth = depthSum / float(sampleCount);
  resolvedTexture.write(float4(averagedDepth, 0, 0, 0), gid);
}
// Uniform data read by depthClearVertex from buffer slot 0: the depth value written into the
// z component of every output position.
struct DepthClearUBO
{
float depth;
};
// Output of depthClearVertex: only the clip-space position is produced.
struct DepthClearOut
{
float4 pos [[position]];
};
// Vertex stage of the depth clear: derives a position purely from the vertex id and places it at
// the depth supplied in ubo.depth.
// Vertex ids 0, 1, 2 map to uv (0,0), (2,0), (0,2), i.e. positions (-1,1), (3,1), (-1,-3).
vertex DepthClearOut depthClearVertex(constant DepthClearUBO& ubo [[buffer(0)]], uint vertexId [[vertex_id]])
{
  // Bit tricks: u is 2 when bit 0 of the id is set, v is 2 when bit 1 is set.
  const float u = float((vertexId << uint(1)) & 2u);
  const float v = float(vertexId & 2u);

  // Scale/flip uv into clip space.
  const float2 clipXY = (float2(u, v) * float2(2.0, -2.0)) + float2(-1.0, 1.0);

  DepthClearOut result = {};
  result.pos = float4(clipXY, ubo.depth, 1.0);
  return result;
}
// Fragment stage of the depth clear. Intentionally empty: it takes no inputs and produces no
// colour output.
fragment void depthClearFragment()
{
}