GPU/HW: Avoid extra texture copy in Vulkan/GL

This commit is contained in:
Stenzek 2023-12-01 19:08:50 +10:00
parent 3469b83a58
commit 2ca503639d
No known key found for this signature in database
7 changed files with 24 additions and 14 deletions

View file

@ -2474,9 +2474,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
const bool use_shader =
(m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT ||
Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height)));
((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT);
if (use_shader || IsUsingMultisampling())
{
@ -2526,14 +2524,15 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
return;
}
// We can't CopySubresourceRegion to the same resource. So use the shadow texture if we can, but that may need to be
// updated first. Copying to the same resource seemed to work on Windows 10, but breaks on Windows 7. But, it's
// against the API spec, so better to be safe than sorry.
// TODO: make this an optional feature, DX12 can do it
if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)))
UpdateVRAMReadTexture();
GPUTexture* src_tex = m_vram_texture.get();
const bool overlaps_with_self = Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height));
if (!g_gpu_device->GetFeatures().texture_copy_to_self || overlaps_with_self)
{
src_tex = m_vram_read_texture.get();
if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)))
UpdateVRAMReadTexture();
}
IncludeVRAMDirtyRectangle(
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
@ -2545,9 +2544,10 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
}
g_gpu_device->CopyTextureRegion(m_vram_texture.get(), dst_x * m_resolution_scale, dst_y * m_resolution_scale, 0, 0,
m_vram_read_texture.get(), src_x * m_resolution_scale, src_y * m_resolution_scale, 0,
src_tex, src_x * m_resolution_scale, src_y * m_resolution_scale, 0,
0, width * m_resolution_scale, height * m_resolution_scale);
m_vram_read_texture->MakeReadyForSampling();
if (src_tex != m_vram_texture.get())
m_vram_read_texture->MakeReadyForSampling();
}
void GPU_HW::DispatchRenderCommand()

View file

@ -174,6 +174,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.framebuffer_fetch = false;
m_features.per_sample_shading = (feature_level >= D3D_FEATURE_LEVEL_10_1);
m_features.noperspective_interpolation = true;
m_features.texture_copy_to_self = false;
m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);

View file

@ -116,7 +116,8 @@ D3D12Device::ComPtr<ID3D12RootSignature> D3D12Device::CreateRootSignature(const
return rs;
}
bool D3D12Device::CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features)
bool D3D12Device::CreateDevice(const std::string_view& adapter, bool threaded_presentation,
FeatureMask disabled_features)
{
std::unique_lock lock(s_instance_mutex);
@ -1176,6 +1177,8 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features)
m_features.framebuffer_fetch = false;
m_features.per_sample_shading = true;
m_features.noperspective_interpolation = true;
m_features.texture_copy_to_self =
/*!(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF)*/ false; // TODO: Support with Enhanced Barriers
m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);

View file

@ -449,6 +449,7 @@ public:
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 1),
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 2),
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 3),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 4),
};
struct Features
@ -457,6 +458,7 @@ public:
bool framebuffer_fetch : 1;
bool per_sample_shading : 1;
bool noperspective_interpolation : 1;
bool texture_copy_to_self : 1;
bool supports_texture_buffers : 1;
bool texture_buffers_emulated_with_ssbo : 1;
bool geometry_shaders : 1;

View file

@ -215,6 +215,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && false; // TODO
m_features.per_sample_shading = true;
m_features.noperspective_interpolation = true;
m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
m_features.texture_buffers_emulated_with_ssbo = true;
m_features.geometry_shaders = false;

View file

@ -496,6 +496,8 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features)
// noperspective is not supported in GLSL ES.
m_features.noperspective_interpolation = !is_gles;
m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
m_features.geometry_shaders =
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);

View file

@ -2202,6 +2202,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
m_features.noperspective_interpolation = true;
m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
m_features.per_sample_shading = m_device_features.sampleRateShading;
m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);