GPU/HW: Trace min/max UVs for hazards

This commit is contained in:
Stenzek 2023-11-26 15:15:58 +10:00
parent eeebfce79b
commit 8446856ecf
No known key found for this signature in database
4 changed files with 100 additions and 34 deletions

View file

@ -198,6 +198,7 @@ bool GPU::HandleNOPCommand()
bool GPU::HandleClearCacheCommand()
{
Log_DebugPrintf("GP0 clear cache");
m_draw_mode.SetTexturePageChanged();
m_fifo.RemoveOne();
AddCommandTicks(1);
EndCommand();

View file

@ -59,7 +59,7 @@ ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale)
return scale;
}
ALWAYS_INLINE static bool ShouldUseUVLimits()
ALWAYS_INLINE static bool ShouldClampUVs()
{
// We only need UV limits if PGXP is enabled, or texture filtering is enabled.
return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filter != GPUTextureFilter::Nearest;
@ -214,7 +214,8 @@ bool GPU_HW::Initialize()
m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filter;
m_using_uv_limits = ShouldUseUVLimits();
m_clamp_uvs = ShouldClampUVs();
m_compute_uv_range = m_clamp_uvs;
m_chroma_smoothing = g_settings.gpu_24bit_chroma_smoothing;
m_downsample_mode = GetDownsampleMode(m_resolution_scale);
m_wireframe_mode = g_settings.gpu_wireframe_mode;
@ -334,7 +335,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
const GPUDownsampleMode downsample_mode = GetDownsampleMode(resolution_scale);
const GPUWireframeMode wireframe_mode =
features.geometry_shaders ? g_settings.gpu_wireframe_mode : GPUWireframeMode::Disabled;
const bool use_uv_limits = ShouldUseUVLimits();
const bool clamp_uvs = ShouldClampUVs();
const bool disable_color_perspective = features.noperspective_interpolation && ShouldDisableColorPerspective();
// TODO: Use old_settings
@ -346,7 +347,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading ||
m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter ||
m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing ||
m_clamp_uvs != clamp_uvs || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing ||
m_downsample_mode != downsample_mode ||
(m_downsample_mode == GPUDownsampleMode::Box &&
g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale) ||
@ -396,7 +397,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filter;
m_using_uv_limits = use_uv_limits;
m_clamp_uvs = clamp_uvs;
m_compute_uv_range = m_clamp_uvs;
m_chroma_smoothing = g_settings.gpu_24bit_chroma_smoothing;
m_downsample_mode = downsample_mode;
m_wireframe_mode = wireframe_mode;
@ -604,7 +606,7 @@ void GPU_HW::PrintSettingsToLog()
(!m_true_color && m_scaled_dithering) ? " (Scaled)" : "");
Log_InfoFmt("Texture Filtering: {}", Settings::GetTextureFilterDisplayName(m_texture_filtering));
Log_InfoFmt("Dual-source blending: {}", m_supports_dual_source_blend ? "Supported" : "Not supported");
Log_InfoFmt("Using UV limits: {}", m_using_uv_limits ? "YES" : "NO");
Log_InfoFmt("Clamping UVs: {}", m_clamp_uvs ? "YES" : "NO");
Log_InfoFmt("Depth buffer: {}", m_pgxp_depth_buffer ? "YES" : "NO");
Log_InfoFmt("Downsampling: {}", Settings::GetDownsampleModeDisplayName(m_downsample_mode));
Log_InfoFmt("Wireframe rendering: {}", Settings::GetGPUWireframeModeDisplayName(m_wireframe_mode));
@ -738,9 +740,8 @@ bool GPU_HW::CompilePipelines()
{
const GPUDevice::Features features = g_gpu_device->GetFeatures();
GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend,
m_supports_framebuffer_fetch);
m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, m_pgxp_depth_buffer,
m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch);
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 +
2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1);
@ -872,10 +873,10 @@ bool GPU_HW::CompilePipelines()
plconfig.input_layout.vertex_attributes =
textured ?
(m_using_uv_limits ? std::span<const GPUPipeline::VertexAttribute>(
(m_clamp_uvs ? std::span<const GPUPipeline::VertexAttribute>(
vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) :
std::span<const GPUPipeline::VertexAttribute>(
vertex_attributes, NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) :
std::span<const GPUPipeline::VertexAttribute>(vertex_attributes,
NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) :
std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES);
plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get();
@ -1273,6 +1274,10 @@ void GPU_HW::UpdateVRAMReadTexture()
{
GL_SCOPE("UpdateVRAMReadTexture()");
if (m_texpage_dirty)
GL_INS("Texpage is no longer dirty");
m_texpage_dirty = false;
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
if (m_vram_texture->IsMultisampled())
{
@ -1464,15 +1469,15 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
}
}
void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
void GPU_HW::ComputePolygonUVLimits(u32 texpage, BatchVertex* vertices, u32 num_vertices)
{
u16 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v;
u32 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v;
for (u32 i = 1; i < num_vertices; i++)
{
min_u = std::min<u16>(min_u, vertices[i].u);
max_u = std::max<u16>(max_u, vertices[i].u);
min_v = std::min<u16>(min_v, vertices[i].v);
max_v = std::max<u16>(max_v, vertices[i].v);
min_u = std::min<u32>(min_u, vertices[i].u);
max_u = std::max<u32>(max_u, vertices[i].u);
min_v = std::min<u32>(min_v, vertices[i].v);
max_v = std::max<u32>(max_v, vertices[i].v);
}
if (min_u != max_u)
@ -1480,6 +1485,8 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
if (min_v != max_v)
max_v--;
CheckForTexPageOverlap(texpage, min_u, min_v, max_u, max_v);
for (u32 i = 0; i < num_vertices; i++)
vertices[i].SetUVLimits(min_u, max_u, min_v, max_v);
}
@ -1689,8 +1696,8 @@ void GPU_HW::LoadVertices()
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (m_using_uv_limits && textured)
ComputePolygonUVLimits(vertices.data(), num_vertices);
if (m_compute_uv_range && textured)
ComputePolygonUVLimits(texpage, vertices.data(), num_vertices);
if (!IsDrawingAreaIsValid())
return;
@ -1848,6 +1855,8 @@ void GPU_HW::LoadVertices()
const u16 tex_right = tex_left + static_cast<u16>(quad_width);
const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);
CheckForTexPageOverlap(texpage, tex_left, tex_top, tex_right - 1, tex_bottom - 1);
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
@ -2077,6 +2086,44 @@ void GPU_HW::IncludeVRAMDirtyRectangle(const Common::Rectangle<u32>& rect)
}
}
/// Tracks the VRAM area sampled by textured draws while the current texture page overlaps the
/// VRAM dirty rectangle, and refreshes the VRAM read texture when the sampled area actually
/// intersects the dirty rectangle.
///
/// Does nothing unless m_texpage_dirty is set.
///
/// @param texpage Packed texture page bits. The low 4 bits select the page X base (64-unit steps),
///                bit 4 selects the page Y base (256-line steps), and the two bits starting at
///                bit 7 are used as the texture mode index.
/// @param min_u   Smallest U coordinate used by the primitive (texel units, before windowing).
/// @param min_v   Smallest V coordinate used by the primitive.
/// @param max_u   Largest U coordinate used by the primitive (inclusive).
/// @param max_v   Largest V coordinate used by the primitive (inclusive).
ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v)
{
  if (!m_texpage_dirty)
    return;

  // Per texture mode: {right shift converting texel U to VRAM X, value added to max U before the
  // shift so the upper bound rounds up}. The shifts mirror the /4, /2, /1, /1 page widths used
  // for the texture page rectangle.
  static constexpr std::array<std::array<u8, 2>, 4> uv_shifts_adds = {{{2, 3}, {1, 1}, {0, 0}, {0, 0}}};

  const u32 xoffs = (texpage & 0xFu) * 64u;
  const u32 yoffs = ((texpage >> 4) & 1u) * 256u;

  // The mode field is two bits wide. Masking with 2 (as opposed to 3) would fold mode 1 onto the
  // mode 0 entry and compute a too-narrow VRAM range for it, so hazards could go undetected.
  const u32 texture_mode = (texpage >> 7) & 3u;
  const u32 xshift = uv_shifts_adds[texture_mode][0];
  const u32 xadd = uv_shifts_adds[texture_mode][1];

  // Apply the texture window to the UV bounds, then translate them into VRAM coordinates.
  const u32 vram_min_u =
    (((min_u & m_draw_mode.texture_window.and_x) | m_draw_mode.texture_window.or_x) >> xshift) + xoffs;
  const u32 vram_max_u =
    ((((max_u & m_draw_mode.texture_window.and_x) | m_draw_mode.texture_window.or_x) + xadd) >> xshift) + xoffs;
  const u32 vram_min_v = ((min_v & m_draw_mode.texture_window.and_y) | m_draw_mode.texture_window.or_y) + yoffs;
  const u32 vram_max_v = ((max_v & m_draw_mode.texture_window.and_y) | m_draw_mode.texture_window.or_y) + yoffs;

  // Log_InfoFmt("{}: {},{} => {},{}", s_draw_number, vram_min_u, vram_min_v, vram_max_u, vram_max_v);

  // Only re-test against the dirty rectangle when this draw reaches outside the range that has
  // already been accumulated (and therefore already tested).
  // NOTE(review): the max bounds are inclusive but are compared with >= and passed straight to
  // Include(); confirm whether Rectangle treats right/bottom as exclusive.
  if (vram_min_u < m_current_uv_range.left || vram_min_v < m_current_uv_range.top ||
      vram_max_u >= m_current_uv_range.right || vram_max_v >= m_current_uv_range.bottom)
  {
    m_current_uv_range.Include(vram_min_u, vram_max_u, vram_min_v, vram_max_v);

    DebugAssert(m_vram_dirty_rect.Valid());
    if (m_current_uv_range.Intersects(m_vram_dirty_rect))
    {
      GL_INS_FMT("Updating VRAM cache due to UV {{{},{} => {},{}}} intersection with dirty {{{},{} => {},{}}}",
                 m_current_uv_range.left, m_current_uv_range.top, m_current_uv_range.right, m_current_uv_range.bottom,
                 m_vram_dirty_rect.left, m_vram_dirty_rect.top, m_vram_dirty_rect.right, m_vram_dirty_rect.bottom);
      UpdateVRAMReadTexture();
    }
  }
}
ALWAYS_INLINE bool GPU_HW::IsFlushed() const
{
return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr;
@ -2515,18 +2562,31 @@ void GPU_HW::DispatchRenderCommand()
}
#endif
if (m_vram_dirty_rect.Valid() && (m_draw_mode.mode_reg.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) ||
(m_draw_mode.mode_reg.IsUsingPalette() &&
m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect))))
if (m_vram_dirty_rect.Valid() && m_draw_mode.mode_reg.IsUsingPalette() &&
m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect))
{
GL_INS("Invalidating VRAM read cache due to drawing area overlap");
// Log_DevPrint("Invalidating VRAM read cache due to drawing area overlap");
GL_INS("Palette in VRAM dirty area, flushing cache");
if (!IsFlushed())
FlushRender();
UpdateVRAMReadTexture();
}
if (m_vram_dirty_rect.Valid() && m_draw_mode.mode_reg.GetTexturePageRectangle().Intersects(m_vram_dirty_rect))
{
GL_INS("Texpage is in dirty area, checking UV ranges");
m_compute_uv_range = true;
m_texpage_dirty = true;
m_current_uv_range.SetInvalid();
}
else
{
m_compute_uv_range = m_clamp_uvs;
if (m_texpage_dirty)
GL_INS("Texpage is no longer dirty");
m_texpage_dirty = false;
}
}
texture_mode = m_draw_mode.mode_reg.texture_mode;

View file

@ -151,6 +151,7 @@ private:
void SetFullVRAMDirtyRectangle();
void ClearVRAMDirtyRectangle();
void IncludeVRAMDirtyRectangle(const Common::Rectangle<u32>& rect);
void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v);
bool IsFlushed() const;
u32 GetBatchVertexSpace() const;
@ -192,7 +193,7 @@ private:
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
/// Computes polygon U/V boundaries.
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
void ComputePolygonUVLimits(u32 texpage, BatchVertex* vertices, u32 num_vertices);
/// Sets the depth test flag for PGXP depth buffering.
void SetBatchDepthBuffer(bool enabled);
@ -251,17 +252,20 @@ private:
GPUDownsampleMode m_downsample_mode = GPUDownsampleMode::Disabled;
GPUWireframeMode m_wireframe_mode = GPUWireframeMode::Disabled;
bool m_true_color = true;
bool m_using_uv_limits = false;
bool m_clamp_uvs = false;
bool m_compute_uv_range = false;
bool m_pgxp_depth_buffer = false;
bool m_texpage_dirty = false;
BatchConfig m_batch;
// Changed state
bool m_batch_ubo_dirty = true;
BatchUBOData m_batch_ubo_data = {};
// Bounding box of VRAM area that the GPU has drawn into.
Common::Rectangle<u32> m_vram_dirty_rect;
// Changed state
bool m_batch_ubo_dirty = true;
Common::Rectangle<u32> m_current_uv_range;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2, 5, 9, 4, 3> m_batch_pipelines{};

View file

@ -167,6 +167,9 @@ union GPUDrawModeReg
// Bits 0..10 are returned in the GPU status register, latched at E1h/polygon draw time.
static constexpr u32 GPUSTAT_MASK = 0b11111111111;
static constexpr std::array<u32, 4> texture_page_widths = {
{TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
u16 bits;
BitField<u16, u8, 0, 4> texture_page_x_base;
@ -188,8 +191,6 @@ union GPUDrawModeReg
/// Returns a rectangle comprising the texture page area.
ALWAYS_INLINE_RELEASE Common::Rectangle<u32> GetTexturePageRectangle() const
{
static constexpr std::array<u32, 4> texture_page_widths = {
{TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
return Common::Rectangle<u32>::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(),
texture_page_widths[static_cast<u8>(texture_mode.GetValue())],
TEXTURE_PAGE_HEIGHT);