GPU/HW: Provide depth in vertex rather than computing it in the shader

Fixes mask bit effects in Vulkan (e.g. Silent Hill).
Significantly reduces the number of mask bit refreshes in OpenGL/Vulkan.
This commit is contained in:
Connor McLaughlin 2020-06-20 20:21:32 +10:00
parent 7192b8d83a
commit a08c398d4b
6 changed files with 58 additions and 112 deletions

View file

@ -53,10 +53,8 @@ void GPU_HW::Reset()
m_batch = {}; m_batch = {};
m_batch_ubo_data = {}; m_batch_ubo_data = {};
m_batch_current_vertex_depth_id = 1;
m_batch_next_vertex_depth_id = 2;
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
m_current_depth = 1;
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
} }
@ -71,7 +69,7 @@ bool GPU_HW::DoState(StateWrapper& sw)
{ {
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
ResetBatchVertexDepthID(); ResetBatchVertexDepth();
} }
return true; return true;
@ -193,6 +191,9 @@ void GPU_HW::LoadVertices()
const RenderCommand rc{m_render_command.bits}; const RenderCommand rc{m_render_command.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
if (m_GPUSTAT.check_mask_before_draw)
m_current_depth++;
switch (rc.primitive) switch (rc.primitive)
{ {
case Primitive::Polygon: case Primitive::Polygon:
@ -211,7 +212,8 @@ void GPU_HW::LoadVertices()
const VertexPosition vp{m_fifo.Pop()}; const VertexPosition vp{m_fifo.Pop()};
const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0; const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0;
vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, texpage, packed_texcoord); vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, texpage,
packed_texcoord);
} }
if (rc.quad_polygon && m_resolution_scale > 1) if (rc.quad_polygon && m_resolution_scale > 1)
@ -351,13 +353,13 @@ void GPU_HW::LoadVertices()
const s32 quad_end_x = quad_start_x + quad_width; const s32 quad_end_x = quad_start_x + quad_width;
const u16 tex_right = tex_left + static_cast<u16>(quad_width); const u16 tex_right = tex_left + static_cast<u16>(quad_width);
AddNewVertex(quad_start_x, quad_start_y, color, texpage, tex_left, tex_top); AddNewVertex(quad_start_x, quad_start_y, m_current_depth, color, texpage, tex_left, tex_top);
AddNewVertex(quad_end_x, quad_start_y, color, texpage, tex_right, tex_top); AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top);
AddNewVertex(quad_start_x, quad_end_y, color, texpage, tex_left, tex_bottom); AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom);
AddNewVertex(quad_start_x, quad_end_y, color, texpage, tex_left, tex_bottom); AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom);
AddNewVertex(quad_end_x, quad_start_y, color, texpage, tex_right, tex_top); AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top);
AddNewVertex(quad_end_x, quad_end_y, color, texpage, tex_right, tex_bottom); AddNewVertex(quad_end_x, quad_end_y, m_current_depth, color, texpage, tex_right, tex_bottom);
x_offset += quad_width; x_offset += quad_width;
tex_left = 0; tex_left = 0;
@ -405,8 +407,8 @@ void GPU_HW::LoadVertices()
return; return;
BatchVertex start, end; BatchVertex start, end;
start.Set(m_drawing_offset.x + pos0.x, m_drawing_offset.y + pos0.y, color0, 0, 0); start.Set(m_drawing_offset.x + pos0.x, m_drawing_offset.y + pos0.y, m_current_depth, color0, 0, 0);
end.Set(m_drawing_offset.x + pos1.x, m_drawing_offset.y + pos1.y, color1, 0, 0); end.Set(m_drawing_offset.x + pos1.x, m_drawing_offset.y + pos1.y, m_current_depth, color1, 0, 0);
const s32 min_x = std::min(start.x, end.x); const s32 min_x = std::min(start.x, end.x);
const s32 max_x = std::max(start.x, end.x); const s32 max_x = std::max(start.x, end.x);
@ -451,7 +453,7 @@ void GPU_HW::LoadVertices()
const VertexPosition vp{m_blit_buffer[buffer_pos++]}; const VertexPosition vp{m_blit_buffer[buffer_pos++]};
BatchVertex vertex; BatchVertex vertex;
vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, 0, 0); vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, 0, 0);
if (i > 0) if (i > 0)
{ {
@ -552,7 +554,7 @@ GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst
width * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale, height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
GetCurrentNormalizedBatchVertexDepthID()}; GetCurrentNormalizedVertexDepth()};
return uniforms; return uniforms;
} }
@ -610,11 +612,10 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
} }
// can we fit these vertices in the current depth buffer range? // can we fit these vertices in the current depth buffer range?
if (BatchVertexDepthIDNeedsUpdate() && if ((m_current_depth + required_vertices) > MAX_BATCH_VERTEX_COUNTER_IDS)
(m_batch_next_vertex_depth_id + GetBatchVertexCount() + required_vertices) > MAX_BATCH_VERTEX_COUNTER_IDS)
{ {
// implies FlushRender() // implies FlushRender()
ResetBatchVertexDepthID(); ResetBatchVertexDepth();
} }
else if (m_batch_current_vertex_ptr) else if (m_batch_current_vertex_ptr)
{ {
@ -627,36 +628,13 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
MapBatchVertexPointer(required_vertices); MapBatchVertexPointer(required_vertices);
} }
void GPU_HW::ResetBatchVertexDepthID() void GPU_HW::ResetBatchVertexDepth()
{ {
Log_PerfPrint("Resetting batch vertex depth ID"); Log_PerfPrint("Resetting batch vertex depth");
FlushRender(); FlushRender();
UpdateDepthBufferFromMaskBit(); UpdateDepthBufferFromMaskBit();
m_batch_current_vertex_depth_id = 1; m_current_depth = 1;
m_batch_next_vertex_depth_id = 2;
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
}
void GPU_HW::IncrementBatchVertexID(u32 count)
{
DebugAssert((m_batch_next_vertex_depth_id + count) <= MAX_BATCH_VERTEX_COUNTER_IDS);
m_batch_next_vertex_depth_id += count;
}
void GPU_HW::SetBatchUBOVertexDepthID(u32 value)
{
u32 ubo_value;
// In OpenGL, gl_VertexID is inclusive of the base vertex, whereas SV_VertexID in D3D isn't.
// We rely on unsigned overflow to compute the correct value based on the base vertex.
if (m_render_api != HostDisplay::RenderAPI::D3D11)
ubo_value = m_batch_base_vertex - value;
else
ubo_value = value;
m_batch_ubo_dirty |= (m_batch_ubo_data.u_vertex_depth_id != ubo_value);
m_batch_ubo_data.u_vertex_depth_id = ubo_value;
} }
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
@ -673,8 +651,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
if (m_GPUSTAT.check_mask_before_draw) if (m_GPUSTAT.check_mask_before_draw)
{ {
// set new vertex counter since we want this to take into consideration previous masked pixels // set new vertex counter since we want this to take into consideration previous masked pixels
m_batch_current_vertex_depth_id = m_batch_next_vertex_depth_id++; m_current_depth++;
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
} }
} }
@ -686,8 +663,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
if (m_GPUSTAT.check_mask_before_draw) if (m_GPUSTAT.check_mask_before_draw)
{ {
// set new vertex counter since we want this to take into consideration previous masked pixels // set new vertex counter since we want this to take into consideration previous masked pixels
m_batch_current_vertex_depth_id = m_batch_next_vertex_depth_id++; m_current_depth++;
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
} }
} }
@ -753,7 +729,6 @@ void GPU_HW::DispatchRenderCommand()
{ {
m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
m_batch_ubo_data.u_check_mask_before_draw = BoolToUInt32(m_batch.check_mask_before_draw);
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing); m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing);
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
} }
@ -797,10 +772,6 @@ void GPU_HW::FlushRender()
if (vertex_count == 0) if (vertex_count == 0)
return; return;
const bool update_depth_id = BatchVertexDepthIDNeedsUpdate();
if (update_depth_id)
SetBatchUBOVertexDepthID(m_batch_next_vertex_depth_id);
if (m_drawing_area_changed) if (m_drawing_area_changed)
{ {
m_drawing_area_changed = false; m_drawing_area_changed = false;
@ -824,9 +795,6 @@ void GPU_HW::FlushRender()
m_renderer_stats.num_batches++; m_renderer_stats.num_batches++;
DrawBatchVertices(m_batch.GetRenderMode(), m_batch_base_vertex, vertex_count); DrawBatchVertices(m_batch.GetRenderMode(), m_batch_base_vertex, vertex_count);
} }
if (update_depth_id)
IncrementBatchVertexID(vertex_count);
} }
void GPU_HW::DrawRendererStats(bool is_idle_frame) void GPU_HW::DrawRendererStats(bool is_idle_frame)

View file

@ -58,20 +58,22 @@ protected:
{ {
s32 x; s32 x;
s32 y; s32 y;
s32 z;
u32 color; u32 color;
u32 texpage; u32 texpage;
u16 u; // 16-bit texcoords are needed for 256 extent rectangles u16 u; // 16-bit texcoords are needed for 256 extent rectangles
u16 v; u16 v;
ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 packed_texcoord) ALWAYS_INLINE void Set(s32 x_, s32 y_, s32 z_, u32 color_, u32 texpage_, u16 packed_texcoord)
{ {
Set(x_, y_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8)); Set(x_, y_, z_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8));
} }
ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 u_, u16 v_) ALWAYS_INLINE void Set(s32 x_, s32 y_, s32 z_, u32 color_, u32 texpage_, u16 u_, u16 v_)
{ {
x = x_; x = x_;
y = y_; y = y_;
z = z_;
color = color_; color = color_;
texpage = texpage_; texpage = texpage_;
u = u_; u = u_;
@ -112,8 +114,6 @@ protected:
float u_src_alpha_factor; float u_src_alpha_factor;
float u_dst_alpha_factor; float u_dst_alpha_factor;
u32 u_interlaced_displayed_field; u32 u_interlaced_displayed_field;
u32 u_vertex_depth_id;
u32 u_check_mask_before_draw;
u32 u_set_mask_while_drawing; u32 u_set_mask_while_drawing;
}; };
@ -183,21 +183,12 @@ protected:
u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); } u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
void EnsureVertexBufferSpace(u32 required_vertices); void EnsureVertexBufferSpace(u32 required_vertices);
void EnsureVertexBufferSpaceForCurrentCommand(); void EnsureVertexBufferSpaceForCurrentCommand();
void ResetBatchVertexDepthID(); void ResetBatchVertexDepth();
void IncrementBatchVertexID(u32 count);
void SetBatchUBOVertexDepthID(u32 value);
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
ALWAYS_INLINE float GetCurrentNormalizedBatchVertexDepthID() const ALWAYS_INLINE float GetCurrentNormalizedVertexDepth() const
{ {
return 1.0f - (static_cast<float>(m_batch_next_vertex_depth_id) / 65535.0f); return (static_cast<float>(m_current_depth) / 65535.0f);
}
/// Returns true if the batch vertex depth ID needs to be updated.
ALWAYS_INLINE bool BatchVertexDepthIDNeedsUpdate() const
{
// because GL uses base vertex we're incrementing the depth id every draw whether we like it or not
return m_batch.check_mask_before_draw || m_render_api != HostDisplay::RenderAPI::D3D11;
} }
/// Returns the interlaced mode to use when scanning out/displaying. /// Returns the interlaced mode to use when scanning out/displaying.
@ -246,8 +237,7 @@ protected:
BatchVertex* m_batch_end_vertex_ptr = nullptr; BatchVertex* m_batch_end_vertex_ptr = nullptr;
BatchVertex* m_batch_current_vertex_ptr = nullptr; BatchVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0; u32 m_batch_base_vertex = 0;
u32 m_batch_current_vertex_depth_id = 0; s32 m_current_depth = 0;
u32 m_batch_next_vertex_depth_id = 0;
u32 m_resolution_scale = 1; u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1; u32 m_max_resolution_scale = 1;

View file

@ -264,7 +264,7 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
bool GPU_HW_D3D11::CreateBatchInputLayout() bool GPU_HW_D3D11::CreateBatchInputLayout()
{ {
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 4> attributes = { static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 4> attributes = {
{{"ATTR", 0, DXGI_FORMAT_R32G32_SINT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0}, {{"ATTR", 0, DXGI_FORMAT_R32G32B32_SINT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}}; {"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
@ -731,7 +731,7 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d
height, height,
map_result.index_aligned, map_result.index_aligned,
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
GetCurrentNormalizedBatchVertexDepthID()}; GetCurrentNormalizedVertexDepth()};
m_context->OMSetDepthStencilState( m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
@ -763,7 +763,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
width * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale, height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
GetCurrentNormalizedBatchVertexDepthID()}; GetCurrentNormalizedVertexDepth()};
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale); const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),

View file

@ -281,7 +281,7 @@ bool GPU_HW_OpenGL::CreateVertexBuffer()
glEnableVertexAttribArray(1); glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2); glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3); glEnableVertexAttribArray(3);
glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x))); glVertexAttribIPointer(0, 3, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
reinterpret_cast<void*>(offsetof(BatchVertex, color))); reinterpret_cast<void*>(offsetof(BatchVertex, color)));
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u))); glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u)));
@ -779,7 +779,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
height, height,
map_result.index_aligned, map_result.index_aligned,
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
GetCurrentNormalizedBatchVertexDepthID()}; GetCurrentNormalizedVertexDepth()};
UploadUniformBuffer(&uniforms, sizeof(uniforms)); UploadUniformBuffer(&uniforms, sizeof(uniforms));
glBindVertexArray(m_attributeless_vao_id); glBindVertexArray(m_attributeless_vao_id);
@ -864,7 +864,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
width * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale, height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
GetCurrentNormalizedBatchVertexDepthID()}; GetCurrentNormalizedVertexDepth()};
uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height; uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height;
uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height; uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height;
UploadUniformBuffer(&uniforms, sizeof(uniforms)); UploadUniformBuffer(&uniforms, sizeof(uniforms));

View file

@ -493,8 +493,8 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
{ {
DeclareUniformBuffer(ss, DeclareUniformBuffer(ss,
{"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor", {"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor",
"float u_dst_alpha_factor", "uint u_interlaced_displayed_field", "uint u_base_vertex_depth_id", "float u_dst_alpha_factor", "uint u_interlaced_displayed_field",
"bool u_check_mask_before_draw", "bool u_set_mask_while_drawing"}, "bool u_set_mask_while_drawing"},
false); false);
} }
@ -511,12 +511,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
if (textured) if (textured)
{ {
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, DeclareVertexEntryPoint(ss, {"int3 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float v_depth"}}, true); {{"nointerpolation", "uint4 v_texpage"}}, false);
} }
else else
{ {
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0"}, 1, 0, {{"nointerpolation", "float v_depth"}}, true); DeclareVertexEntryPoint(ss, {"int3 a_pos", "float4 a_col0"}, 1, 0, {}, false);
} }
ss << R"( ss << R"(
@ -529,11 +529,15 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
// 0..+1023 -> -1..1 // 0..+1023 -> -1..1
float pos_x = ((float(a_pos.x) + vertex_offset) / 512.0) - 1.0; float pos_x = ((float(a_pos.x) + vertex_offset) / 512.0) - 1.0;
float pos_y = ((float(a_pos.y) + vertex_offset) / -256.0) + 1.0; float pos_y = ((float(a_pos.y) + vertex_offset) / -256.0) + 1.0;
float pos_z = 1.0 - (float(a_pos.z) / 65535.0);
#if API_OPENGL || API_OPENGL_ES
// OpenGL seems to be off by one pixel in the Y direction due to lower-left origin, but only on // OpenGL seems to be off by one pixel in the Y direction due to lower-left origin, but only on
// Intel and NVIDIA drivers. AMD is fine... // Intel and NVIDIA drivers. AMD is fine...
#if API_OPENGL || API_OPENGL_ES
pos_y += EPSILON; pos_y += EPSILON;
// 0..1 to -1..1 depth range.
pos_z = (pos_z * 2.0) - 1.0;
#endif #endif
// NDC space Y flip in Vulkan. // NDC space Y flip in Vulkan.
@ -541,13 +545,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
pos_y = -pos_y; pos_y = -pos_y;
#endif #endif
v_pos = float4(pos_x, pos_y, 0.0, 1.0); v_pos = float4(pos_x, pos_y, pos_z, 1.0);
#if API_D3D11
v_depth = 1.0 - (float(u_base_vertex_depth_id + (u_check_mask_before_draw ? v_id : 0u)) / 65535.0);
#else
v_depth = 1.0 - (float(v_id - u_base_vertex_depth_id) / 65535.0);
#endif
v_col0 = a_col0; v_col0 = a_col0;
#if TEXTURED #if TEXTURED
@ -707,12 +705,12 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
if (textured) if (textured)
{ {
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float v_depth"}}, DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}},
true, use_dual_source ? 2 : 1, true); true, use_dual_source ? 2 : 1, true);
} }
else else
{ {
DeclareFragmentEntryPoint(ss, 1, 0, {{"nointerpolation", "float v_depth"}}, true, use_dual_source ? 2 : 1, true); DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, true);
} }
ss << R"( ss << R"(
@ -846,7 +844,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
o_col0 = float4(color, u_dst_alpha_factor / ialpha); o_col0 = float4(color, u_dst_alpha_factor / ialpha);
#endif #endif
o_depth = oalpha * v_depth; o_depth = oalpha * v_pos.z;
} }
else else
{ {
@ -864,7 +862,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
o_col0 = float4(color, 1.0 - ialpha); o_col0 = float4(color, 1.0 - ialpha);
#endif #endif
o_depth = oalpha * v_depth; o_depth = oalpha * v_pos.z;
} }
#else #else
// Non-transparency won't enable blending so we can write the mask here regardless. // Non-transparency won't enable blending so we can write the mask here regardless.
@ -874,7 +872,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#endif #endif
o_depth = oalpha * v_depth; o_depth = oalpha * v_pos.z;
#endif #endif
} }
)"; )";
@ -900,7 +898,6 @@ CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RES
ss << R"(in VertexData { ss << R"(in VertexData {
float4 v_col0; float4 v_col0;
nointerpolation float v_depth;
} in_data[];)"; } in_data[];)";
if (IsVulkan()) if (IsVulkan())
@ -908,7 +905,6 @@ CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RES
ss << R"(out VertexData { ss << R"(out VertexData {
float4 v_col0; float4 v_col0;
nointerpolation float v_depth;
} out_data; } out_data;
layout(lines) in; layout(lines) in;
@ -921,25 +917,21 @@ void main() {
// top-left // top-left
out_data.v_col0 = in_data[0].v_col0; out_data.v_col0 = in_data[0].v_col0;
out_data.v_depth = in_data[0].v_depth;
gl_Position = gl_in[0].gl_Position - offset; gl_Position = gl_in[0].gl_Position - offset;
EmitVertex(); EmitVertex();
// top-right // top-right
out_data.v_col0 = in_data[0].v_col0; out_data.v_col0 = in_data[0].v_col0;
out_data.v_depth = in_data[0].v_depth;
gl_Position = gl_in[0].gl_Position + offset; gl_Position = gl_in[0].gl_Position + offset;
EmitVertex(); EmitVertex();
// bottom-left // bottom-left
out_data.v_col0 = in_data[1].v_col0; out_data.v_col0 = in_data[1].v_col0;
out_data.v_depth = in_data[1].v_depth;
gl_Position = gl_in[1].gl_Position - offset; gl_Position = gl_in[1].gl_Position - offset;
EmitVertex(); EmitVertex();
// bottom-right // bottom-right
out_data.v_col0 = in_data[1].v_col0; out_data.v_col0 = in_data[1].v_col0;
out_data.v_depth = in_data[1].v_depth;
gl_Position = gl_in[1].gl_Position + offset; gl_Position = gl_in[1].gl_Position + offset;
EmitVertex(); EmitVertex();
@ -968,25 +960,21 @@ void main(line Vertex input[2], inout TriangleStream<Vertex> output)
// top-left // top-left
v.col0 = input[0].col0; v.col0 = input[0].col0;
v.depth = input[0].depth;
v.pos = input[0].pos - offset; v.pos = input[0].pos - offset;
output.Append(v); output.Append(v);
// top-right // top-right
v.col0 = input[0].col0; v.col0 = input[0].col0;
v.depth = input[0].depth;
v.pos = input[0].pos + offset; v.pos = input[0].pos + offset;
output.Append(v); output.Append(v);
// bottom-left // bottom-left
v.col0 = input[1].col0; v.col0 = input[1].col0;
v.depth = input[1].depth;
v.pos = input[1].pos - offset; v.pos = input[1].pos - offset;
output.Append(v); output.Append(v);
// bottom-right // bottom-right
v.col0 = input[1].col0; v.col0 = input[1].col0;
v.depth = input[1].depth;
v.pos = input[1].pos + offset; v.pos = input[1].pos + offset;
output.Append(v); output.Append(v);

View file

@ -619,7 +619,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetRenderPass(m_vram_render_pass, 0); gpbuilder.SetRenderPass(m_vram_render_pass, 0);
gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX); gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX);
gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SINT, offsetof(BatchVertex, x)); gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32_SINT, offsetof(BatchVertex, x));
gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color)); gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color));
if (textured) if (textured)
{ {
@ -632,7 +632,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]); gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]);
gpbuilder.SetRasterizationState(polygon_mode_mapping[primitive], VK_CULL_MODE_NONE, gpbuilder.SetRasterizationState(polygon_mode_mapping[primitive], VK_CULL_MODE_NONE,
VK_FRONT_FACE_CLOCKWISE); VK_FRONT_FACE_CLOCKWISE);
gpbuilder.SetDepthState(depth_test != 0, true, gpbuilder.SetDepthState(true, true,
(depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); (depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS);
gpbuilder.SetNoBlendingState(); gpbuilder.SetNoBlendingState();
@ -1066,7 +1066,7 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
height, height,
start_index, start_index,
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
GetCurrentNormalizedBatchVertexDepthID()}; GetCurrentNormalizedVertexDepth()};
vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms); &uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,