mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-02-17 03:15:39 +00:00
GPU/HW: Eliminate CPU round trip on oversized VRAM writes
This commit is contained in:
parent
e144392187
commit
d1a2ebd8f3
|
@ -529,6 +529,20 @@ bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 w
|
||||||
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height)));
|
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the uniform block passed to the VRAM write shader for a transfer
/// whose top-left corner is (x, y) and whose extent is width x height.
/// buffer_offset is forwarded unchanged as the base offset into the upload buffer.
GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const
{
  // The end coordinates are taken modulo the VRAM dimensions, so a transfer
  // which runs past the right/bottom edge yields an end that wraps around.
  const u32 wrapped_end_x = (x + width) % VRAM_WIDTH;
  const u32 wrapped_end_y = (y + height) % VRAM_HEIGHT;

  // Bit 15 is supplied as the OR mask only when set-mask-while-drawing is enabled.
  const u32 mask_or_bits = m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00;

  // Initializer order must match the member order of VRAMWriteUBOData.
  return VRAMWriteUBOData{x,
                          y,
                          wrapped_end_x,
                          wrapped_end_y,
                          width,
                          height,
                          buffer_offset,
                          mask_or_bits,
                          GetCurrentNormalizedVertexDepth()};
}
|
||||||
|
|
||||||
GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const
|
GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const
|
||||||
{
|
{
|
||||||
// drop precision unless true colour is enabled
|
// drop precision unless true colour is enabled
|
||||||
|
|
|
@ -125,8 +125,12 @@ protected:
|
||||||
|
|
||||||
struct VRAMWriteUBOData
|
struct VRAMWriteUBOData
|
||||||
{
|
{
|
||||||
u32 u_base_coords[2];
|
u32 u_dst_x;
|
||||||
u32 u_size[2];
|
u32 u_dst_y;
|
||||||
|
u32 u_end_x;
|
||||||
|
u32 u_end_y;
|
||||||
|
u32 u_width;
|
||||||
|
u32 u_height;
|
||||||
u32 u_buffer_base_offset;
|
u32 u_buffer_base_offset;
|
||||||
u32 u_mask_or_bits;
|
u32 u_mask_or_bits;
|
||||||
float u_depth_value;
|
float u_depth_value;
|
||||||
|
@ -226,6 +230,7 @@ protected:
|
||||||
bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
|
bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
|
||||||
|
|
||||||
VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const;
|
VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const;
|
||||||
|
VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const;
|
||||||
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
|
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
|
||||||
|
|
||||||
/// Handles quads with flipped texture coordinate directions.
|
/// Handles quads with flipped texture coordinate directions.
|
||||||
|
|
|
@ -681,19 +681,7 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
||||||
GPU_HW::FillVRAM(x, y, width, height, color);
|
GPU_HW::FillVRAM(x, y, width, height, color);
|
||||||
|
|
||||||
// drop precision unless true colour is enabled
|
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
||||||
if (!m_true_color)
|
|
||||||
color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color));
|
|
||||||
|
|
||||||
struct Uniforms
|
|
||||||
{
|
|
||||||
float u_fill_color[4];
|
|
||||||
u32 u_interlaced_displayed_field;
|
|
||||||
};
|
|
||||||
Uniforms uniforms;
|
|
||||||
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
|
|
||||||
RGBA8ToFloat(color);
|
|
||||||
uniforms.u_interlaced_displayed_field = GetActiveLineLSB();
|
|
||||||
|
|
||||||
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
|
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
|
||||||
|
|
||||||
|
@ -708,36 +696,22 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
||||||
void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
||||||
{
|
{
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
|
||||||
{
|
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data);
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::UpdateVRAM(x, y, width, height, data);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
|
||||||
|
|
||||||
const u32 num_pixels = width * height;
|
const u32 num_pixels = width * height;
|
||||||
const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16));
|
const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16));
|
||||||
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
||||||
m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16));
|
m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16));
|
||||||
|
|
||||||
const VRAMWriteUBOData uniforms = {x,
|
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned);
|
||||||
y,
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
map_result.index_aligned,
|
|
||||||
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
|
|
||||||
GetCurrentNormalizedVertexDepth()};
|
|
||||||
m_context->OMSetDepthStencilState(
|
m_context->OMSetDepthStencilState(
|
||||||
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
|
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
|
||||||
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
|
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
|
||||||
|
|
||||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||||
SetScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale);
|
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
|
||||||
|
SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
|
||||||
|
|
||||||
DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms));
|
DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms));
|
||||||
|
|
||||||
|
@ -754,16 +728,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
|
||||||
UpdateVRAMReadTexture();
|
UpdateVRAMReadTexture();
|
||||||
IncludeVRAMDityRectangle(dst_bounds);
|
IncludeVRAMDityRectangle(dst_bounds);
|
||||||
|
|
||||||
const VRAMCopyUBOData uniforms = {src_x * m_resolution_scale,
|
const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height);
|
||||||
src_y * m_resolution_scale,
|
|
||||||
dst_x * m_resolution_scale,
|
|
||||||
dst_y * m_resolution_scale,
|
|
||||||
((dst_x + width) % VRAM_WIDTH) * m_resolution_scale,
|
|
||||||
((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale,
|
|
||||||
width * m_resolution_scale,
|
|
||||||
height * m_resolution_scale,
|
|
||||||
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
|
||||||
GetCurrentNormalizedVertexDepth()};
|
|
||||||
|
|
||||||
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
|
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
|
||||||
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
|
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
|
||||||
|
|
|
@ -711,14 +711,10 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
||||||
glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
|
glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
|
||||||
|
|
||||||
// drop precision unless true colour is enabled
|
|
||||||
if (!m_true_color)
|
|
||||||
color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color));
|
|
||||||
|
|
||||||
// fast path when not using interlaced rendering
|
// fast path when not using interlaced rendering
|
||||||
if (!IsInterlacedRenderingEnabled())
|
if (!IsInterlacedRenderingEnabled())
|
||||||
{
|
{
|
||||||
const auto [r, g, b, a] = RGBA8ToFloat(color);
|
const auto [r, g, b, a] = RGBA8ToFloat(m_true_color ? color : RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)));
|
||||||
glClearColor(r, g, b, a);
|
glClearColor(r, g, b, a);
|
||||||
IsGLES() ? glClearDepthf(a) : glClearDepth(a);
|
IsGLES() ? glClearDepthf(a) : glClearDepth(a);
|
||||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||||
|
@ -726,15 +722,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
struct Uniforms
|
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
|
||||||
{
|
|
||||||
float u_fill_color[4];
|
|
||||||
u32 u_interlaced_displayed_field;
|
|
||||||
};
|
|
||||||
Uniforms uniforms;
|
|
||||||
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
|
|
||||||
RGBA8ToFloat(color);
|
|
||||||
uniforms.u_interlaced_displayed_field = GetActiveLineLSB();
|
|
||||||
|
|
||||||
m_vram_interlaced_fill_program.Bind();
|
m_vram_interlaced_fill_program.Bind();
|
||||||
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
||||||
|
@ -749,36 +737,18 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
||||||
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
||||||
{
|
{
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
|
||||||
{
|
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::UpdateVRAM(x, y, width, height, data);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
|
||||||
|
|
||||||
const u32 num_pixels = width * height;
|
const u32 num_pixels = width * height;
|
||||||
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
|
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
|
||||||
{
|
{
|
||||||
|
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
|
||||||
|
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data);
|
||||||
|
|
||||||
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
|
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
|
||||||
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
||||||
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16));
|
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16));
|
||||||
m_texture_stream_buffer->Unbind();
|
m_texture_stream_buffer->Unbind();
|
||||||
|
|
||||||
// viewport should be set to the whole VRAM size, so we can just set the scissor
|
|
||||||
const u32 flipped_y = VRAM_HEIGHT - y - height;
|
|
||||||
const u32 scaled_width = width * m_resolution_scale;
|
|
||||||
const u32 scaled_height = height * m_resolution_scale;
|
|
||||||
const u32 scaled_x = x * m_resolution_scale;
|
|
||||||
const u32 scaled_y = y * m_resolution_scale;
|
|
||||||
const u32 scaled_flipped_y = m_vram_texture.GetHeight() - scaled_y - scaled_height;
|
|
||||||
glViewport(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
|
|
||||||
glDisable(GL_BLEND);
|
glDisable(GL_BLEND);
|
||||||
glDisable(GL_SCISSOR_TEST);
|
|
||||||
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
||||||
|
|
||||||
m_vram_write_program.Bind();
|
m_vram_write_program.Bind();
|
||||||
|
@ -787,15 +757,14 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
else
|
else
|
||||||
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
||||||
|
|
||||||
const VRAMWriteUBOData uniforms = {x,
|
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned);
|
||||||
flipped_y,
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
map_result.index_aligned,
|
|
||||||
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
|
|
||||||
GetCurrentNormalizedVertexDepth()};
|
|
||||||
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
||||||
|
|
||||||
|
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||||
|
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
|
||||||
|
glScissor(scaled_bounds.left, m_vram_texture.GetHeight() - scaled_bounds.top - scaled_bounds.GetHeight(),
|
||||||
|
scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
|
||||||
|
|
||||||
glBindVertexArray(m_attributeless_vao_id);
|
glBindVertexArray(m_attributeless_vao_id);
|
||||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||||
|
|
||||||
|
@ -803,6 +772,18 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
||||||
|
{
|
||||||
|
// CPU round trip if oversized for now.
|
||||||
|
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
||||||
|
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
||||||
|
GPU::UpdateVRAM(x, y, width, height, data);
|
||||||
|
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
||||||
|
|
||||||
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
|
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
|
||||||
|
|
||||||
// reverse copy the rows so it matches opengl's lower-left origin
|
// reverse copy the rows so it matches opengl's lower-left origin
|
||||||
|
|
|
@ -1177,7 +1177,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
||||||
WriteCommonFunctions(ss);
|
WriteCommonFunctions(ss);
|
||||||
DeclareUniformBuffer(
|
DeclareUniformBuffer(
|
||||||
ss,
|
ss,
|
||||||
{"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
|
{"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
|
||||||
true);
|
true);
|
||||||
|
|
||||||
if (use_ssbo && m_glsl)
|
if (use_ssbo && m_glsl)
|
||||||
|
@ -1203,13 +1203,16 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
||||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
||||||
ss << R"(
|
ss << R"(
|
||||||
{
|
{
|
||||||
uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
uint2 coords = uint2(uint(v_pos.x) / RESOLUTION_SCALE, fixYCoord(uint(v_pos.y)) / RESOLUTION_SCALE);
|
||||||
uint2 offset = coords - u_base_coords;
|
|
||||||
|
|
||||||
#if API_OPENGL || API_OPENGL_ES
|
// make sure it's not oversized and out of range
|
||||||
// Lower-left origin flip for OpenGL
|
if (VECTOR_LT(coords, u_base_coords) && VECTOR_GE(coords, u_end_coords))
|
||||||
offset.y = u_size.y - offset.y - 1u;
|
discard;
|
||||||
#endif
|
|
||||||
|
// find offset from the start of the row/column
|
||||||
|
uint2 offset;
|
||||||
|
offset.x = (coords.x < u_base_coords.x) ? (((VRAM_SIZE.x / RESOLUTION_SCALE) - 1u) - u_base_coords.x + coords.x) : (coords.x - u_base_coords.x);
|
||||||
|
offset.y = (coords.y < u_base_coords.y) ? (((VRAM_SIZE.y / RESOLUTION_SCALE) - 1u) - u_base_coords.y + coords.y) : (coords.y - u_base_coords.y);
|
||||||
|
|
||||||
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
||||||
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
|
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
|
||||||
|
|
|
@ -1080,17 +1080,8 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
||||||
void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
||||||
{
|
{
|
||||||
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
|
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
|
||||||
{
|
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data);
|
||||||
// CPU round trip if oversized for now.
|
|
||||||
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
|
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
||||||
GPU::UpdateVRAM(x, y, width, height, data);
|
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
|
||||||
|
|
||||||
const u32 data_size = width * height * sizeof(u16);
|
const u32 data_size = width * height * sizeof(u16);
|
||||||
const u32 alignment = std::max<u32>(sizeof(u16), static_cast<u32>(g_vulkan_context->GetTexelBufferAlignment()));
|
const u32 alignment = std::max<u32>(sizeof(u16), static_cast<u32>(g_vulkan_context->GetTexelBufferAlignment()));
|
||||||
|
@ -1114,13 +1105,7 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
BeginVRAMRenderPass();
|
BeginVRAMRenderPass();
|
||||||
|
|
||||||
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
||||||
const VRAMWriteUBOData uniforms = {x,
|
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index);
|
||||||
y,
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
start_index,
|
|
||||||
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
|
|
||||||
GetCurrentNormalizedVertexDepth()};
|
|
||||||
vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
|
vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
|
||||||
&uniforms);
|
&uniforms);
|
||||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||||
|
@ -1129,8 +1114,9 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
&m_vram_write_descriptor_set, 0, nullptr);
|
&m_vram_write_descriptor_set, 0, nullptr);
|
||||||
|
|
||||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||||
Vulkan::Util::SetScissor(cmdbuf, x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
|
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
|
||||||
height * m_resolution_scale);
|
Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(),
|
||||||
|
scaled_bounds.GetHeight());
|
||||||
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
||||||
|
|
||||||
RestoreGraphicsAPIState();
|
RestoreGraphicsAPIState();
|
||||||
|
|
Loading…
Reference in a new issue