diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 7ab79a243..723a75c0b 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -529,6 +529,27 @@ bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 w
             .Intersects(Common::Rectangle::FromExtents(dst_x, dst_y, width, height)));
 }
 
+/// Builds the uniform data consumed by the VRAM write shader.
+/// The end coordinates are (start + size) wrapped to the VRAM dimensions; buffer_offset is
+/// passed through unchanged as the base offset of the uploaded pixel data.
+GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const
+{
+  // zero-initialise first so any member not assigned below keeps the value aggregate init would give it
+  VRAMWriteUBOData ubo = {};
+  ubo.u_dst_x = x;
+  ubo.u_dst_y = y;
+  ubo.u_end_x = (x + width) % VRAM_WIDTH;
+  ubo.u_end_y = (y + height) % VRAM_HEIGHT;
+  ubo.u_width = width;
+  ubo.u_height = height;
+  ubo.u_buffer_base_offset = buffer_offset;
+
+  // the write shader ORs u_mask_or_bits into every value it fetches; 0x8000 when set_mask_while_drawing is on
+  ubo.u_mask_or_bits = m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00u;
+  ubo.u_depth_value = GetCurrentNormalizedVertexDepth();
+  return ubo;
+}
+
 GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const
 {
   // drop precision unless true colour is enabled
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 623561a99..d57e3aeec 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -125,8 +125,12 @@ protected: struct VRAMWriteUBOData { - u32 u_base_coords[2]; - u32 u_size[2]; + u32 u_dst_x; + u32 u_dst_y; + u32 u_end_x; + u32 u_end_y; + u32 u_width; + u32 u_height; u32 u_buffer_base_offset; u32 u_mask_or_bits; float u_depth_value; @@ -226,6 +230,7 @@ protected: bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const; + VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const; VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; /// Handles quads with flipped texture coordinate directions. 
diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 8d1da2d78..ceb70c1a3 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -681,19 +681,7 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) GPU_HW::FillVRAM(x, y, width, height, color); - // drop precision unless true colour is enabled - if (!m_true_color) - color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); - - struct Uniforms - { - float u_fill_color[4]; - u32 u_interlaced_displayed_field; - }; - Uniforms uniforms; - std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = - RGBA8ToFloat(color); - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); @@ -708,36 +696,22 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { - if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) - { - // CPU round trip if oversized for now. 
- Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::UpdateVRAM(x, y, width, height, data); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); - return; - } - - GPU_HW::UpdateVRAM(x, y, width, height, data); + const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); + GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); const u32 num_pixels = width * height; const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16)); - const VRAMWriteUBOData uniforms = {x, - y, - width, - height, - map_result.index_aligned, - m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, - GetCurrentNormalizedVertexDepth()}; + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned); m_context->OMSetDepthStencilState( m_GPUSTAT.check_mask_before_draw ? 
m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); // the viewport should already be set to the full vram, so just adjust the scissor - SetScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale); + const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms)); @@ -754,16 +728,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt UpdateVRAMReadTexture(); IncludeVRAMDityRectangle(dst_bounds); - const VRAMCopyUBOData uniforms = {src_x * m_resolution_scale, - src_y * m_resolution_scale, - dst_x * m_resolution_scale, - dst_y * m_resolution_scale, - ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, - ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, - width * m_resolution_scale, - height * m_resolution_scale, - m_GPUSTAT.set_mask_while_drawing ? 
1u : 0u, - GetCurrentNormalizedVertexDepth()}; + const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index c7ad60e2b..b20655dfd 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -711,14 +711,10 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) glScissor(x, m_vram_texture.GetHeight() - y - height, width, height); - // drop precision unless true colour is enabled - if (!m_true_color) - color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); - // fast path when not using interlaced rendering if (!IsInterlacedRenderingEnabled()) { - const auto [r, g, b, a] = RGBA8ToFloat(color); + const auto [r, g, b, a] = RGBA8ToFloat(m_true_color ? color : RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color))); glClearColor(r, g, b, a); IsGLES() ? 
glClearDepthf(a) : glClearDepth(a); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); @@ -726,15 +722,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) } else { - struct Uniforms - { - float u_fill_color[4]; - u32 u_interlaced_displayed_field; - }; - Uniforms uniforms; - std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = - RGBA8ToFloat(color); - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); m_vram_interlaced_fill_program.Bind(); UploadUniformBuffer(&uniforms, sizeof(uniforms)); @@ -749,36 +737,18 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { - if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) - { - // CPU round trip if oversized for now. - Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::UpdateVRAM(x, y, width, height, data); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); - return; - } - - GPU_HW::UpdateVRAM(x, y, width, height, data); - const u32 num_pixels = width * height; if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes) { + const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); + GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); + const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16)); m_texture_stream_buffer->Unbind(); - // viewport should be set to the whole VRAM size, so we can just set the scissor - const u32 flipped_y = VRAM_HEIGHT - y - height; - const u32 scaled_width 
= width * m_resolution_scale; - const u32 scaled_height = height * m_resolution_scale; - const u32 scaled_x = x * m_resolution_scale; - const u32 scaled_y = y * m_resolution_scale; - const u32 scaled_flipped_y = m_vram_texture.GetHeight() - scaled_y - scaled_height; - glViewport(scaled_x, scaled_flipped_y, scaled_width, scaled_height); glDisable(GL_BLEND); - glDisable(GL_SCISSOR_TEST); glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); m_vram_write_program.Bind(); @@ -787,15 +757,14 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* else glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); - const VRAMWriteUBOData uniforms = {x, - flipped_y, - width, - height, - map_result.index_aligned, - m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, - GetCurrentNormalizedVertexDepth()}; + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned); UploadUniformBuffer(&uniforms, sizeof(uniforms)); + // the viewport should already be set to the full vram, so just adjust the scissor + const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + glScissor(scaled_bounds.left, m_vram_texture.GetHeight() - scaled_bounds.top - scaled_bounds.GetHeight(), + scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); + glBindVertexArray(m_attributeless_vao_id); glDrawArrays(GL_TRIANGLES, 0, 3); @@ -803,6 +772,18 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* } else { + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. 
+ Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + GPU::UpdateVRAM(x, y, width, height, data); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + return; + } + + GPU_HW::UpdateVRAM(x, y, width, height, data); + const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); // reverse copy the rows so it matches opengl's lower-left origin
diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp
index 2cdb956bc..c7f4766d0 100644
--- a/src/core/gpu_hw_shadergen.cpp
+++ b/src/core/gpu_hw_shadergen.cpp
@@ -1177,7 +1177,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
   WriteCommonFunctions(ss);
   DeclareUniformBuffer(
     ss,
-    {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
+    {"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
     true);
 
   if (use_ssbo && m_glsl)
@@ -1203,13 +1203,23 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
   DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
   ss << R"(
 {
-  uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
-  uint2 offset = coords - u_base_coords;
+  // fragment position divided by the resolution scale, with the y coordinate passed through fixYCoord()
+  uint2 coords = uint2(uint(v_pos.x) / RESOLUTION_SCALE, fixYCoord(uint(v_pos.y)) / RESOLUTION_SCALE);
 
-  #if API_OPENGL || API_OPENGL_ES
-    // Lower-left origin flip for OpenGL
-    offset.y = u_size.y - offset.y - 1u;
-  #endif
+  // make sure it's not oversized and out of range. u_end_coords is (base + size) wrapped to the
+  // VRAM size, so on a wrapped axis the pixels in [u_end_coords, u_base_coords) are not part of
+  // the transfer. Each axis is tested on its own so a write that wraps on both axes is handled.
+  if ((coords.x < u_base_coords.x && coords.x >= u_end_coords.x) ||
+      (coords.y < u_base_coords.y && coords.y >= u_end_coords.y))
+  {
+    discard;
+  }
+
+  // find offset from the start of the row/column. coordinates below the base are the wrapped
+  // part, which follows the (VRAM size - base) pixels written between the base and the VRAM edge.
+  uint2 offset;
+  offset.x = (coords.x < u_base_coords.x) ? ((VRAM_SIZE.x / RESOLUTION_SCALE) - u_base_coords.x + coords.x) : (coords.x - u_base_coords.x);
+  offset.y = (coords.y < u_base_coords.y) ? ((VRAM_SIZE.y / RESOLUTION_SCALE) - u_base_coords.y + coords.y) : (coords.y - u_base_coords.y);
 
   uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
   uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 7ba19a6ec..4e2f3d75a 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -1080,17 +1080,8 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { - if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) - { - // CPU round trip if oversized for now. - Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::UpdateVRAM(x, y, width, height, data); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); - return; - } - - GPU_HW::UpdateVRAM(x, y, width, height, data); + const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); + GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); const u32 data_size = width * height * sizeof(u16); const u32 alignment = std::max(sizeof(u16), static_cast(g_vulkan_context->GetTexelBufferAlignment())); @@ -1114,13 +1105,7 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* BeginVRAMRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const VRAMWriteUBOData uniforms = {x, - y, - width, - height, - start_index, - m_GPUSTAT.set_mask_while_drawing ? 
0x8000u : 0x00, - GetCurrentNormalizedVertexDepth()}; + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index); vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), &uniforms); vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -1129,8 +1114,9 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* &m_vram_write_descriptor_set, 0, nullptr); // the viewport should already be set to the full vram, so just adjust the scissor - Vulkan::Util::SetScissor(cmdbuf, x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, - height * m_resolution_scale); + const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), + scaled_bounds.GetHeight()); vkCmdDraw(cmdbuf, 3, 1, 0, 0); RestoreGraphicsAPIState();