From 9d6d00480c991a659fe3cbba5b06ee4d6e9709b2 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 24 Nov 2019 18:47:40 +1000 Subject: [PATCH] GPU: Implement mask bit handling in software renderer Still needs implementation in the hardware renderers. --- src/core/gpu.cpp | 22 ++++++++++++++++++---- src/core/gpu.h | 11 ++++++++--- src/core/gpu_commands.cpp | 13 +++++++++---- src/core/gpu_sw.cpp | 25 ++++++++++++++++++++----- 4 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index fb7ca7f2d..512b50c18 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -137,10 +137,19 @@ bool GPU::DoState(StateWrapper& sw) if (sw.IsReading()) { + // Need to clear the mask bits since we want to pull it in from the copy. + const u32 old_GPUSTAT = m_GPUSTAT.bits; + m_GPUSTAT.check_mask_before_draw = false; + m_GPUSTAT.set_mask_while_drawing = false; + // Still need a temporary here. HeapArray temp; sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data()); + + // Restore mask setting. + m_GPUSTAT.bits = old_GPUSTAT; + UpdateDisplay(); UpdateSliceTicks(); } @@ -673,7 +682,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {} void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { // Fast path when the copy is not oversized. - if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT) + if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !m_GPUSTAT.IsMaskingEnabled()) { const u16* src_ptr = static_cast(data); u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x]; @@ -688,13 +697,18 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { // Slow path when we need to handle wrap-around. const u16* src_ptr = static_cast(data); + const u16 mask_and = m_GPUSTAT.GetMaskAND(); + const u16 mask_or = m_GPUSTAT.GetMaskOR(); + for (u32 row = 0; row < height;) { u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width;) { // TODO: Handle unaligned reads... - dst_row_ptr[(x + col++) % VRAM_WIDTH] = *(src_ptr++); + u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; + if (((*pixel_ptr) & mask_and) == mask_and) + *pixel_ptr = *(src_ptr++) | mask_or; } } } @@ -869,8 +883,8 @@ void GPU::DrawDebugStateWindow() { ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled"); ImGui::Text("Draw To Display Area: %s", m_GPUSTAT.dither_enable ? "Yes" : "No"); - ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.draw_set_mask_bit ? "Yes" : "No"); - ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.draw_to_masked_pixels ? "Yes" : "No"); + ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No"); + ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No"); ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No"); ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No"); ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No"); diff --git a/src/core/gpu.h b/src/core/gpu.h index 1b136725a..880c903a7 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -326,8 +326,8 @@ protected: BitField texture_color_mode; BitField dither_enable; BitField draw_to_display_area; - BitField draw_set_mask_bit; - BitField draw_to_masked_pixels; + BitField set_mask_while_drawing; + BitField check_mask_before_draw; BitField interlaced_field; BitField reverse_flag; BitField texture_disable; @@ -346,7 +346,12 @@ protected: BitField dma_direction; BitField drawing_even_line; - bool In480iMode() const { return vertical_interlace & vertical_resolution; } + bool IsMaskingEnabled() const { return (bits & ((1 << 11) | (1 << 12))) != 0; } + bool In480iMode() const { return (bits & ((1 << 22) | (1 << 19))) != 0; } + + // During transfer/render operations, if ((dst_pixel & mask_and) == mask_and) { pixel = src_pixel | mask_or } + u16 GetMaskAND() const { return check_mask_before_draw ? 0x8000 : 0x0000; } + u16 GetMaskOR() const { return set_mask_while_drawing ? 0x8000 : 0x0000; } } m_GPUSTAT = {}; struct RenderState diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 20c10eda9..b1f394580 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -212,10 +212,15 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size) { const u32 param = *(command_ptr++) & 0x00FFFFFF; - m_GPUSTAT.draw_set_mask_bit = (param & 0x01) != 0; - m_GPUSTAT.draw_to_masked_pixels = (param & 0x01) != 0; - Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.draw_set_mask_bit), - BoolToUInt32(m_GPUSTAT.draw_to_masked_pixels)); + constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); + const u32 gpustat_bits = (param & 0x03) << 11; + if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) + { + FlushRender(); + m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; + } + Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), + BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); EndCommand(); return true; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 825b11dc5..1db8234d4 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -52,11 +52,22 @@ void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { - for (u32 yoffs = 0; yoffs < height; yoffs++) + // This doesn't have a fast path, but do we really need one? It's not common. + const u16 mask_and = m_GPUSTAT.GetMaskAND(); + const u16 mask_or = m_GPUSTAT.GetMaskOR(); + + for (u32 row = 0; row < height; row++) { - const u16* src_ptr = GetPixelPtr(src_x, src_y + yoffs); - u16* dst_ptr = GetPixelPtr(dst_x, dst_y + yoffs); - std::copy_n(src_ptr, width, dst_ptr); + const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (u32 col = 0; col < width; col++) + { + const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == mask_and) + *dst_pixel_ptr = src_pixel | mask_or; + } } } @@ -502,7 +513,11 @@ void GPU_SW::ShadePixel(RenderCommand rc, u32 x, u32 y, u8 color_r, u8 color_g, #undef BLEND_AVERAGE } - SetPixel(static_cast(x), static_cast(y), color.bits); + const u16 mask_and = m_GPUSTAT.GetMaskAND(); + if ((color.bits & mask_and) != mask_and) + return; + + SetPixel(static_cast(x), static_cast(y), color.bits | m_GPUSTAT.GetMaskOR()); } std::unique_ptr GPU::CreateSoftwareRenderer()