GPU: Implement mask bit handling in software renderer

Still needs implementation in the hardware renderers.
This commit is contained in:
Connor McLaughlin 2019-11-24 18:47:40 +10:00
parent 6c6bf8714c
commit 9d6d00480c
4 changed files with 55 additions and 16 deletions

View file

@ -137,10 +137,19 @@ bool GPU::DoState(StateWrapper& sw)
if (sw.IsReading())
{
// Need to clear the mask bits since we want to pull it in from the copy.
const u32 old_GPUSTAT = m_GPUSTAT.bits;
m_GPUSTAT.check_mask_before_draw = false;
m_GPUSTAT.set_mask_while_drawing = false;
// Still need a temporary here.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data());
// Restore mask setting.
m_GPUSTAT.bits = old_GPUSTAT;
UpdateDisplay();
UpdateSliceTicks();
}
@ -673,7 +682,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {}
void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
// Fast path when the copy is not oversized.
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT)
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !m_GPUSTAT.IsMaskingEnabled())
{
const u16* src_ptr = static_cast<const u16*>(data);
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
@ -688,13 +697,18 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
// Slow path when we need to handle wrap-around.
const u16* src_ptr = static_cast<const u16*>(data);
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_or = m_GPUSTAT.GetMaskOR();
for (u32 row = 0; row < height;)
{
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width;)
{
// TODO: Handle unaligned reads...
dst_row_ptr[(x + col++) % VRAM_WIDTH] = *(src_ptr++);
u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
if (((*pixel_ptr) & mask_and) == mask_and)
*pixel_ptr = *(src_ptr++) | mask_or;
}
}
}
@ -869,8 +883,8 @@ void GPU::DrawDebugStateWindow()
{
ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled");
ImGui::Text("Draw To Display Area: %s", m_GPUSTAT.dither_enable ? "Yes" : "No");
ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.draw_set_mask_bit ? "Yes" : "No");
ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.draw_to_masked_pixels ? "Yes" : "No");
ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No");
ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No");
ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No");
ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No");
ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No");

View file

@ -326,8 +326,8 @@ protected:
BitField<u32, TextureMode, 7, 2> texture_color_mode;
BitField<u32, bool, 9, 1> dither_enable;
BitField<u32, bool, 10, 1> draw_to_display_area;
BitField<u32, bool, 11, 1> draw_set_mask_bit;
BitField<u32, bool, 12, 1> draw_to_masked_pixels;
BitField<u32, bool, 11, 1> set_mask_while_drawing;
BitField<u32, bool, 12, 1> check_mask_before_draw;
BitField<u32, bool, 13, 1> interlaced_field;
BitField<u32, bool, 14, 1> reverse_flag;
BitField<u32, bool, 15, 1> texture_disable;
@ -346,7 +346,12 @@ protected:
BitField<u32, DMADirection, 29, 2> dma_direction;
BitField<u32, bool, 31, 1> drawing_even_line;
// True only when both the vertical_interlace and vertical_resolution fields are set
// (the two fields are combined with a bitwise AND).
bool In480iMode() const { return vertical_interlace & vertical_resolution; }
bool IsMaskingEnabled() const { return (bits & ((1 << 11) | (1 << 12))) != 0; }
bool In480iMode() const { return (bits & ((1 << 22) | (1 << 19))) != 0; }
// During transfer/render operations, if ((dst_pixel & mask_and) == mask_and) { pixel = src_pixel | mask_or }
// NOTE(review): with check_mask_before_draw set, the test above passes only when bit 15 of the destination
// is already set. The documented GP0(E6h) behaviour is "draw if bit15=0", which would correspond to
// ((dst_pixel & mask_and) == 0) -- confirm against the hardware documentation.
// Returns the bit to test in the destination pixel: 0x8000 when check_mask_before_draw is set, otherwise 0.
u16 GetMaskAND() const { return check_mask_before_draw ? 0x8000 : 0x0000; }
// Returns the value OR'd into pixels as they are written: bit 15 (0x8000) when
// set_mask_while_drawing is enabled, otherwise zero so the source pixel is stored unchanged.
u16 GetMaskOR() const
{
  if (set_mask_while_drawing)
    return 0x8000;

  return 0x0000;
}
} m_GPUSTAT = {};
struct RenderState

View file

@ -212,10 +212,15 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size)
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
m_GPUSTAT.draw_set_mask_bit = (param & 0x01) != 0;
m_GPUSTAT.draw_to_masked_pixels = (param & 0x01) != 0;
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.draw_set_mask_bit),
BoolToUInt32(m_GPUSTAT.draw_to_masked_pixels));
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11;
if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits)
{
FlushRender();
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits;
}
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
EndCommand();
return true;

View file

@ -52,11 +52,22 @@ void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
// Copies a width x height rectangle of VRAM from (src_x, src_y) to (dst_x, dst_y).
// Source and destination coordinates both wrap around the VRAM edges (modulo VRAM_WIDTH / VRAM_HEIGHT).
// The GPUSTAT mask settings are applied per pixel:
//  - mask_and is 0x8000 when check_mask_before_draw is set, so destination pixels whose bit 15 is
//    already set are left untouched; when it is 0 every pixel is written.
//  - mask_or is 0x8000 when set_mask_while_drawing is set, forcing bit 15 on in every written pixel.
void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
  // This doesn't have a fast path, but do we really need one? It's not common.
  const u16 mask_and = m_GPUSTAT.GetMaskAND();
  const u16 mask_or = m_GPUSTAT.GetMaskOR();

  for (u32 row = 0; row < height; row++)
  {
    const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
    u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

    for (u32 col = 0; col < width; col++)
    {
      const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
      u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];

      // Write only to unprotected pixels: the test must be "== 0", not "== mask_and", otherwise
      // enabling the mask check would restrict writes to pixels that are already masked.
      if ((*dst_pixel_ptr & mask_and) == 0)
        *dst_pixel_ptr = src_pixel | mask_or;
    }
  }
}
@ -502,7 +513,11 @@ void GPU_SW::ShadePixel(RenderCommand rc, u32 x, u32 y, u8 color_r, u8 color_g,
#undef BLEND_AVERAGE
}
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits);
const u16 mask_and = m_GPUSTAT.GetMaskAND();
if ((color.bits & mask_and) != mask_and)
return;
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | m_GPUSTAT.GetMaskOR());
}
std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()