GPU: Implement CLUT cache (SW renderer only)

Fixes copyright screen in Shadow Master.
Fixes title splash in Castrol Honda Superbike Racing.
This commit is contained in:
Stenzek 2024-05-01 13:51:01 +10:00
parent d1483d8077
commit 1185f8f6aa
No known key found for this signature in database
13 changed files with 159 additions and 35 deletions

View file

@ -36,6 +36,7 @@ Log_SetChannel(GPU);
std::unique_ptr<GPU> g_gpu;
alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)];
u16 g_gpu_clut[GPU_CLUT_SIZE];
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
@ -173,7 +174,10 @@ void GPU::Reset(bool clear_vram)
m_crtc_state.interlaced_display_field = 0;
if (clear_vram)
{
std::memset(g_vram, 0, sizeof(g_vram));
std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut));
}
// Force event to reschedule itself.
m_crtc_tick_event->Deactivate();
@ -224,6 +228,7 @@ void GPU::SoftReset()
SetDrawMode(0);
SetTexturePalette(0);
SetTextureWindow(0);
InvalidateCLUT();
UpdateDMARequest();
UpdateCRTCConfig();
UpdateCommandTickEvent();
@ -308,6 +313,18 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
sw.Do(&m_command_total_words);
sw.Do(&m_GPUREAD_latch);
if (sw.GetVersion() < 64) [[unlikely]]
{
// Clear CLUT cache and let it populate later.
InvalidateCLUT();
}
else
{
sw.Do(&m_current_clut_reg_bits);
sw.Do(&m_current_clut_is_8bit);
sw.DoArray(g_gpu_clut, std::size(g_gpu_clut));
}
sw.Do(&m_vram_transfer.x);
sw.Do(&m_vram_transfer.y);
sw.Do(&m_vram_transfer.width);
@ -1443,6 +1460,27 @@ void GPU::HandleGetGPUInfoCommand(u32 value)
}
}
/// Reloads the cached CLUT when the requested palette register differs from the cached one, or when an
/// 8-bit CLUT is requested while only a 4-bit one is cached.
/// @param texmode Texture mode of the draw; modes at or above Direct16Bit never trigger a reload.
/// @param clut Palette register value the draw refers to.
void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut)
{
  if (texmode >= GPUTextureMode::Direct16Bit)
    return;

  const bool needs_8bit = (texmode == GPUTextureMode::Palette8Bit);

  // The cache can be reused only if it was filled from the same register value and is at least as
  // wide as what this draw asks for (a cached 8-bit CLUT also satisfies a 4-bit request).
  const bool cache_is_usable =
    (clut.bits == m_current_clut_reg_bits) && (BoolToUInt8(needs_8bit) <= BoolToUInt8(m_current_clut_is_8bit));
  if (cache_is_usable)
    return;

  Log_DebugFmt("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit");
  UpdateCLUT(clut, needs_8bit);

  // Remember what was loaded so identical follow-up requests are skipped.
  m_current_clut_is_8bit = needs_8bit;
  m_current_clut_reg_bits = clut.bits;
}
void GPU::InvalidateCLUT()
{
m_current_clut_reg_bits = std::numeric_limits<decltype(m_current_clut_reg_bits)>::max(); // will never match
m_current_clut_is_8bit = false;
}
void GPU::ClearDisplay()
{
ClearDisplayTexture();
@ -1451,10 +1489,6 @@ void GPU::ClearDisplay()
DestroyDeinterlaceTextures();
}
void GPU::UpdateDisplay()
{
}
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
}
@ -1615,14 +1649,6 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
}
}
void GPU::DispatchRenderCommand()
{
}
void GPU::FlushRender()
{
}
void GPU::SetDrawMode(u16 value)
{
GPUDrawModeReg new_mode_reg{static_cast<u16>(value & GPUDrawModeReg::MASK)};
@ -1677,6 +1703,31 @@ void GPU::SetTextureWindow(u32 value)
m_draw_mode.texture_window_changed = true;
}
/// Copies CLUT entries out of g_vram into dest, starting at the position given by the palette register.
/// @param dest Destination buffer; receives 16 entries in 4-bit mode and 256 entries in 8-bit mode.
/// @param reg Palette register supplying the X/Y base of the CLUT.
/// @param clut_is_8bit Selects the 256-entry read (with horizontal wrap-around) over the 16-entry read.
void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit)
{
  const u32 first_column = reg.GetXBase();
  const u16* const row = &g_vram[reg.GetYBase() * VRAM_WIDTH];

  if (!clut_is_8bit)
  {
    // Wraparound can't happen in 4-bit mode, so the 16 entries are copied in one go.
    std::memcpy(dest, row + first_column, sizeof(u16) * 16);
    return;
  }

  if ((first_column + 256) > VRAM_WIDTH) [[unlikely]]
  {
    // The CLUT runs off the right edge of the row: take what fits, then continue from column 0
    // of the same row.
    const u32 entries_before_edge = VRAM_WIDTH - first_column;
    const u32 entries_after_wrap = 256 - entries_before_edge;
    std::memcpy(dest, row + first_column, sizeof(u16) * entries_before_edge);
    std::memcpy(dest + entries_before_edge, row, sizeof(u16) * entries_after_wrap);
    return;
  }

  std::memcpy(dest, row + first_column, sizeof(u16) * 256);
}
bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing)
{
GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend,

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -200,7 +200,7 @@ public:
bool DumpVRAMToFile(const char* filename);
// Ensures all buffered vertices are drawn.
virtual void FlushRender();
virtual void FlushRender() = 0;
ALWAYS_INLINE const void* GetDisplayTextureHandle() const { return m_display_texture; }
ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; }
@ -225,6 +225,9 @@ public:
/// Draws the current display texture, with any post-processing.
bool PresentDisplay();
/// Reads the CLUT from the specified coordinates, accounting for wrap-around.
static void ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit);
protected:
TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const;
TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const;
@ -306,14 +309,17 @@ protected:
void ExecuteCommands();
void TryExecuteCommands();
void HandleGetGPUInfoCommand(u32 value);
void UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut);
void InvalidateCLUT();
// Rendering in the backend
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height);
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask);
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
virtual void DispatchRenderCommand();
virtual void UpdateDisplay();
virtual void DispatchRenderCommand() = 0;
virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0;
virtual void UpdateDisplay() = 0;
virtual void DrawRendererStats();
ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded,
@ -568,6 +574,11 @@ protected:
/// GPUREAD value for non-VRAM-reads.
u32 m_GPUREAD_latch = 0;
// These are the bits from the palette register, but zero extended to 32-bit, so we can have an "invalid" value.
// If an extra byte is ever not needed here for padding, the 8-bit flag could be packed into the MSB of this value.
u32 m_current_clut_reg_bits = {};
bool m_current_clut_is_8bit = false;
/// True if currently executing/syncing.
bool m_executing_commands = false;
@ -693,3 +704,4 @@ private:
extern std::unique_ptr<GPU> g_gpu;
extern u16 g_vram[VRAM_SIZE / sizeof(u16)];
extern u16 g_gpu_clut[GPU_CLUT_SIZE];

View file

@ -73,6 +73,12 @@ GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand()
AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand)));
}
/// Allocates a command tagged GPUBackendCommandType::UpdateCLUT, sized for GPUBackendUpdateCLUTCommand,
/// and returns it as that type.
GPUBackendUpdateCLUTCommand* GPUBackend::NewUpdateCLUTCommand()
{
  using Command = GPUBackendUpdateCLUTCommand;
  return static_cast<Command*>(AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(Command)));
}
GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices)
{
const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex));
@ -309,6 +315,13 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
}
break;
case GPUBackendCommandType::UpdateCLUT:
{
const GPUBackendUpdateCLUTCommand* ccmd = static_cast<const GPUBackendUpdateCLUTCommand*>(cmd);
UpdateCLUT(ccmd->reg, ccmd->clut_is_8bit);
}
break;
case GPUBackendCommandType::DrawPolygon:
{
DrawPolygon(static_cast<const GPUBackendDrawPolygonCommand*>(cmd));
@ -328,6 +341,6 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
break;
default:
break;
UnreachableCode();
}
}

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -33,6 +33,7 @@ public:
GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words);
GPUBackendCopyVRAMCommand* NewCopyVRAMCommand();
GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand();
GPUBackendUpdateCLUTCommand* NewUpdateCLUTCommand();
GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices);
GPUBackendDrawRectangleCommand* NewDrawRectangleCommand();
GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices);
@ -60,6 +61,7 @@ protected:
virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
virtual void FlushRender() = 0;
virtual void DrawingAreaChanged() = 0;
virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0;
void HandleCommand(const GPUBackendCommand* cmd);

View file

@ -198,6 +198,7 @@ bool GPU::HandleClearCacheCommand()
{
Log_DebugPrintf("GP0 clear cache");
m_draw_mode.SetTexturePageChanged();
InvalidateCLUT();
m_fifo.RemoveOne();
AddCommandTicks(1);
EndCommand();
@ -347,6 +348,7 @@ bool GPU::HandleRenderPolygonCommand()
SetDrawMode((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg);
}
m_counters.num_vertices += num_vertices;
@ -371,7 +373,10 @@ bool GPU::HandleRenderRectangleCommand()
SynchronizeCRTC();
if (rc.texture_enable)
{
SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg);
}
const TickCount setup_ticks = 16;
AddCommandTicks(setup_ticks);

View file

@ -3110,6 +3110,13 @@ void GPU_HW::DispatchRenderCommand()
LoadVertices();
}
// GPU_HW's handler for a CLUT reload request. It copies no palette data; the only statement is a
// GL_INS_FMT message reporting the requested base coordinates and depth as "not implemented".
void GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{
// Not done in HW
GL_INS_FMT("Reloading CLUT from {},{}, {} not implemented", reg.GetXBase(), reg.GetYBase(),
clut_is_8bit ? "8-bit" : "4-bit");
}
void GPU_HW::FlushRender()
{
const u32 base_vertex = m_batch_base_vertex;

View file

@ -47,8 +47,8 @@ public:
void UpdateSettings(const Settings& old_settings) override;
void UpdateResolutionScale() override final;
std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true) override final;
std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true) override final;
std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true) override;
std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true) override;
void UpdateDisplay() override;
@ -176,6 +176,7 @@ private:
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void DispatchRenderCommand() override;
void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override;
void FlushRender() override;
void DrawRendererStats() override;

View file

@ -824,6 +824,19 @@ void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
m_backend.PushCommand(cmd);
}
// Empty body: GPU_SW does no work when FlushRender() is called.
void GPU_SW::FlushRender()
{
}
/// Builds an UpdateCLUT backend command from the given palette register and depth flag, then pushes it
/// to m_backend.
void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{
  GPUBackendUpdateCLUTCommand* const clut_cmd = m_backend.NewUpdateCLUTCommand();
  FillBackendCommandParameters(clut_cmd);

  // Payload: CLUT depth and the raw register bits.
  clut_cmd->clut_is_8bit = clut_is_8bit;
  clut_cmd->reg.bits = reg.bits;

  m_backend.PushCommand(clut_cmd);
}
std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()
{
std::unique_ptr<GPU_SW> gpu(std::make_unique<GPU_SW>());

View file

@ -40,6 +40,8 @@ protected:
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FlushRender() override;
void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override;
template<GPUTexture::Format display_format>
bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip);

View file

@ -1,8 +1,8 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "gpu.h"
#include "gpu_sw_backend.h"
#include "gpu.h"
#include "system.h"
#include "util/gpu_device.h"
@ -93,10 +93,8 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman
const u16 palette_value =
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
texture_color.bits =
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
const size_t palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
texture_color.bits = g_gpu_clut[palette_index];
}
break;
@ -105,9 +103,8 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman
const u16 palette_value =
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
texture_color.bits =
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
const size_t palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
texture_color.bits = g_gpu_clut[palette_index];
}
break;
@ -869,9 +866,18 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
}
}
void GPU_SW_Backend::FlushRender() {}
void GPU_SW_Backend::FlushRender()
{
}
void GPU_SW_Backend::DrawingAreaChanged() {}
void GPU_SW_Backend::DrawingAreaChanged()
{
}
// Refills the global g_gpu_clut array by passing it, with the given register and depth flag, to
// GPU::ReadCLUT.
void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{
GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit);
}
GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shading_enable, bool transparency_enable,
bool dithering_enable)

View file

@ -1,8 +1,11 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "gpu.h"
#include "gpu_backend.h"
#include <array>
#include <memory>
#include <vector>
@ -97,6 +100,7 @@ protected:
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
void FlushRender() override;
void DrawingAreaChanged() override;
void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override;
//////////////////////////////////////////////////////////////////////////
// Rasterization

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -17,6 +17,7 @@ enum : u32
VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1,
TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256,
GPU_CLUT_SIZE = 256,
// In interlaced modes, we can exceed the 512 height of VRAM, up to 576 in PAL games.
GPU_MAX_DISPLAY_WIDTH = 720,
@ -259,9 +260,10 @@ enum class GPUBackendCommandType : u8
UpdateVRAM,
CopyVRAM,
SetDrawingArea,
UpdateCLUT,
DrawPolygon,
DrawRectangle,
DrawLine
DrawLine,
};
union GPUBackendCommandParameters
@ -336,6 +338,12 @@ struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand
GPUDrawingArea new_area;
};
// Backend command carrying the two arguments of a CLUT reload request.
struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand
{
// Palette register value identifying where the CLUT is located.
GPUTexturePaletteReg reg;
// True when an 8-bit CLUT is requested, false for a 4-bit one.
bool clut_is_8bit;
};
struct GPUBackendDrawCommand : public GPUBackendCommand
{
GPUDrawModeReg draw_mode;

View file

@ -5,7 +5,7 @@
#include "types.h"
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
static constexpr u32 SAVE_STATE_VERSION = 63;
static constexpr u32 SAVE_STATE_VERSION = 64;
static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42;
static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);