CPU/PGXP: Reduce vertex cache memory

This commit is contained in:
Stenzek 2024-05-25 02:06:40 +10:00
parent b4df9d3876
commit 22c76d43c3
No known key found for this signature in database
3 changed files with 22 additions and 29 deletions

View file

@ -27,8 +27,8 @@ namespace {
enum : u32 enum : u32
{ {
VERTEX_CACHE_WIDTH = 0x800 * 2, VERTEX_CACHE_WIDTH = 2048,
VERTEX_CACHE_HEIGHT = 0x800 * 2, VERTEX_CACHE_HEIGHT = 2048,
VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT, VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4, PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4, PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
@ -70,8 +70,8 @@ union psx_value
}; };
} // namespace } // namespace
static void CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex); static void CacheVertex(u32 value, const PGXP_value& vertex);
static PGXP_value* GetCachedVertex(short sx, short sy); static PGXP_value* GetCachedVertex(u32 value);
static float TruncateVertexPosition(float p); static float TruncateVertexPosition(float p);
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y); static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
@ -168,7 +168,7 @@ void CPU::PGXP::Reset()
if (s_mem) if (s_mem)
std::memset(s_mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE); std::memset(s_mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE);
if (s_vertex_cache) if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
std::memset(s_vertex_cache, 0, sizeof(PGXP_value) * VERTEX_CACHE_SIZE); std::memset(s_vertex_cache, 0, sizeof(PGXP_value) * VERTEX_CACHE_SIZE);
} }
@ -461,7 +461,7 @@ void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, co
#endif #endif
void CPU::PGXP::GTE_PushSXYZ2f(float x, float y, float z, u32 v) void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
{ {
// push values down FIFO // push values down FIFO
SXY0 = SXY1; SXY0 = SXY1;
@ -470,11 +470,11 @@ void CPU::PGXP::GTE_PushSXYZ2f(float x, float y, float z, u32 v)
SXY2.x = x; SXY2.x = x;
SXY2.y = y; SXY2.y = y;
SXY2.z = z; SXY2.z = z;
SXY2.value = v; SXY2.value = value;
SXY2.flags = VALID_ALL; SXY2.flags = VALID_ALL;
if (g_settings.gpu_pgxp_vertex_cache) if (g_settings.gpu_pgxp_vertex_cache)
CacheVertex(static_cast<s16>(Truncate16(v)), static_cast<s16>(Truncate16(v >> 16)), SXY2); CacheVertex(value, SXY2);
} }
#define VX(n) (psxRegs.CP2D.p[n << 1].sw.l) #define VX(n) (psxRegs.CP2D.p[n << 1].sw.l)
@ -583,24 +583,21 @@ void CPU::PGXP::CPU_SWC2(u32 instr, u32 addr, u32 rtVal)
WriteMem(&g_state.pgxp_gte[idx], addr); WriteMem(&g_state.pgxp_gte[idx], addr);
} }
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex) ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXP_value& vertex)
{ {
if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff) const s16 sx = static_cast<s16>(value & 0xFFFFu);
{ const s16 sy = static_cast<s16>(value >> 16);
// Write vertex into cache DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);
s_vertex_cache[(sy + 0x800) * VERTEX_CACHE_WIDTH + (sx + 0x800)] = vertex; s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] = vertex;
}
} }
ALWAYS_INLINE_RELEASE CPU::PGXP_value* CPU::PGXP::GetCachedVertex(short sx, short sy) ALWAYS_INLINE_RELEASE CPU::PGXP_value* CPU::PGXP::GetCachedVertex(u32 value)
{ {
if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff) const s16 sx = static_cast<s16>(value & 0xFFFFu);
{ const s16 sy = static_cast<s16>(value >> 16);
// Return pointer to cache entry return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023) ?
return &s_vertex_cache[(sy + 0x800) * VERTEX_CACHE_WIDTH + (sx + 0x800)]; &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] :
} nullptr;
return nullptr;
} }
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p) ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
@ -646,11 +643,7 @@ bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, i
if (g_settings.gpu_pgxp_vertex_cache) if (g_settings.gpu_pgxp_vertex_cache)
{ {
const short psx_x = (short)(value & 0xFFFFu); vert = GetCachedVertex(value);
const short psx_y = (short)(value >> 16);
// Look in cache for valid vertex
vert = GetCachedVertex(psx_x, psx_y);
if (vert && (vert->flags & VALID_XY) == VALID_XY) if (vert && (vert->flags & VALID_XY) == VALID_XY)
{ {
*out_x = TruncateVertexPosition(vert->x) + static_cast<float>(xOffs); *out_x = TruncateVertexPosition(vert->x) + static_cast<float>(xOffs);

View file

@ -12,7 +12,7 @@ void Shutdown();
// -- GTE functions // -- GTE functions
// Transforms // Transforms
void GTE_PushSXYZ2f(float x, float y, float z, u32 v); void GTE_RTPS(float x, float y, float z, u32 value);
int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2); int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2);
float GTE_NCLIP(); float GTE_NCLIP();

View file

@ -794,7 +794,7 @@ void GTE::RTPS(const s16 V[3], u8 shift, bool lm, bool last)
precise_x = std::clamp<float>(precise_x, -1024.0f, 1023.0f); precise_x = std::clamp<float>(precise_x, -1024.0f, 1023.0f);
precise_y = std::clamp<float>(precise_y, -1024.0f, 1023.0f); precise_y = std::clamp<float>(precise_y, -1024.0f, 1023.0f);
CPU::PGXP::GTE_PushSXYZ2f(precise_x, precise_y, precise_z, REGS.dr32[14]); CPU::PGXP::GTE_RTPS(precise_x, precise_y, precise_z, REGS.dr32[14]);
} }
if (last) if (last)