mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-20 15:25:38 +00:00
CPU/NewRec: Fix PGXP desync
Wobbly geometry in Threads of Fate.
This commit is contained in:
parent
979b7bd36f
commit
a3013efbca
|
@ -19,10 +19,6 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#ifdef ENABLE_RECOMPILER
|
|
||||||
// #include "cpu_recompiler_types.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace CPU::CodeCache {
|
namespace CPU::CodeCache {
|
||||||
|
|
||||||
enum : u32
|
enum : u32
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
// SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
|
||||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||||
|
|
||||||
#include "cpu_newrec_compiler_aarch32.h"
|
#include "cpu_newrec_compiler_aarch32.h"
|
||||||
|
@ -1637,9 +1637,9 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
||||||
{
|
{
|
||||||
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
||||||
// new_value = (value & mask) | (RWRET << (24 - shift));
|
// new_value = (value & mask) | (RWRET << (24 - shift));
|
||||||
EmitMov(addr, 0xFFFFFFu);
|
EmitMov(RARG4, 0xFFFFFFu);
|
||||||
armAsm->lsr(addr, addr, RARG2);
|
armAsm->lsr(RARG4, RARG4, RARG2);
|
||||||
armAsm->and_(value, value, addr);
|
armAsm->and_(value, value, RARG4);
|
||||||
armAsm->lsl(RRET, RRET, RARG3);
|
armAsm->lsl(RRET, RRET, RARG3);
|
||||||
armAsm->orr(value, value, RRET);
|
armAsm->orr(value, value, RRET);
|
||||||
}
|
}
|
||||||
|
@ -1648,27 +1648,40 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
||||||
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
||||||
// new_value = (value & mask) | (RWRET >> shift);
|
// new_value = (value & mask) | (RWRET >> shift);
|
||||||
armAsm->lsr(RRET, RRET, RARG2);
|
armAsm->lsr(RRET, RRET, RARG2);
|
||||||
EmitMov(addr, 0xFFFFFF00u);
|
EmitMov(RARG4, 0xFFFFFF00u);
|
||||||
armAsm->lsl(addr, addr, RARG3);
|
armAsm->lsl(RARG4, RARG4, RARG3);
|
||||||
armAsm->and_(value, value, addr);
|
armAsm->and_(value, value, RARG4);
|
||||||
armAsm->orr(value, value, RRET);
|
armAsm->orr(value, value, RRET);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeHostReg(addr.GetCode());
|
FreeHostReg(addr.GetCode());
|
||||||
|
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
{
|
||||||
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
armAsm->mov(RARG3, value);
|
||||||
|
armAsm->and_(RARG2, addr, armCheckLogicalConstant(~0x3u));
|
||||||
|
EmitMov(RARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
const std::optional<VirtualMemoryAddress>& address)
|
const std::optional<VirtualMemoryAddress>& address)
|
||||||
{
|
{
|
||||||
|
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||||
|
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||||
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
|
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||||
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||||
std::optional<Register>();
|
std::optional<Register>();
|
||||||
FlushForLoadStore(address, false, use_fastmem);
|
FlushForLoadStore(address, false, use_fastmem);
|
||||||
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
const Register value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action]() {
|
||||||
|
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
|
||||||
|
Register(AllocateTempHostReg(HR_CALLEE_SAVED)) :
|
||||||
|
RRET;
|
||||||
|
});
|
||||||
|
|
||||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
|
||||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
|
||||||
switch (action)
|
switch (action)
|
||||||
{
|
{
|
||||||
case GTERegisterAccessAction::Ignore:
|
case GTERegisterAccessAction::Ignore:
|
||||||
|
@ -1678,28 +1691,28 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
|
|
||||||
case GTERegisterAccessAction::Direct:
|
case GTERegisterAccessAction::Direct:
|
||||||
{
|
{
|
||||||
armAsm->str(RRET, PTR(ptr));
|
armAsm->str(value, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::SignExtend16:
|
case GTERegisterAccessAction::SignExtend16:
|
||||||
{
|
{
|
||||||
armAsm->sxth(RRET, RRET);
|
armAsm->sxth(RARG3, value);
|
||||||
armAsm->str(RRET, PTR(ptr));
|
armAsm->str(RARG3, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::ZeroExtend16:
|
case GTERegisterAccessAction::ZeroExtend16:
|
||||||
{
|
{
|
||||||
armAsm->uxth(RRET, RRET);
|
armAsm->uxth(RARG3, value);
|
||||||
armAsm->str(RRET, PTR(ptr));
|
armAsm->str(RARG3, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::CallHandler:
|
case GTERegisterAccessAction::CallHandler:
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
armAsm->mov(RARG2, RRET);
|
armAsm->mov(RARG2, value);
|
||||||
EmitMov(RARG1, index);
|
EmitMov(RARG1, index);
|
||||||
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
|
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
|
||||||
break;
|
break;
|
||||||
|
@ -1710,12 +1723,12 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
// SXY0 <- SXY1
|
// SXY0 <- SXY1
|
||||||
// SXY1 <- SXY2
|
// SXY1 <- SXY2
|
||||||
// SXY2 <- SXYP
|
// SXY2 <- SXYP
|
||||||
DebugAssert(RRET.GetCode() != RARG2.GetCode() && RRET.GetCode() != RARG3.GetCode());
|
DebugAssert(value.GetCode() != RARG2.GetCode() && value.GetCode() != RARG3.GetCode());
|
||||||
armAsm->ldr(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
|
armAsm->ldr(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
|
||||||
armAsm->ldr(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
|
armAsm->ldr(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
|
||||||
armAsm->str(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
|
armAsm->str(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
|
||||||
armAsm->str(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
|
armAsm->str(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
|
||||||
armAsm->str(RRET, PTR(&g_state.gte_regs.SXY2[0]));
|
armAsm->str(value, PTR(&g_state.gte_regs.SXY2[0]));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1729,11 +1742,13 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
if (g_settings.gpu_pgxp_enable)
|
if (g_settings.gpu_pgxp_enable)
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
armAsm->mov(RARG3, RRET);
|
armAsm->mov(RARG3, value);
|
||||||
|
if (value.GetCode() != RRET.GetCode())
|
||||||
|
FreeHostReg(value.GetCode());
|
||||||
armAsm->mov(RARG2, addr);
|
armAsm->mov(RARG2, addr);
|
||||||
|
FreeHostReg(addr_reg.value().GetCode());
|
||||||
EmitMov(RARG1, inst->bits);
|
EmitMov(RARG1, inst->bits);
|
||||||
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
||||||
FreeHostReg(addr_reg.value().GetCode());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1782,17 +1797,18 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
||||||
// TODO: this can take over rt's value if it's no longer needed
|
// TODO: this can take over rt's value if it's no longer needed
|
||||||
// NOTE: can't trust T in cf because of the flush
|
// NOTE: can't trust T in cf because of the flush
|
||||||
const Reg rt = inst->r.rt;
|
const Reg rt = inst->r.rt;
|
||||||
const Register value = RARG2;
|
const Register value = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
|
||||||
if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
|
MoveMIPSRegToReg(value, rt);
|
||||||
armAsm->mov(value, Register(rtreg.value()));
|
|
||||||
else if (HasConstantReg(rt))
|
|
||||||
EmitMov(value, GetConstantRegU32(rt));
|
|
||||||
else
|
|
||||||
armAsm->ldr(value, MipsPtr(rt));
|
|
||||||
|
|
||||||
armAsm->and_(RSCRATCH, addr, 3);
|
armAsm->and_(RSCRATCH, addr, 3);
|
||||||
armAsm->lsl(RSCRATCH, RSCRATCH, 3); // *8
|
armAsm->lsl(RSCRATCH, RSCRATCH, 3); // *8
|
||||||
|
|
||||||
|
// Don't need the original address anymore.
|
||||||
|
if (!g_settings.gpu_pgxp_enable)
|
||||||
|
FreeHostReg(addr.GetCode());
|
||||||
|
else
|
||||||
|
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
|
||||||
|
|
||||||
if (inst->op == InstructionOp::swl)
|
if (inst->op == InstructionOp::swl)
|
||||||
{
|
{
|
||||||
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
||||||
|
@ -1820,10 +1836,18 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
||||||
armAsm->orr(value, value, RRET);
|
armAsm->orr(value, value, RRET);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeHostReg(addr.GetCode());
|
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
|
||||||
|
|
||||||
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
|
if (g_settings.gpu_pgxp_enable)
|
||||||
GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
|
{
|
||||||
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
armAsm->mov(RARG3, value);
|
||||||
|
FreeHostReg(value.GetCode());
|
||||||
|
armAsm->mov(RARG2, addr);
|
||||||
|
FreeHostReg(addr.GetCode());
|
||||||
|
EmitMov(RARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
|
@ -1877,10 +1901,10 @@ void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
armAsm->mov(RARG3, data_backup);
|
armAsm->mov(RARG3, data_backup);
|
||||||
armAsm->mov(RARG2, addr_reg);
|
armAsm->mov(RARG2, addr_reg);
|
||||||
EmitMov(RARG1, inst->bits);
|
|
||||||
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
|
|
||||||
FreeHostReg(addr_reg.GetCode());
|
FreeHostReg(addr_reg.GetCode());
|
||||||
FreeHostReg(data_backup.GetCode());
|
FreeHostReg(data_backup.GetCode());
|
||||||
|
EmitMov(RARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::AArch32Compiler::Compile_mtc0(CompileFlags cf)
|
void CPU::NewRec::AArch32Compiler::Compile_mtc0(CompileFlags cf)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
// SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
|
||||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||||
|
|
||||||
#include "cpu_newrec_compiler_aarch64.h"
|
#include "cpu_newrec_compiler_aarch64.h"
|
||||||
|
@ -1616,9 +1616,9 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
||||||
{
|
{
|
||||||
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
||||||
// new_value = (value & mask) | (RWRET << (24 - shift));
|
// new_value = (value & mask) | (RWRET << (24 - shift));
|
||||||
EmitMov(addr, 0xFFFFFFu);
|
EmitMov(RWARG4, 0xFFFFFFu);
|
||||||
armAsm->lsrv(addr, addr, RWARG2);
|
armAsm->lsrv(RWARG4, RWARG4, RWARG2);
|
||||||
armAsm->and_(value, value, addr);
|
armAsm->and_(value, value, RWARG4);
|
||||||
armAsm->lslv(RWRET, RWRET, RWARG3);
|
armAsm->lslv(RWRET, RWRET, RWARG3);
|
||||||
armAsm->orr(value, value, RWRET);
|
armAsm->orr(value, value, RWRET);
|
||||||
}
|
}
|
||||||
|
@ -1627,27 +1627,40 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
||||||
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
||||||
// new_value = (value & mask) | (RWRET >> shift);
|
// new_value = (value & mask) | (RWRET >> shift);
|
||||||
armAsm->lsrv(RWRET, RWRET, RWARG2);
|
armAsm->lsrv(RWRET, RWRET, RWARG2);
|
||||||
EmitMov(addr, 0xFFFFFF00u);
|
EmitMov(RWARG4, 0xFFFFFF00u);
|
||||||
armAsm->lslv(addr, addr, RWARG3);
|
armAsm->lslv(RWARG4, RWARG4, RWARG3);
|
||||||
armAsm->and_(value, value, addr);
|
armAsm->and_(value, value, RWARG4);
|
||||||
armAsm->orr(value, value, RWRET);
|
armAsm->orr(value, value, RWRET);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeHostReg(addr.GetCode());
|
FreeHostReg(addr.GetCode());
|
||||||
|
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
{
|
||||||
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
armAsm->mov(RWARG3, value);
|
||||||
|
armAsm->and_(RWARG2, addr, armCheckLogicalConstant(~0x3u));
|
||||||
|
EmitMov(RWARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
const std::optional<VirtualMemoryAddress>& address)
|
const std::optional<VirtualMemoryAddress>& address)
|
||||||
{
|
{
|
||||||
|
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||||
|
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||||
const std::optional<WRegister> addr_reg =
|
const std::optional<WRegister> addr_reg =
|
||||||
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||||
std::optional<WRegister>();
|
std::optional<WRegister>();
|
||||||
FlushForLoadStore(address, false, use_fastmem);
|
FlushForLoadStore(address, false, use_fastmem);
|
||||||
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
const WRegister value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action]() {
|
||||||
|
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
|
||||||
|
WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
|
||||||
|
RWRET;
|
||||||
|
});
|
||||||
|
|
||||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
|
||||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
|
||||||
switch (action)
|
switch (action)
|
||||||
{
|
{
|
||||||
case GTERegisterAccessAction::Ignore:
|
case GTERegisterAccessAction::Ignore:
|
||||||
|
@ -1657,28 +1670,28 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
|
|
||||||
case GTERegisterAccessAction::Direct:
|
case GTERegisterAccessAction::Direct:
|
||||||
{
|
{
|
||||||
armAsm->str(RWRET, PTR(ptr));
|
armAsm->str(value, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::SignExtend16:
|
case GTERegisterAccessAction::SignExtend16:
|
||||||
{
|
{
|
||||||
armAsm->sxth(RWRET, RWRET);
|
armAsm->sxth(RWARG3, value);
|
||||||
armAsm->str(RWRET, PTR(ptr));
|
armAsm->str(RWARG3, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::ZeroExtend16:
|
case GTERegisterAccessAction::ZeroExtend16:
|
||||||
{
|
{
|
||||||
armAsm->uxth(RWRET, RWRET);
|
armAsm->uxth(RWARG3, value);
|
||||||
armAsm->str(RWRET, PTR(ptr));
|
armAsm->str(RWARG3, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::CallHandler:
|
case GTERegisterAccessAction::CallHandler:
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
armAsm->mov(RWARG2, RWRET);
|
armAsm->mov(RWARG2, value);
|
||||||
EmitMov(RWARG1, index);
|
EmitMov(RWARG1, index);
|
||||||
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
|
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
|
||||||
break;
|
break;
|
||||||
|
@ -1689,12 +1702,12 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
// SXY0 <- SXY1
|
// SXY0 <- SXY1
|
||||||
// SXY1 <- SXY2
|
// SXY1 <- SXY2
|
||||||
// SXY2 <- SXYP
|
// SXY2 <- SXYP
|
||||||
DebugAssert(RWRET.GetCode() != RWARG2.GetCode() && RWRET.GetCode() != RWARG3.GetCode());
|
DebugAssert(value.GetCode() != RWARG2.GetCode() && value.GetCode() != RWARG3.GetCode());
|
||||||
armAsm->ldr(RWARG2, PTR(&g_state.gte_regs.SXY1[0]));
|
armAsm->ldr(RWARG2, PTR(&g_state.gte_regs.SXY1[0]));
|
||||||
armAsm->ldr(RWARG3, PTR(&g_state.gte_regs.SXY2[0]));
|
armAsm->ldr(RWARG3, PTR(&g_state.gte_regs.SXY2[0]));
|
||||||
armAsm->str(RWARG2, PTR(&g_state.gte_regs.SXY0[0]));
|
armAsm->str(RWARG2, PTR(&g_state.gte_regs.SXY0[0]));
|
||||||
armAsm->str(RWARG3, PTR(&g_state.gte_regs.SXY1[0]));
|
armAsm->str(RWARG3, PTR(&g_state.gte_regs.SXY1[0]));
|
||||||
armAsm->str(RWRET, PTR(&g_state.gte_regs.SXY2[0]));
|
armAsm->str(value, PTR(&g_state.gte_regs.SXY2[0]));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1708,11 +1721,13 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
if (g_settings.gpu_pgxp_enable)
|
if (g_settings.gpu_pgxp_enable)
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
armAsm->mov(RWARG3, RWRET);
|
armAsm->mov(RWARG3, value);
|
||||||
|
if (value.GetCode() != RWRET.GetCode())
|
||||||
|
FreeHostReg(value.GetCode());
|
||||||
armAsm->mov(RWARG2, addr);
|
armAsm->mov(RWARG2, addr);
|
||||||
|
FreeHostReg(addr_reg.value().GetCode());
|
||||||
EmitMov(RWARG1, inst->bits);
|
EmitMov(RWARG1, inst->bits);
|
||||||
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
||||||
FreeHostReg(addr_reg.value().GetCode());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1761,17 +1776,18 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
||||||
// TODO: this can take over rt's value if it's no longer needed
|
// TODO: this can take over rt's value if it's no longer needed
|
||||||
// NOTE: can't trust T in cf because of the flush
|
// NOTE: can't trust T in cf because of the flush
|
||||||
const Reg rt = inst->r.rt;
|
const Reg rt = inst->r.rt;
|
||||||
const WRegister value = RWARG2;
|
const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
|
||||||
if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
|
MoveMIPSRegToReg(value, rt);
|
||||||
armAsm->mov(value, WRegister(rtreg.value()));
|
|
||||||
else if (HasConstantReg(rt))
|
|
||||||
EmitMov(value, GetConstantRegU32(rt));
|
|
||||||
else
|
|
||||||
armAsm->ldr(value, MipsPtr(rt));
|
|
||||||
|
|
||||||
armAsm->and_(RWSCRATCH, addr, 3);
|
armAsm->and_(RWSCRATCH, addr, 3);
|
||||||
armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8
|
armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8
|
||||||
|
|
||||||
|
// Don't need the original address anymore.
|
||||||
|
if (!g_settings.gpu_pgxp_enable)
|
||||||
|
FreeHostReg(addr.GetCode());
|
||||||
|
else
|
||||||
|
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
|
||||||
|
|
||||||
if (inst->op == InstructionOp::swl)
|
if (inst->op == InstructionOp::swl)
|
||||||
{
|
{
|
||||||
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
||||||
|
@ -1801,8 +1817,18 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
||||||
|
|
||||||
FreeHostReg(addr.GetCode());
|
FreeHostReg(addr.GetCode());
|
||||||
|
|
||||||
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
|
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
|
||||||
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
|
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
{
|
||||||
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
armAsm->mov(RWARG3, value);
|
||||||
|
FreeHostReg(value.GetCode());
|
||||||
|
armAsm->mov(RWARG2, addr);
|
||||||
|
FreeHostReg(addr.GetCode());
|
||||||
|
EmitMov(RWARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
// SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
|
||||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||||
|
|
||||||
#include "cpu_newrec_compiler_riscv64.h"
|
#include "cpu_newrec_compiler_riscv64.h"
|
||||||
|
@ -1925,9 +1925,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
||||||
{
|
{
|
||||||
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
||||||
// new_value = (value & mask) | (RWRET << (24 - shift));
|
// new_value = (value & mask) | (RWRET << (24 - shift));
|
||||||
EmitMov(addr, 0xFFFFFFu);
|
EmitMov(RSCRATCH, 0xFFFFFFu);
|
||||||
rvAsm->SRLW(addr, addr, RARG2);
|
rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG2);
|
||||||
rvAsm->AND(value, value, addr);
|
rvAsm->AND(value, value, RSCRATCH);
|
||||||
rvAsm->SLLW(RRET, RRET, RARG3);
|
rvAsm->SLLW(RRET, RRET, RARG3);
|
||||||
rvAsm->OR(value, value, RRET);
|
rvAsm->OR(value, value, RRET);
|
||||||
}
|
}
|
||||||
|
@ -1936,26 +1936,39 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
||||||
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
||||||
// new_value = (value & mask) | (RWRET >> shift);
|
// new_value = (value & mask) | (RWRET >> shift);
|
||||||
rvAsm->SRLW(RRET, RRET, RARG2);
|
rvAsm->SRLW(RRET, RRET, RARG2);
|
||||||
EmitMov(addr, 0xFFFFFF00u);
|
EmitMov(RSCRATCH, 0xFFFFFF00u);
|
||||||
rvAsm->SLLW(addr, addr, RARG3);
|
rvAsm->SLLW(RSCRATCH, RSCRATCH, RARG3);
|
||||||
rvAsm->AND(value, value, addr);
|
rvAsm->AND(value, value, RSCRATCH);
|
||||||
rvAsm->OR(value, value, RRET);
|
rvAsm->OR(value, value, RRET);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeHostReg(addr.Index());
|
FreeHostReg(addr.Index());
|
||||||
|
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
{
|
||||||
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
rvAsm->MV(RARG3, value);
|
||||||
|
rvAsm->ANDI(RARG2, addr, ~0x3u);
|
||||||
|
EmitMov(RARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
const std::optional<VirtualMemoryAddress>& address)
|
const std::optional<VirtualMemoryAddress>& address)
|
||||||
{
|
{
|
||||||
|
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||||
|
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||||
const std::optional<GPR> addr_reg =
|
const std::optional<GPR> addr_reg =
|
||||||
g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
|
g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
|
||||||
FlushForLoadStore(address, false, use_fastmem);
|
FlushForLoadStore(address, false, use_fastmem);
|
||||||
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
const GPR value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action]() {
|
||||||
|
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
|
||||||
|
GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
|
||||||
|
RRET;
|
||||||
|
});
|
||||||
|
|
||||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
|
||||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
|
||||||
switch (action)
|
switch (action)
|
||||||
{
|
{
|
||||||
case GTERegisterAccessAction::Ignore:
|
case GTERegisterAccessAction::Ignore:
|
||||||
|
@ -1965,28 +1978,28 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
|
|
||||||
case GTERegisterAccessAction::Direct:
|
case GTERegisterAccessAction::Direct:
|
||||||
{
|
{
|
||||||
rvAsm->SW(RRET, PTR(ptr));
|
rvAsm->SW(value, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::SignExtend16:
|
case GTERegisterAccessAction::SignExtend16:
|
||||||
{
|
{
|
||||||
EmitSExtH(RRET, RRET);
|
EmitSExtH(RARG3, value);
|
||||||
rvAsm->SW(RRET, PTR(ptr));
|
rvAsm->SW(RARG3, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::ZeroExtend16:
|
case GTERegisterAccessAction::ZeroExtend16:
|
||||||
{
|
{
|
||||||
EmitUExtH(RRET, RRET);
|
EmitUExtH(RARG3, value);
|
||||||
rvAsm->SW(RRET, PTR(ptr));
|
rvAsm->SW(RARG3, PTR(ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::CallHandler:
|
case GTERegisterAccessAction::CallHandler:
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
rvAsm->MV(RARG2, RRET);
|
rvAsm->MV(RARG2, value);
|
||||||
EmitMov(RARG1, index);
|
EmitMov(RARG1, index);
|
||||||
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
|
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
|
||||||
break;
|
break;
|
||||||
|
@ -1997,12 +2010,12 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
// SXY0 <- SXY1
|
// SXY0 <- SXY1
|
||||||
// SXY1 <- SXY2
|
// SXY1 <- SXY2
|
||||||
// SXY2 <- SXYP
|
// SXY2 <- SXYP
|
||||||
DebugAssert(RRET.Index() != RARG2.Index() && RRET.Index() != RARG3.Index());
|
DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
|
||||||
rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
|
rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
|
||||||
rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
|
rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
|
||||||
rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
|
rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
|
||||||
rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
|
rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
|
||||||
rvAsm->SW(RRET, PTR(&g_state.gte_regs.SXY2[0]));
|
rvAsm->SW(value, PTR(&g_state.gte_regs.SXY2[0]));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2016,11 +2029,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
||||||
if (g_settings.gpu_pgxp_enable)
|
if (g_settings.gpu_pgxp_enable)
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
rvAsm->MV(RARG3, RRET);
|
rvAsm->MV(RARG3, value);
|
||||||
|
if (value.Index() != RRET.Index())
|
||||||
|
FreeHostReg(value.Index());
|
||||||
rvAsm->MV(RARG2, addr);
|
rvAsm->MV(RARG2, addr);
|
||||||
|
FreeHostReg(addr_reg.value().Index());
|
||||||
EmitMov(RARG1, inst->bits);
|
EmitMov(RARG1, inst->bits);
|
||||||
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
||||||
FreeHostReg(addr_reg.value().Index());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2068,17 +2083,18 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
||||||
// TODO: this can take over rt's value if it's no longer needed
|
// TODO: this can take over rt's value if it's no longer needed
|
||||||
// NOTE: can't trust T in cf because of the flush
|
// NOTE: can't trust T in cf because of the flush
|
||||||
const Reg rt = inst->r.rt;
|
const Reg rt = inst->r.rt;
|
||||||
const GPR value = RARG2;
|
const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
|
||||||
if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
|
MoveMIPSRegToReg(value, rt);
|
||||||
rvAsm->MV(value, GPR(rtreg.value()));
|
|
||||||
else if (HasConstantReg(rt))
|
|
||||||
EmitMov(value, GetConstantRegU32(rt));
|
|
||||||
else
|
|
||||||
rvAsm->LW(value, PTR(&g_state.regs.r[static_cast<u8>(rt)]));
|
|
||||||
|
|
||||||
rvAsm->ANDI(RSCRATCH, addr, 3);
|
rvAsm->ANDI(RSCRATCH, addr, 3);
|
||||||
rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
|
rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
|
||||||
|
|
||||||
|
// Don't need the original address anymore.
|
||||||
|
if (!g_settings.gpu_pgxp_enable)
|
||||||
|
FreeHostReg(addr.Index());
|
||||||
|
else
|
||||||
|
rvAsm->ANDI(addr, addr, ~0x3u);
|
||||||
|
|
||||||
if (inst->op == InstructionOp::swl)
|
if (inst->op == InstructionOp::swl)
|
||||||
{
|
{
|
||||||
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
||||||
|
@ -2108,8 +2124,18 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
||||||
|
|
||||||
FreeHostReg(addr.Index());
|
FreeHostReg(addr.Index());
|
||||||
|
|
||||||
rvAsm->ANDI(RARG1, addr, ~0x3u);
|
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
|
||||||
GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
|
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
{
|
||||||
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
rvAsm->MV(RARG3, value);
|
||||||
|
FreeHostReg(value.Index());
|
||||||
|
rvAsm->MV(RARG2, addr);
|
||||||
|
FreeHostReg(addr.Index());
|
||||||
|
EmitMov(RARG1, inst->bits);
|
||||||
|
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
|
|
|
@ -1577,13 +1577,14 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
|
||||||
cg->mov(RWARG2, 24);
|
cg->mov(RWARG2, 24);
|
||||||
cg->sub(RWARG2, cg->ecx);
|
cg->sub(RWARG2, cg->ecx);
|
||||||
|
|
||||||
|
const Reg32& temp = (RWARG3 == cg->ecx) ? RWARG4 : RWARG3;
|
||||||
if (inst->op == InstructionOp::lwl)
|
if (inst->op == InstructionOp::lwl)
|
||||||
{
|
{
|
||||||
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
|
||||||
// new_value = (value & mask) | (RWRET << (24 - shift));
|
// new_value = (value & mask) | (RWRET << (24 - shift));
|
||||||
cg->mov(addr, 0xFFFFFFu);
|
cg->mov(temp, 0xFFFFFFu);
|
||||||
cg->shr(addr, cg->cl);
|
cg->shr(temp, cg->cl);
|
||||||
cg->and_(value, addr);
|
cg->and_(value, temp);
|
||||||
cg->mov(cg->ecx, RWARG2);
|
cg->mov(cg->ecx, RWARG2);
|
||||||
cg->shl(RWRET, cg->cl);
|
cg->shl(RWRET, cg->cl);
|
||||||
cg->or_(value, RWRET);
|
cg->or_(value, RWRET);
|
||||||
|
@ -1593,28 +1594,42 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
|
||||||
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
|
||||||
// new_value = (value & mask) | (RWRET >> shift);
|
// new_value = (value & mask) | (RWRET >> shift);
|
||||||
cg->shr(RWRET, cg->cl);
|
cg->shr(RWRET, cg->cl);
|
||||||
cg->mov(addr, 0xFFFFFF00u);
|
cg->mov(temp, 0xFFFFFF00u);
|
||||||
cg->mov(cg->ecx, RWARG2);
|
cg->mov(cg->ecx, RWARG2);
|
||||||
cg->shl(addr, cg->cl);
|
cg->shl(temp, cg->cl);
|
||||||
cg->and_(value, addr);
|
cg->and_(value, temp);
|
||||||
cg->or_(value, RWRET);
|
cg->or_(value, RWRET);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeHostReg(addr.getIdx());
|
FreeHostReg(addr.getIdx());
|
||||||
|
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
{
|
||||||
|
DebugAssert(value != RWARG3);
|
||||||
|
cg->mov(RWARG3, value);
|
||||||
|
cg->mov(RWARG2, addr);
|
||||||
|
cg->and_(RWARG2, ~0x3u);
|
||||||
|
cg->mov(RWARG1, inst->bits);
|
||||||
|
cg->call(reinterpret_cast<const void*>(&PGXP::CPU_LW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
const std::optional<VirtualMemoryAddress>& address)
|
const std::optional<VirtualMemoryAddress>& address)
|
||||||
{
|
{
|
||||||
|
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||||
|
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||||
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
|
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||||
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||||
std::optional<Reg32>();
|
std::optional<Reg32>();
|
||||||
FlushForLoadStore(address, false, use_fastmem);
|
FlushForLoadStore(address, false, use_fastmem);
|
||||||
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
const Reg32 value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action]() {
|
||||||
|
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
|
||||||
|
Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)) :
|
||||||
|
RWRET;
|
||||||
|
});
|
||||||
|
|
||||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
|
||||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
|
||||||
switch (action)
|
switch (action)
|
||||||
{
|
{
|
||||||
case GTERegisterAccessAction::Ignore:
|
case GTERegisterAccessAction::Ignore:
|
||||||
|
@ -1624,28 +1639,28 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si
|
||||||
|
|
||||||
case GTERegisterAccessAction::Direct:
|
case GTERegisterAccessAction::Direct:
|
||||||
{
|
{
|
||||||
cg->mov(cg->dword[PTR(ptr)], RWRET);
|
cg->mov(cg->dword[PTR(ptr)], value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::SignExtend16:
|
case GTERegisterAccessAction::SignExtend16:
|
||||||
{
|
{
|
||||||
cg->movsx(RWRET, RWRET.cvt16());
|
cg->movsx(RWARG3, value.cvt16());
|
||||||
cg->mov(cg->dword[PTR(ptr)], RWRET);
|
cg->mov(cg->dword[PTR(ptr)], RWARG3);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::ZeroExtend16:
|
case GTERegisterAccessAction::ZeroExtend16:
|
||||||
{
|
{
|
||||||
cg->movzx(RWRET, RWRET.cvt16());
|
cg->movzx(RWARG3, value.cvt16());
|
||||||
cg->mov(cg->dword[PTR(ptr)], RWRET);
|
cg->mov(cg->dword[PTR(ptr)], RWARG3);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case GTERegisterAccessAction::CallHandler:
|
case GTERegisterAccessAction::CallHandler:
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
cg->mov(RWARG2, RWRET);
|
cg->mov(RWARG2, value);
|
||||||
cg->mov(RWARG1, index);
|
cg->mov(RWARG1, index);
|
||||||
cg->call(&GTE::WriteRegister);
|
cg->call(&GTE::WriteRegister);
|
||||||
break;
|
break;
|
||||||
|
@ -1656,12 +1671,12 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si
|
||||||
// SXY0 <- SXY1
|
// SXY0 <- SXY1
|
||||||
// SXY1 <- SXY2
|
// SXY1 <- SXY2
|
||||||
// SXY2 <- SXYP
|
// SXY2 <- SXYP
|
||||||
DebugAssert(RWRET != RWARG1 && RWRET != RWARG2);
|
DebugAssert(value != RWARG1 && value != RWARG2);
|
||||||
cg->mov(RWARG1, cg->dword[PTR(&g_state.gte_regs.SXY1[0])]);
|
cg->mov(RWARG1, cg->dword[PTR(&g_state.gte_regs.SXY1[0])]);
|
||||||
cg->mov(RWARG2, cg->dword[PTR(&g_state.gte_regs.SXY2[0])]);
|
cg->mov(RWARG2, cg->dword[PTR(&g_state.gte_regs.SXY2[0])]);
|
||||||
cg->mov(cg->dword[PTR(&g_state.gte_regs.SXY0[0])], RWARG1);
|
cg->mov(cg->dword[PTR(&g_state.gte_regs.SXY0[0])], RWARG1);
|
||||||
cg->mov(cg->dword[PTR(&g_state.gte_regs.SXY1[0])], RWARG2);
|
cg->mov(cg->dword[PTR(&g_state.gte_regs.SXY1[0])], RWARG2);
|
||||||
cg->mov(cg->dword[PTR(&g_state.gte_regs.SXY2[0])], RWRET);
|
cg->mov(cg->dword[PTR(&g_state.gte_regs.SXY2[0])], value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1675,11 +1690,13 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si
|
||||||
if (g_settings.gpu_pgxp_enable)
|
if (g_settings.gpu_pgxp_enable)
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_C_CALL);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
cg->mov(RWARG3, RWRET);
|
cg->mov(RWARG3, value);
|
||||||
|
if (value != RWRET)
|
||||||
|
FreeHostReg(value.getIdx());
|
||||||
cg->mov(RWARG2, addr);
|
cg->mov(RWARG2, addr);
|
||||||
|
FreeHostReg(addr_reg.value().getIdx());
|
||||||
cg->mov(RWARG1, inst->bits);
|
cg->mov(RWARG1, inst->bits);
|
||||||
cg->call(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
cg->call(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
|
||||||
FreeHostReg(addr_reg.value().getIdx());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1726,19 +1743,20 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
|
||||||
// TODO: this can take over rt's value if it's no longer needed
|
// TODO: this can take over rt's value if it's no longer needed
|
||||||
// NOTE: can't trust T in cf because of the flush
|
// NOTE: can't trust T in cf because of the flush
|
||||||
const Reg rt = inst->r.rt;
|
const Reg rt = inst->r.rt;
|
||||||
const Reg32 value = RWARG2;
|
const Reg32 value = g_settings.gpu_pgxp_enable ? Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
|
||||||
DebugAssert(value != cg->ecx);
|
DebugAssert(value != cg->ecx);
|
||||||
if (HasConstantReg(rt))
|
MoveMIPSRegToReg(value, rt);
|
||||||
cg->mov(value, GetConstantRegU32(rt));
|
|
||||||
else if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
|
|
||||||
cg->mov(value, Reg32(rtreg.value()));
|
|
||||||
else
|
|
||||||
cg->mov(value, MipsPtr(rt));
|
|
||||||
|
|
||||||
cg->mov(cg->ecx, addr);
|
cg->mov(cg->ecx, addr);
|
||||||
cg->and_(cg->ecx, 3);
|
cg->and_(cg->ecx, 3);
|
||||||
cg->shl(cg->ecx, 3); // *8
|
cg->shl(cg->ecx, 3); // *8
|
||||||
|
|
||||||
|
// Don't need the original address anymore.
|
||||||
|
if (g_settings.gpu_pgxp_enable)
|
||||||
|
cg->and_(addr, ~0x3u);
|
||||||
|
else
|
||||||
|
FreeHostReg(addr.getIdx());
|
||||||
|
|
||||||
if (inst->op == InstructionOp::swl)
|
if (inst->op == InstructionOp::swl)
|
||||||
{
|
{
|
||||||
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
|
||||||
|
@ -1769,11 +1787,18 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
|
||||||
cg->or_(value, RWRET);
|
cg->or_(value, RWRET);
|
||||||
}
|
}
|
||||||
|
|
||||||
FreeHostReg(addr.getIdx());
|
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
|
||||||
|
|
||||||
cg->mov(RWARG1, addr);
|
if (g_settings.gpu_pgxp_enable)
|
||||||
cg->and_(RWARG1, ~0x3u);
|
{
|
||||||
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
|
Flush(FLUSH_FOR_C_CALL);
|
||||||
|
cg->mov(RWARG3, value);
|
||||||
|
FreeHostReg(value.getIdx());
|
||||||
|
cg->mov(RWARG2, addr);
|
||||||
|
FreeHostReg(addr.getIdx());
|
||||||
|
cg->mov(RWARG1, inst->bits);
|
||||||
|
cg->call(reinterpret_cast<const void*>(&PGXP::CPU_SW));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||||
|
|
Loading…
Reference in a new issue