System: Refactor main loop

Reduces JIT exits.
Improves runahead performance.
This commit is contained in:
Stenzek 2023-08-15 23:12:21 +10:00
parent 4ebd34fcb3
commit 5b980dafa5
43 changed files with 1333 additions and 913 deletions

View file

@ -239,6 +239,8 @@ elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm" OR "${CMAKE_SYSTEM_PROCESSOR}"
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -marm -march=armv7-a") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -marm -march=armv7-a")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -march=armv7-a") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -march=armv7-a")
endif() endif()
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "riscv64")
set(CPU_ARCH "riscv64")
else() else()
message(FATAL_ERROR "Unknown system processor: ${CMAKE_SYSTEM_PROCESSOR}") message(FATAL_ERROR "Unknown system processor: ${CMAKE_SYSTEM_PROCESSOR}")
endif() endif()

View file

@ -88,4 +88,7 @@ if(${CPU_ARCH} STREQUAL "aarch64")
) )
endif() endif()
# Enable vixl's debug assertions (VIXL_DEBUG) for Debug builds.
# The definition is attached with a generator expression rather than by testing
# CMAKE_BUILD_TYPE, because CMAKE_BUILD_TYPE is empty at configure time under
# multi-config generators (Visual Studio, Xcode, Ninja Multi-Config), which would
# otherwise never receive the definition.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  message("Enabling vixl debug assertions")
endif()
target_compile_definitions(vixl PUBLIC $<$<CONFIG:Debug>:VIXL_DEBUG>)

View file

@ -12,6 +12,8 @@ add_library(common
dimensional_array.h dimensional_array.h
error.cpp error.cpp
error.h error.h
fastjmp.cpp
fastjmp.h
fifo_queue.h fifo_queue.h
file_system.cpp file_system.cpp
file_system.h file_system.h
@ -97,6 +99,14 @@ if(WIN32)
windows_headers.h windows_headers.h
) )
target_link_libraries(common PRIVATE d3dcompiler.lib) target_link_libraries(common PRIVATE d3dcompiler.lib)
# On Windows, fastjmp_set/fastjmp_jmp are assembled from standalone assembly sources.
if("${CPU_ARCH}" STREQUAL "x64")
  enable_language(ASM_MASM)
  target_sources(common PRIVATE fastjmp_x86.asm)
  # fastjmp_x86.asm wraps its 64-bit code in "IFDEF _M_X86_64". The MSBuild project passes
  # that symbol explicitly through PreprocessorDefinitions; do the same here, otherwise the
  # file assembles to nothing and both symbols are missing at link time.
  set_source_files_properties(fastjmp_x86.asm PROPERTIES COMPILE_DEFINITIONS "_M_X86_64")
elseif("${CPU_ARCH}" STREQUAL "aarch64")
  # fastjmp_arm.asm is AArch64-only (it includes ksarm64.h and uses x/d registers), so it is
  # deliberately not added for aarch32.
  enable_language(ASM_MARMASM)
  target_sources(common PRIVATE fastjmp_arm.asm)
endif()
endif() endif()
if(NOT WIN32 AND NOT ANDROID) if(NOT WIN32 AND NOT ANDROID)

View file

@ -23,6 +23,7 @@
<ClInclude Include="dimensional_array.h" /> <ClInclude Include="dimensional_array.h" />
<ClInclude Include="easing.h" /> <ClInclude Include="easing.h" />
<ClInclude Include="error.h" /> <ClInclude Include="error.h" />
<ClInclude Include="fastjmp.h" />
<ClInclude Include="fifo_queue.h" /> <ClInclude Include="fifo_queue.h" />
<ClInclude Include="file_system.h" /> <ClInclude Include="file_system.h" />
<ClInclude Include="gl\context.h"> <ClInclude Include="gl\context.h">
@ -123,6 +124,7 @@
<ClCompile Include="d3d12\stream_buffer.cpp" /> <ClCompile Include="d3d12\stream_buffer.cpp" />
<ClCompile Include="d3d12\texture.cpp" /> <ClCompile Include="d3d12\texture.cpp" />
<ClCompile Include="d3d12\util.cpp" /> <ClCompile Include="d3d12\util.cpp" />
<ClCompile Include="fastjmp.cpp" />
<ClCompile Include="file_system.cpp" /> <ClCompile Include="file_system.cpp" />
<ClCompile Include="gl\context.cpp"> <ClCompile Include="gl\context.cpp">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
@ -192,6 +194,16 @@
<Natvis Include="bitfield.natvis" /> <Natvis Include="bitfield.natvis" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<MARMASM Include="fastjmp_arm.asm">
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</MARMASM>
<MASM Include="fastjmp_x86.asm">
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Platform)'!='Win32' And '$(Platform)'!='x64'">true</ExcludedFromBuild>
<PreprocessorDefinitions Condition="'$(Platform)'=='Win32'">_M_X86_32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)'=='x64'">_M_X86_64;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</MASM>
<None Include="vulkan\entry_points.inl"> <None Include="vulkan\entry_points.inl">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</None> </None>
@ -219,9 +231,17 @@
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project> <Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
</ProjectReference> </ProjectReference>
</ItemGroup> </ItemGroup>
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
</ImportGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{EE054E08-3799-4A59-A422-18259C105FFD}</ProjectGuid> <ProjectGuid>{EE054E08-3799-4A59-A422-18259C105FFD}</ProjectGuid>
</PropertyGroup> </PropertyGroup>
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.props" />
</ImportGroup>
<Import Project="..\..\dep\msvc\vsprops\StaticLibrary.props" /> <Import Project="..\..\dep\msvc\vsprops\StaticLibrary.props" />
<Import Project="common.props" /> <Import Project="common.props" />
<ItemDefinitionGroup> <ItemDefinitionGroup>

View file

@ -129,6 +129,7 @@
<ClInclude Include="build_timestamp.h" /> <ClInclude Include="build_timestamp.h" />
<ClInclude Include="sha1_digest.h" /> <ClInclude Include="sha1_digest.h" />
<ClInclude Include="gpu_texture.h" /> <ClInclude Include="gpu_texture.h" />
<ClInclude Include="fastjmp.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="gl\program.cpp"> <ClCompile Include="gl\program.cpp">
@ -234,6 +235,7 @@
<ClCompile Include="threading.cpp" /> <ClCompile Include="threading.cpp" />
<ClCompile Include="sha1_digest.cpp" /> <ClCompile Include="sha1_digest.cpp" />
<ClCompile Include="gpu_texture.cpp" /> <ClCompile Include="gpu_texture.cpp" />
<ClCompile Include="fastjmp.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Natvis Include="bitfield.natvis" /> <Natvis Include="bitfield.natvis" />
@ -260,4 +262,10 @@
<Filter>vulkan</Filter> <Filter>vulkan</Filter>
</None> </None>
</ItemGroup> </ItemGroup>
<ItemGroup>
<MASM Include="fastjmp_x86.asm" />
</ItemGroup>
<ItemGroup>
<MARMASM Include="fastjmp_arm.asm" />
</ItemGroup>
</Project> </Project>

166
src/common/fastjmp.cpp Normal file
View file

@ -0,0 +1,166 @@
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#ifndef _WIN32
#include "fastjmp.h"
#if defined(__APPLE__)
#define PREFIX "_"
#else
#define PREFIX ""
#endif
#if defined(__x86_64__)
// x86-64 (non-Windows) implementation of fastjmp_set/fastjmp_jmp.
// The buffer pointer arrives in %rdi; fastjmp_jmp's return code arrives in %esi.
// Buffer layout (byte offsets, 64 bytes total):
//   0 = return address (rip), 8 = rbx, 16 = rsp as it will be after fastjmp_set returns,
//   24 = rbp, 32 = r12, 40 = r13, 48 = r14, 56 = r15.
asm("\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
// fastjmp_set: saves the registers listed above into the buffer and returns 0 in %eax.
"\t" PREFIX "fastjmp_set:"
R"(
movq 0(%rsp), %rax
movq %rsp, %rdx # fixup stack pointer, so it doesn't include the call to fastjmp_set
addq $8, %rdx
movq %rax, 0(%rdi) # actually rip
movq %rbx, 8(%rdi)
movq %rdx, 16(%rdi) # actually rsp
movq %rbp, 24(%rdi)
movq %r12, 32(%rdi)
movq %r13, 40(%rdi)
movq %r14, 48(%rdi)
movq %r15, 56(%rdi)
xorl %eax, %eax
ret
)"
// fastjmp_jmp: places the return code in %eax, reloads the saved registers (including
// the stack pointer) and jumps to the saved return address, so execution continues as
// if fastjmp_set had just returned that code.
"\t" PREFIX "fastjmp_jmp:"
R"(
movl %esi, %eax
movq 0(%rdi), %rdx # actually rip
movq 8(%rdi), %rbx
movq 16(%rdi), %rsp # actually rsp
movq 24(%rdi), %rbp
movq 32(%rdi), %r12
movq 40(%rdi), %r13
movq 48(%rdi), %r14
movq 56(%rdi), %r15
jmp *%rdx
)");
#elif defined(__aarch64__)
// AArch64 (non-Windows) implementation of fastjmp_set/fastjmp_jmp.
// The buffer pointer arrives in x0; fastjmp_jmp's return code arrives in w1.
// Buffer layout (byte offsets): 0 = sp, 8 = x30 (return address), 16..95 = x19-x28,
// 96 = x29, 112..175 = d8-d15. The final store pair ends at byte 176.
//
// Alignment is requested with ".p2align 4" (16 bytes). Plain ".align N" takes a
// power-of-two exponent on this target, so the previous ".align 16" asked for
// 64 KiB alignment of the code instead of 16 bytes.
asm(
"\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t.p2align 4\n"
// fastjmp_set: saves sp, the return address and the registers listed above; returns 0.
"\t" PREFIX "fastjmp_set:" R"(
mov x16, sp
stp x16, x30, [x0]
stp x19, x20, [x0, #16]
stp x21, x22, [x0, #32]
stp x23, x24, [x0, #48]
stp x25, x26, [x0, #64]
stp x27, x28, [x0, #80]
str x29, [x0, #96]
stp d8, d9, [x0, #112]
stp d10, d11, [x0, #128]
stp d12, d13, [x0, #144]
stp d14, d15, [x0, #160]
mov w0, wzr
br x30
)"
"\t.p2align 4\n"
// fastjmp_jmp: restores the saved state and branches to the saved return address with
// the return code in w0.
"\t" PREFIX "fastjmp_jmp:" R"(
ldp x16, x30, [x0]
mov sp, x16
ldp x19, x20, [x0, #16]
ldp x21, x22, [x0, #32]
ldp x23, x24, [x0, #48]
ldp x25, x26, [x0, #64]
ldp x27, x28, [x0, #80]
ldr x29, [x0, #96]
ldp d8, d9, [x0, #112]
ldp d10, d11, [x0, #128]
ldp d12, d13, [x0, #144]
ldp d14, d15, [x0, #160]
mov w0, w1
br x30
)");
#elif defined(__riscv) && __riscv_xlen == 64
// RISC-V 64 implementation of fastjmp_set/fastjmp_jmp.
// The buffer pointer arrives in a0; fastjmp_jmp's return code arrives in a1.
// Buffer layout (byte offsets): 0 = sp, 8..103 = s0-s11, 104..199 = fs0-fs11,
// 208 = ra. The final store ends at byte 216, so the buffer must provide at least
// 216 bytes.
//
// Alignment is requested with ".p2align 4" (16 bytes). Plain ".align N" takes a
// power-of-two exponent on this target, so the previous ".align 16" asked for
// 64 KiB alignment of the code instead of 16 bytes.
asm(
"\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t.p2align 4\n"
// fastjmp_set: saves sp, s0-s11, fs0-fs11 and ra; returns 0 in a0.
"\t" PREFIX "fastjmp_set:" R"(
sd sp, 0(a0)
sd s0, 8(a0)
sd s1, 16(a0)
sd s2, 24(a0)
sd s3, 32(a0)
sd s4, 40(a0)
sd s5, 48(a0)
sd s6, 56(a0)
sd s7, 64(a0)
sd s8, 72(a0)
sd s9, 80(a0)
sd s10, 88(a0)
sd s11, 96(a0)
fsd fs0, 104(a0)
fsd fs1, 112(a0)
fsd fs2, 120(a0)
fsd fs3, 128(a0)
fsd fs4, 136(a0)
fsd fs5, 144(a0)
fsd fs6, 152(a0)
fsd fs7, 160(a0)
fsd fs8, 168(a0)
fsd fs9, 176(a0)
fsd fs10, 184(a0)
fsd fs11, 192(a0)
sd ra, 208(a0)
li a0, 0
jr ra
)"
"\t.p2align 4\n"
// fastjmp_jmp: restores the saved state (sp last, while a0 still points at the buffer),
// moves the return code into a0 and jumps to the saved ra.
"\t" PREFIX "fastjmp_jmp:" R"(
ld ra, 208(a0)
fld fs11, 192(a0)
fld fs10, 184(a0)
fld fs9, 176(a0)
fld fs8, 168(a0)
fld fs7, 160(a0)
fld fs6, 152(a0)
fld fs5, 144(a0)
fld fs4, 136(a0)
fld fs3, 128(a0)
fld fs2, 120(a0)
fld fs1, 112(a0)
fld fs0, 104(a0)
ld s11, 96(a0)
ld s10, 88(a0)
ld s9, 80(a0)
ld s8, 72(a0)
ld s7, 64(a0)
ld s6, 56(a0)
ld s5, 48(a0)
ld s4, 40(a0)
ld s3, 32(a0)
ld s2, 24(a0)
ld s1, 16(a0)
ld s0, 8(a0)
ld sp, 0(a0)
mv a0, a1
jr ra
)");
#else
#error Unknown platform.
#endif
#endif // !_WIN32

33
src/common/fastjmp.h Normal file
View file

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "types.h"
#include <cstddef>
#include <cstdint>
// Opaque storage for the register state captured by fastjmp_set() and restored by
// fastjmp_jmp(). BUF_SIZE must cover the highest byte written by the matching assembly
// implementation for each platform; the 16-byte alignment is required by the aligned
// vector stores (movaps) used in the Windows x64 implementation.
struct fastjmp_buf
{
#if defined(_WIN32) && defined(_M_AMD64)
  // rip, rbx, rsp, rbp, rsi, rdi, r12-r15 (80 bytes) + xmm6-xmm15 (160 bytes).
  static constexpr std::size_t BUF_SIZE = 240;
#elif defined(_M_ARM64) || defined(__aarch64__)
  // The last store is the d14/d15 pair at offset 160, which ends at byte 176.
  static constexpr std::size_t BUF_SIZE = 176;
#elif defined(__x86_64__)
  // rip, rbx, rsp, rbp, r12-r15.
  static constexpr std::size_t BUF_SIZE = 64;
#elif defined(_M_IX86) || defined(__i386__)
  // eip, ebx, esp, ebp, esi, edi.
  static constexpr std::size_t BUF_SIZE = 24;
#elif defined(__riscv) && __riscv_xlen == 64
  // The last store is ra at offset 208, which ends at byte 216. A size of 208 let that
  // store write past the end of the buffer.
  static constexpr std::size_t BUF_SIZE = 216;
#else
#error Unknown architecture.
#endif

  alignas(16) std::uint8_t buf[BUF_SIZE];
};
// Both functions are implemented in assembly (inline asm in fastjmp.cpp on non-Windows
// platforms, fastjmp_x86.asm / fastjmp_arm.asm on Windows).
extern "C" {
// Saves the stack pointer, return address and the registers stored by the platform's
// implementation into buf. Returns 0 when called directly; returns the value passed to
// fastjmp_jmp() when control comes back through a jump.
int fastjmp_set(fastjmp_buf* buf);

// Restores the state saved in buf and resumes execution as if the matching fastjmp_set()
// call had just returned ret. The value is passed through unmodified, so passing 0 is
// indistinguishable from the initial return of fastjmp_set().
[[noreturn]] void fastjmp_jmp(const fastjmp_buf* buf, int ret);
}

View file

@ -0,0 +1,47 @@
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "ksarm64.h"
EXPORT fastjmp_set
EXPORT fastjmp_jmp
TEXTAREA
; int fastjmp_set(fastjmp_buf*)
; x0 = buffer. Saves sp and x30 (return address) at offsets 0/8, x19-x28 at 16..95,
; x29 at 96 and d8-d15 at 112..175, then returns 0 in w0.
LEAF_ENTRY fastjmp_set
mov x16, sp
stp x16, x30, [x0]
stp x19, x20, [x0, #16]
stp x21, x22, [x0, #32]
stp x23, x24, [x0, #48]
stp x25, x26, [x0, #64]
stp x27, x28, [x0, #80]
str x29, [x0, #96]
stp d8, d9, [x0, #112]
stp d10, d11, [x0, #128]
stp d12, d13, [x0, #144]
stp d14, d15, [x0, #160]
mov w0, wzr
br x30
LEAF_END
; void fastjmp_jmp(const fastjmp_buf*, int)
; x0 = buffer, w1 = return code. Restores sp, x30, x19-x29 and d8-d15 from the buffer,
; moves the return code into w0 and branches to the saved return address, so execution
; resumes as if fastjmp_set had returned that code.
LEAF_ENTRY fastjmp_jmp
ldp x16, x30, [x0]
mov sp, x16
ldp x19, x20, [x0, #16]
ldp x21, x22, [x0, #32]
ldp x23, x24, [x0, #48]
ldp x25, x26, [x0, #64]
ldp x27, x28, [x0, #80]
ldr x29, [x0, #96]
ldp d8, d9, [x0, #112]
ldp d10, d11, [x0, #128]
ldp d12, d13, [x0, #144]
ldp d14, d15, [x0, #160]
mov w0, w1
br x30
LEAF_END
END

119
src/common/fastjmp_x86.asm Normal file
View file

@ -0,0 +1,119 @@
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
IFDEF _M_X86_32
; -----------------------------------------
; 32-bit X86
; -----------------------------------------
.386
.model flat
_TEXT SEGMENT
PUBLIC @fastjmp_set@4
PUBLIC @fastjmp_jmp@8
; int fastjmp_set(fastjmp_buf*)
; ecx = buffer. Saves the return address (eip), ebx, the post-return esp, ebp, esi and
; edi at offsets 0..20, then returns 0 in eax.
; NOTE(review): the symbol carries a fastcall-style decoration (@name@N); confirm the C
; declaration for 32-bit builds uses a matching calling convention.
@fastjmp_set@4 PROC
mov eax, dword ptr [esp]
mov edx, esp ; fixup stack pointer, so it doesn't include the call to fastjmp_set
add edx, 4
mov dword ptr [ecx], eax ; actually eip
mov dword ptr [ecx + 4], ebx
mov dword ptr [ecx + 8], edx ; actually esp
mov dword ptr [ecx + 12], ebp
mov dword ptr [ecx + 16], esi
mov dword ptr [ecx + 20], edi
xor eax, eax
ret
@fastjmp_set@4 ENDP
; void __fastcall fastjmp_jmp(const fastjmp_buf*, int)
; ecx = buffer, edx = return code. Moves the return code into eax, reloads ebx, esp,
; ebp, esi and edi from the buffer and jumps to the saved eip, so execution resumes as
; if fastjmp_set had returned that code.
@fastjmp_jmp@8 PROC
mov eax, edx ; return code
mov edx, dword ptr [ecx + 0]
mov ebx, dword ptr [ecx + 4]
mov esp, dword ptr [ecx + 8]
mov ebp, dword ptr [ecx + 12]
mov esi, dword ptr [ecx + 16]
mov edi, dword ptr [ecx + 20]
jmp edx
@fastjmp_jmp@8 ENDP
_TEXT ENDS
ENDIF ; _M_X86_32
IFDEF _M_X86_64
; -----------------------------------------
; 64-bit X86
; -----------------------------------------
_TEXT SEGMENT
PUBLIC fastjmp_set
PUBLIC fastjmp_jmp
; int fastjmp_set(fastjmp_buf*)
; rcx = buffer. Saves the return address (rip), rbx, the post-return rsp, rbp, rsi, rdi
; and r12-r15 at offsets 0..79, followed by xmm6-xmm15 at offsets 80..239, then returns
; 0 in eax. movaps requires the buffer to be 16-byte aligned.
fastjmp_set PROC
mov rax, qword ptr [rsp]
mov rdx, rsp ; fixup stack pointer, so it doesn't include the call to fastjmp_set
add rdx, 8
mov qword ptr [rcx], rax ; actually rip
mov qword ptr [rcx + 8], rbx
mov qword ptr [rcx + 16], rdx ; actually rsp
mov qword ptr [rcx + 24], rbp
mov qword ptr [rcx + 32], rsi
mov qword ptr [rcx + 40], rdi
mov qword ptr [rcx + 48], r12
mov qword ptr [rcx + 56], r13
mov qword ptr [rcx + 64], r14
mov qword ptr [rcx + 72], r15
movaps xmmword ptr [rcx + 80], xmm6
movaps xmmword ptr [rcx + 96], xmm7
movaps xmmword ptr [rcx + 112], xmm8
add rcx, 112 ; split to two batches to fit displacement in a single byte
; from here on, offsets are relative to buffer + 112 (xmm9 lands at buffer + 128)
movaps xmmword ptr [rcx + 16], xmm9
movaps xmmword ptr [rcx + 32], xmm10
movaps xmmword ptr [rcx + 48], xmm11
movaps xmmword ptr [rcx + 64], xmm12
movaps xmmword ptr [rcx + 80], xmm13
movaps xmmword ptr [rcx + 96], xmm14
movaps xmmword ptr [rcx + 112], xmm15
xor eax, eax
ret
fastjmp_set ENDP
; void fastjmp_jmp(const fastjmp_buf*, int)
; rcx = buffer, edx = return code. Moves the return code into eax, reloads rbx, rsp,
; rbp, rsi, rdi, r12-r15 and xmm6-xmm15 from the buffer and jumps to the saved rip, so
; execution resumes as if fastjmp_set had returned that code.
fastjmp_jmp PROC
mov eax, edx ; return code
mov rdx, qword ptr [rcx + 0] ; actually rip
mov rbx, qword ptr [rcx + 8]
mov rsp, qword ptr [rcx + 16]
mov rbp, qword ptr [rcx + 24]
mov rsi, qword ptr [rcx + 32]
mov rdi, qword ptr [rcx + 40]
mov r12, qword ptr [rcx + 48]
mov r13, qword ptr [rcx + 56]
mov r14, qword ptr [rcx + 64]
mov r15, qword ptr [rcx + 72]
movaps xmm6, xmmword ptr [rcx + 80]
movaps xmm7, xmmword ptr [rcx + 96]
movaps xmm8, xmmword ptr [rcx + 112]
add rcx, 112 ; split to two batches to fit displacement in a single byte
; from here on, offsets are relative to buffer + 112 (xmm9 is read from buffer + 128)
movaps xmm9, xmmword ptr [rcx + 16]
movaps xmm10, xmmword ptr [rcx + 32]
movaps xmm11, xmmword ptr [rcx + 48]
movaps xmm12, xmmword ptr [rcx + 64]
movaps xmm13, xmmword ptr [rcx + 80]
movaps xmm14, xmmword ptr [rcx + 96]
movaps xmm15, xmmword ptr [rcx + 112]
jmp rdx
fastjmp_jmp ENDP
_TEXT ENDS
ENDIF ; _M_X86_64
END

View file

@ -27,6 +27,8 @@
#define CPU_AARCH64 1 #define CPU_AARCH64 1
#elif defined(__arm__) #elif defined(__arm__)
#define CPU_AARCH32 1 #define CPU_AARCH32 1
#elif defined(__riscv) && __riscv_xlen == 64
#define CPU_RISCV64 1
#else #else
#error Unknown architecture. #error Unknown architecture.
#endif #endif

View file

@ -65,6 +65,13 @@ char (&__countof_ArraySizeHelper(T (&array)[N]))[N];
#define UNLIKELY(x) __builtin_expect(!!(x), 0) #define UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif #endif
// A [[noreturn]]-style marker that can be applied to function pointers.
#if !defined(_MSC_VER)
#define NORETURN_FUNCTION_POINTER __attribute__((noreturn))
#else
// Expands to nothing on MSVC: __declspec(noreturn) produces error C3829.
#define NORETURN_FUNCTION_POINTER
#endif
// disable warnings that show up at warning level 4 // disable warnings that show up at warning level 4
// TODO: Move to build system instead // TODO: Move to build system instead

View file

@ -21,6 +21,7 @@
#include "sio.h" #include "sio.h"
#include "spu.h" #include "spu.h"
#include "timers.h" #include "timers.h"
#include "timing_event.h"
#include "util/state_wrapper.h" #include "util/state_wrapper.h"
#include <cstdio> #include <cstdio>
#include <tuple> #include <tuple>
@ -1418,7 +1419,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address)
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks) void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
{ {
VirtualMemoryAddress current_pc = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK; VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK;
if (IsCachedAddress(current_pc)) if (IsCachedAddress(current_pc))
{ {
TickCount ticks = 0; TickCount ticks = 0;
@ -1541,10 +1542,20 @@ ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address,
} }
template<MemoryAccessType type, MemoryAccessSize size> template<MemoryAccessType type, MemoryAccessSize size>
static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value) static ALWAYS_INLINE_RELEASE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value)
{ {
using namespace Bus; using namespace Bus;
#if 0
if (type == MemoryAccessType::Write && address == 0x80113028)
{
if ((TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks) == 5051485)
__debugbreak();
Log_WarningPrintf("VAL %08X @ %u", value, (TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks));
}
#endif
switch (address >> 29) switch (address >> 29)
{ {
case 0x00: // KUSEG 0M-512M case 0x00: // KUSEG 0M-512M
@ -1723,9 +1734,9 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address)
bool FetchInstruction() bool FetchInstruction()
{ {
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4)); DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
const PhysicalMemoryAddress address = g_state.regs.npc; const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29) switch (address >> 29)
{ {
case 0x00: // KUSEG 0M-512M case 0x00: // KUSEG 0M-512M
@ -1764,16 +1775,16 @@ bool FetchInstruction()
} }
} }
g_state.regs.pc = g_state.regs.npc; g_state.pc = g_state.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits); g_state.npc += sizeof(g_state.next_instruction.bits);
return true; return true;
} }
bool FetchInstructionForInterpreterFallback() bool FetchInstructionForInterpreterFallback()
{ {
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4)); DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
const PhysicalMemoryAddress address = g_state.regs.npc; const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29) switch (address >> 29)
{ {
case 0x00: // KUSEG 0M-512M case 0x00: // KUSEG 0M-512M
@ -1801,8 +1812,8 @@ bool FetchInstructionForInterpreterFallback()
} }
} }
g_state.regs.pc = g_state.regs.npc; g_state.pc = g_state.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits); g_state.npc += sizeof(g_state.next_instruction.bits);
return true; return true;
} }

View file

@ -196,6 +196,8 @@
<Import Project="core.props" /> <Import Project="core.props" />
<ItemDefinitionGroup> <ItemDefinitionGroup>
<ClCompile> <ClCompile>
<PreprocessorDefinitions>ZYDIS_DISABLE_ENCODER;ZYDIS_DISABLE_AVX512;ZYDIS_DISABLE_KNC;ZYDIS_STATIC_BUILD;ZYCORE_STATIC_BUILD;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories Condition="'$(Platform)'=='x64'">$(SolutionDir)dep\zydis\include;$(SolutionDir)dep\zydis\dependencies\zycore\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName> <ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName>
</ClCompile> </ClCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>

View file

@ -8,6 +8,7 @@
#include "cpu_core.h" #include "cpu_core.h"
#include "cpu_core_private.h" #include "cpu_core_private.h"
#include "cpu_disasm.h" #include "cpu_disasm.h"
#include "cpu_recompiler_types.h"
#include "settings.h" #include "settings.h"
#include "system.h" #include "system.h"
#include "timing_event.h" #include "timing_event.h"
@ -17,6 +18,8 @@ Log_SetChannel(CPU::CodeCache);
#include "cpu_recompiler_code_generator.h" #include "cpu_recompiler_code_generator.h"
#endif #endif
#include <zlib.h>
namespace CPU::CodeCache { namespace CPU::CodeCache {
static constexpr bool USE_BLOCK_LINKING = true; static constexpr bool USE_BLOCK_LINKING = true;
@ -50,6 +53,10 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
#endif #endif
static JitCodeBuffer s_code_buffer; static JitCodeBuffer s_code_buffer;
#endif
#ifdef WITH_RECOMPILER
static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT]; static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT];
static std::unique_ptr<CodeBlock::HostCodePointer[]> s_fast_map_pointers; static std::unique_ptr<CodeBlock::HostCodePointer[]> s_fast_map_pointers;
@ -253,12 +260,19 @@ void Initialize()
{ {
Panic("Failed to initialize code space"); Panic("Failed to initialize code space");
} }
}
#endif
AllocateFastMap(); AllocateFastMap();
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
if (g_settings.IsUsingFastmem() && !InitializeFastmem()) if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem"); Panic("Failed to initialize fastmem");
AllocateFastMap();
CompileDispatcher(); CompileDispatcher();
ResetFastMap(); ResetFastMap();
} }
@ -293,22 +307,13 @@ void Shutdown()
} }
template<PGXPMode pgxp_mode> template<PGXPMode pgxp_mode>
static void ExecuteImpl() [[noreturn]] static void ExecuteImpl()
{ {
CodeBlockKey next_block_key; CodeBlockKey next_block_key;
g_using_interpreter = false; for (;;)
g_state.frame_done = false;
while (!g_state.frame_done)
{ {
if (HasPendingInterrupt()) TimingEvents::RunEvents();
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
next_block_key = GetNextBlockKey(); next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount) while (g_state.pending_ticks < g_state.downcount)
@ -384,27 +389,10 @@ static void ExecuteImpl()
} }
} }
} }
TimingEvents::RunEvents();
} }
// in case we switch to interpreter... // in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc; g_state.npc = g_state.pc;
}
void Execute()
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU>();
else
ExecuteImpl<PGXPMode::Memory>();
}
else
{
ExecuteImpl<PGXPMode::Disabled>();
}
} }
#ifdef WITH_RECOMPILER #ifdef WITH_RECOMPILER
@ -430,21 +418,15 @@ FastMapTable* GetFastMapPointer()
return s_fast_map; return s_fast_map;
} }
void ExecuteRecompiler() [[noreturn]] static void ExecuteRecompiler()
{ {
g_using_interpreter = false;
g_state.frame_done = false;
#if 0 #if 0
while (!g_state.frame_done) for (;;)
{ {
if (HasPendingInterrupt()) if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt(); DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount(); TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount) while (g_state.pending_ticks < g_state.downcount)
{ {
@ -452,18 +434,50 @@ void ExecuteRecompiler()
LogCurrentState(); LogCurrentState();
#endif #endif
const u32 pc = g_state.regs.pc; const u32 pc = g_state.pc;
s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]); s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]);
} }
TimingEvents::RunEvents();
} }
#else #else
s_asm_dispatcher(); s_asm_dispatcher();
#endif #endif
}
// in case we switch to interpreter... #endif
g_state.regs.npc = g_state.regs.pc;
// Enters the CPU execution loop selected by the current settings. Never returns: every
// path ends in a [[noreturn]] executor.
[[noreturn]] void Execute()
{
#ifdef WITH_RECOMPILER
  // The recompiler has its own dispatcher; every other mode uses the cached interpreter.
  if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
    ExecuteRecompiler();
#endif

  // Pick the interpreter instantiation that matches the PGXP configuration.
  if (!g_settings.gpu_pgxp_enable)
    ExecuteImpl<PGXPMode::Disabled>();
  else if (g_settings.gpu_pgxp_cpu)
    ExecuteImpl<PGXPMode::CPU>();
  else
    ExecuteImpl<PGXPMode::Memory>();
}
#if defined(WITH_RECOMPILER)
JitCodeBuffer& GetCodeBuffer()
{
return s_code_buffer;
} }
#endif #endif
@ -473,13 +487,14 @@ void Reinitialize()
ClearState(); ClearState();
#ifdef WITH_RECOMPILER #ifdef WITH_RECOMPILER
ShutdownFastmem(); ShutdownFastmem();
#endif
#if defined(WITH_RECOMPILER)
s_code_buffer.Destroy(); s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler()) if (g_settings.IsUsingRecompiler())
{ {
#ifdef USE_STATIC_CODE_BUFFER #ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE)) RECOMPILER_GUARD_SIZE))
@ -489,7 +504,12 @@ void Reinitialize()
{ {
Panic("Failed to initialize code space"); Panic("Failed to initialize code space");
} }
}
#endif
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
if (g_settings.IsUsingFastmem() && !InitializeFastmem()) if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem"); Panic("Failed to initialize fastmem");
@ -509,25 +529,40 @@ void Flush()
#endif #endif
} }
#ifndef _MSC_VER
void __debugbreak() {}
#endif
void LogCurrentState() void LogCurrentState()
{ {
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) == 2546728915)
__debugbreak();
#endif
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) < 2546729174)
return;
#endif
const auto& regs = g_state.regs; const auto& regs = g_state.regs;
WriteToExecutionLog("tick=%u pc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X " WriteToExecutionLog(
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X " "tick=%u dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s " "t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
"ldv=%08X\n", "s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), regs.pc, regs.zero, regs.at, regs.v0, "ldv=%08X cause=%08X sr=%08X gte=%08X\n",
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), g_state.pending_ticks, g_state.downcount, g_state.pc,
regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.at, regs.v0, regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5,
regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, regs.ra, regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0,
(g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg), regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value); (g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits,
g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
} }
CodeBlockKey GetNextBlockKey() CodeBlockKey GetNextBlockKey()
{ {
CodeBlockKey key = {}; CodeBlockKey key = {};
key.SetPC(g_state.regs.pc); key.SetPC(g_state.pc);
key.user_mode = InUserMode(); key.user_mode = InUserMode();
return key; return key;
} }
@ -836,7 +871,7 @@ void FastCompileBlockFunction()
void InvalidCodeFunction() void InvalidCodeFunction()
{ {
Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.regs.pc); Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.pc);
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
{ {
if (g_settings.gpu_pgxp_cpu) if (g_settings.gpu_pgxp_cpu)
@ -1249,7 +1284,7 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
void CPU::Recompiler::Thunks::LogPC(u32 pc) void CPU::Recompiler::Thunks::LogPC(u32 pc)
{ {
#if 0 #if 1
CPU::CodeCache::LogCurrentState(); CPU::CodeCache::LogCurrentState();
#endif #endif
#if 0 #if 0

View file

@ -121,14 +121,17 @@ using FastMapTable = CodeBlock::HostCodePointer*;
void Initialize(); void Initialize();
void Shutdown(); void Shutdown();
void Execute(); [[noreturn]] void Execute();
#ifdef WITH_RECOMPILER #ifdef WITH_RECOMPILER
using DispatcherFunction = void (*)(); using DispatcherFunction = void (*)();
using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer); using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer);
FastMapTable* GetFastMapPointer(); FastMapTable* GetFastMapPointer();
void ExecuteRecompiler(); #endif
#if defined(WITH_RECOMPILER)
JitCodeBuffer& GetCodeBuffer();
#endif #endif
/// Flushes the code cache, forcing all blocks to be recompiled. /// Flushes the code cache, forcing all blocks to be recompiled.

View file

@ -4,6 +4,7 @@
#include "cpu_core.h" #include "cpu_core.h"
#include "bus.h" #include "bus.h"
#include "common/align.h" #include "common/align.h"
#include "common/fastjmp.h"
#include "common/file_system.h" #include "common/file_system.h"
#include "common/log.h" #include "common/log.h"
#include "cpu_core_private.h" #include "cpu_core_private.h"
@ -29,9 +30,10 @@ static void Branch(u32 target);
static void FlushPipeline(); static void FlushPipeline();
State g_state; State g_state;
bool g_using_interpreter = false;
bool TRACE_EXECUTION = false; bool TRACE_EXECUTION = false;
static fastjmp_buf s_jmp_buf;
static std::FILE* s_log_file = nullptr; static std::FILE* s_log_file = nullptr;
static bool s_log_file_opened = false; static bool s_log_file_opened = false;
static bool s_trace_to_log = false; static bool s_trace_to_log = false;
@ -41,6 +43,7 @@ static std::vector<Breakpoint> s_breakpoints;
static u32 s_breakpoint_counter = 1; static u32 s_breakpoint_counter = 1;
static u32 s_last_breakpoint_check_pc = INVALID_BREAKPOINT_PC; static u32 s_last_breakpoint_check_pc = INVALID_BREAKPOINT_PC;
static bool s_single_step = false; static bool s_single_step = false;
static bool s_single_step_done = false;
bool IsTraceEnabled() bool IsTraceEnabled()
{ {
@ -134,6 +137,7 @@ void Reset()
GTE::Reset(); GTE::Reset();
// TODO: This consumes cycles...
SetPC(RESET_VECTOR); SetPC(RESET_VECTOR);
} }
@ -141,7 +145,9 @@ bool DoState(StateWrapper& sw)
{ {
sw.Do(&g_state.pending_ticks); sw.Do(&g_state.pending_ticks);
sw.Do(&g_state.downcount); sw.Do(&g_state.downcount);
sw.DoArray(g_state.regs.r, countof(g_state.regs.r)); sw.DoArray(g_state.regs.r, static_cast<u32>(Reg::count));
sw.Do(&g_state.pc);
sw.Do(&g_state.npc);
sw.Do(&g_state.cop0_regs.BPC); sw.Do(&g_state.cop0_regs.BPC);
sw.Do(&g_state.cop0_regs.BDA); sw.Do(&g_state.cop0_regs.BDA);
sw.Do(&g_state.cop0_regs.TAR); sw.Do(&g_state.cop0_regs.TAR);
@ -161,11 +167,23 @@ bool DoState(StateWrapper& sw)
sw.Do(&g_state.next_instruction_is_branch_delay_slot); sw.Do(&g_state.next_instruction_is_branch_delay_slot);
sw.Do(&g_state.branch_was_taken); sw.Do(&g_state.branch_was_taken);
sw.Do(&g_state.exception_raised); sw.Do(&g_state.exception_raised);
sw.Do(&g_state.interrupt_delay); if (sw.GetVersion() < 59)
{
bool interrupt_delay;
sw.Do(&interrupt_delay);
}
sw.Do(&g_state.load_delay_reg); sw.Do(&g_state.load_delay_reg);
sw.Do(&g_state.load_delay_value); sw.Do(&g_state.load_delay_value);
sw.Do(&g_state.next_load_delay_reg); sw.Do(&g_state.next_load_delay_reg);
sw.Do(&g_state.next_load_delay_value); sw.Do(&g_state.next_load_delay_value);
// Compatibility with old states: clamp both load-delay register indices to Reg::count.
// Each field is clamped from its own value. Previously next_load_delay_reg was derived
// from load_delay_reg, which overwrote the pending load-delay register with the current
// one when loading a pre-v59 state.
if (sw.GetVersion() < 59)
{
  g_state.load_delay_reg =
    static_cast<Reg>(std::min(static_cast<u8>(g_state.load_delay_reg), static_cast<u8>(Reg::count)));
  g_state.next_load_delay_reg =
    static_cast<Reg>(std::min(static_cast<u8>(g_state.next_load_delay_reg), static_cast<u8>(Reg::count)));
}
sw.Do(&g_state.cache_control.bits); sw.Do(&g_state.cache_control.bits);
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size()); sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
@ -203,7 +221,7 @@ void UpdateFastmemBase()
ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc) ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc)
{ {
DebugAssert(Common::IsAlignedPow2(new_pc, 4)); DebugAssert(Common::IsAlignedPow2(new_pc, 4));
g_state.regs.npc = new_pc; g_state.npc = new_pc;
FlushPipeline(); FlushPipeline();
} }
@ -217,7 +235,7 @@ ALWAYS_INLINE_RELEASE void Branch(u32 target)
return; return;
} }
g_state.regs.npc = target; g_state.npc = target;
g_state.branch_was_taken = true; g_state.branch_was_taken = true;
} }
@ -257,14 +275,14 @@ ALWAYS_INLINE_RELEASE static void RaiseException(u32 CAUSE_bits, u32 EPC, u32 ve
// TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the // TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the
// exception hadn't occurred in the delay slot. // exception hadn't occurred in the delay slot.
g_state.cop0_regs.EPC -= UINT32_C(4); g_state.cop0_regs.EPC -= UINT32_C(4);
g_state.cop0_regs.TAR = g_state.regs.pc; g_state.cop0_regs.TAR = g_state.pc;
} }
// current -> previous, switch to kernel mode and disable interrupts // current -> previous, switch to kernel mode and disable interrupts
g_state.cop0_regs.sr.mode_bits <<= 2; g_state.cop0_regs.sr.mode_bits <<= 2;
// flush the pipeline - we don't want to execute the previously fetched instruction // flush the pipeline - we don't want to execute the previously fetched instruction
g_state.regs.npc = vector; g_state.npc = vector;
g_state.exception_raised = true; g_state.exception_raised = true;
FlushPipeline(); FlushPipeline();
} }
@ -299,7 +317,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
if (PCDrv::HandleSyscall(instruction_bits, g_state.regs)) if (PCDrv::HandleSyscall(instruction_bits, g_state.regs))
{ {
// immediately return // immediately return
g_state.regs.npc = EPC + 4; g_state.npc = EPC + 4;
FlushPipeline(); FlushPipeline();
return; return;
} }
@ -311,16 +329,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
void SetExternalInterrupt(u8 bit) void SetExternalInterrupt(u8 bit)
{ {
g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit); g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit);
CheckForPendingInterrupt();
if (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter)
{
g_state.interrupt_delay = 1;
}
else
{
g_state.interrupt_delay = 0;
CheckForPendingInterrupt();
}
} }
void ClearExternalInterrupt(u8 bit) void ClearExternalInterrupt(u8 bit)
@ -331,9 +340,7 @@ void ClearExternalInterrupt(u8 bit)
ALWAYS_INLINE_RELEASE static void UpdateLoadDelay() ALWAYS_INLINE_RELEASE static void UpdateLoadDelay()
{ {
// the old value is needed in case the delay slot instruction overwrites the same register // the old value is needed in case the delay slot instruction overwrites the same register
if (g_state.load_delay_reg != Reg::count) g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = g_state.next_load_delay_reg; g_state.load_delay_reg = g_state.next_load_delay_reg;
g_state.load_delay_value = g_state.next_load_delay_value; g_state.load_delay_value = g_state.next_load_delay_value;
g_state.next_load_delay_reg = Reg::count; g_state.next_load_delay_reg = Reg::count;
@ -343,16 +350,13 @@ ALWAYS_INLINE_RELEASE static void FlushPipeline()
{ {
// loads are flushed // loads are flushed
g_state.next_load_delay_reg = Reg::count; g_state.next_load_delay_reg = Reg::count;
if (g_state.load_delay_reg != Reg::count) g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
{ g_state.load_delay_reg = Reg::count;
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
}
// not in a branch delay slot // not in a branch delay slot
g_state.branch_was_taken = false; g_state.branch_was_taken = false;
g_state.next_instruction_is_branch_delay_slot = false; g_state.next_instruction_is_branch_delay_slot = false;
g_state.current_instruction_pc = g_state.regs.pc; g_state.current_instruction_pc = g_state.pc;
// prefetch the next instruction // prefetch the next instruction
FetchInstruction(); FetchInstruction();
@ -649,8 +653,8 @@ const std::array<DebuggerRegisterListEntry, NUM_DEBUGGER_REGISTER_LIST_ENTRIES>
{"ra", &CPU::g_state.regs.ra}, {"ra", &CPU::g_state.regs.ra},
{"hi", &CPU::g_state.regs.hi}, {"hi", &CPU::g_state.regs.hi},
{"lo", &CPU::g_state.regs.lo}, {"lo", &CPU::g_state.regs.lo},
{"pc", &CPU::g_state.regs.pc}, {"pc", &CPU::g_state.pc},
{"npc", &CPU::g_state.regs.npc}, {"npc", &CPU::g_state.npc},
{"COP0_SR", &CPU::g_state.cop0_regs.sr.bits}, {"COP0_SR", &CPU::g_state.cop0_regs.sr.bits},
{"COP0_CAUSE", &CPU::g_state.cop0_regs.cause.bits}, {"COP0_CAUSE", &CPU::g_state.cop0_regs.cause.bits},
@ -1111,7 +1115,7 @@ restart_instruction:
{ {
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
const u32 target = ReadReg(inst.r.rs); const u32 target = ReadReg(inst.r.rs);
WriteReg(inst.r.rd, g_state.regs.npc); WriteReg(inst.r.rd, g_state.npc);
Branch(target); Branch(target);
} }
break; break;
@ -1267,7 +1271,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue); WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, sxvalue, addr); PGXP::CPU_LBx(inst.bits, addr, sxvalue);
} }
break; break;
@ -1285,7 +1289,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue); WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, sxvalue, addr); PGXP::CPU_LHx(inst.bits, addr, sxvalue);
} }
break; break;
@ -1302,7 +1306,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, value); WriteRegDelayed(inst.i.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, value, addr); PGXP::CPU_LW(inst.bits, addr, value);
} }
break; break;
@ -1320,7 +1324,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue); WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, zxvalue, addr); PGXP::CPU_LBx(inst.bits, addr, zxvalue);
} }
break; break;
@ -1338,7 +1342,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue); WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, zxvalue, addr); PGXP::CPU_LHx(inst.bits, addr, zxvalue);
} }
break; break;
@ -1372,7 +1376,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, new_value); WriteRegDelayed(inst.i.rt, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, new_value, addr); PGXP::CPU_LW(inst.bits, addr, new_value);
} }
break; break;
@ -1386,7 +1390,7 @@ restart_instruction:
WriteMemoryByte(addr, value); WriteMemoryByte(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SB(inst.bits, Truncate8(value), addr); PGXP::CPU_SB(inst.bits, addr, value);
} }
break; break;
@ -1400,7 +1404,7 @@ restart_instruction:
WriteMemoryHalfWord(addr, value); WriteMemoryHalfWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SH(inst.bits, Truncate16(value), addr); PGXP::CPU_SH(inst.bits, addr, value);
} }
break; break;
@ -1414,7 +1418,7 @@ restart_instruction:
WriteMemoryWord(addr, value); WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, value, addr); PGXP::CPU_SW(inst.bits, addr, value);
} }
break; break;
@ -1447,22 +1451,22 @@ restart_instruction:
WriteMemoryWord(aligned_addr, new_value); WriteMemoryWord(aligned_addr, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, new_value, addr); PGXP::CPU_SW(inst.bits, aligned_addr, new_value);
} }
break; break;
case InstructionOp::j: case InstructionOp::j:
{ {
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
} }
break; break;
case InstructionOp::jal: case InstructionOp::jal:
{ {
WriteReg(Reg::ra, g_state.regs.npc); WriteReg(Reg::ra, g_state.npc);
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
} }
break; break;
@ -1472,7 +1476,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt)); const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt));
if (branch) if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); Branch(g_state.pc + (inst.i.imm_sext32() << 2));
} }
break; break;
@ -1481,7 +1485,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt)); const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt));
if (branch) if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); Branch(g_state.pc + (inst.i.imm_sext32() << 2));
} }
break; break;
@ -1490,7 +1494,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) > 0); const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) > 0);
if (branch) if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); Branch(g_state.pc + (inst.i.imm_sext32() << 2));
} }
break; break;
@ -1499,7 +1503,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true; g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) <= 0); const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) <= 0);
if (branch) if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); Branch(g_state.pc + (inst.i.imm_sext32() << 2));
} }
break; break;
@ -1515,10 +1519,10 @@ restart_instruction:
// register is still linked even if the branch isn't taken // register is still linked even if the branch isn't taken
const bool link = (rt & u8(0x1E)) == u8(0x10); const bool link = (rt & u8(0x1E)) == u8(0x10);
if (link) if (link)
WriteReg(Reg::ra, g_state.regs.npc); WriteReg(Reg::ra, g_state.npc);
if (branch) if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); Branch(g_state.pc + (inst.i.imm_sext32() << 2));
} }
break; break;
@ -1610,7 +1614,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value); WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CFC2(inst.bits, value, value); PGXP::CPU_MFC2(inst.bits, value);
} }
break; break;
@ -1620,7 +1624,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, value); GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CTC2(inst.bits, value, value); PGXP::CPU_MTC2(inst.bits, value);
} }
break; break;
@ -1630,7 +1634,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value); WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MFC2(inst.bits, value, value); PGXP::CPU_MFC2(inst.bits, value);
} }
break; break;
@ -1640,7 +1644,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), value); GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MTC2(inst.bits, value, value); PGXP::CPU_MTC2(inst.bits, value);
} }
break; break;
@ -1674,7 +1678,7 @@ restart_instruction:
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value); GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LWC2(inst.bits, value, addr); PGXP::CPU_LWC2(inst.bits, addr, value);
} }
break; break;
@ -1694,7 +1698,7 @@ restart_instruction:
WriteMemoryWord(addr, value); WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory) if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SWC2(inst.bits, value, addr); PGXP::CPU_SWC2(inst.bits, addr, value);
} }
break; break;
@ -1734,7 +1738,7 @@ void DispatchInterrupt()
{ {
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next // If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering.. // instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); SafeReadInstruction(g_state.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction()) if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
{ {
StallUntilGTEComplete(); StallUntilGTEComplete();
@ -1745,7 +1749,10 @@ void DispatchInterrupt()
RaiseException( RaiseException(
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n), g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
g_state.regs.pc); g_state.pc);
// Fix up downcount, the pending IRQ set it to zero.
TimingEvents::UpdateCPUDowncount();
} }
void UpdateDebugDispatcherFlag() void UpdateDebugDispatcherFlag()
@ -1763,14 +1770,16 @@ void UpdateDebugDispatcherFlag()
Log_DevPrintf("%s debug dispatcher", use_debug_dispatcher ? "Now using" : "No longer using"); Log_DevPrintf("%s debug dispatcher", use_debug_dispatcher ? "Now using" : "No longer using");
g_state.use_debug_dispatcher = use_debug_dispatcher; g_state.use_debug_dispatcher = use_debug_dispatcher;
ForceDispatcherExit(); ExitExecution();
} }
void ForceDispatcherExit() void ExitExecution()
{ {
// zero the downcount so we break out and switch // can't exit while running events without messing things up
g_state.downcount = 0; if (TimingEvents::IsRunningEvents())
g_state.frame_done = true; TimingEvents::SetFrameDone();
else
fastjmp_jmp(&s_jmp_buf, 1);
} }
bool HasAnyBreakpoints() bool HasAnyBreakpoints()
@ -1869,7 +1878,7 @@ void ClearBreakpoints()
bool AddStepOverBreakpoint() bool AddStepOverBreakpoint()
{ {
u32 bp_pc = g_state.regs.pc; u32 bp_pc = g_state.pc;
Instruction inst; Instruction inst;
if (!SafeReadInstruction(bp_pc, &inst.bits)) if (!SafeReadInstruction(bp_pc, &inst.bits))
@ -1880,7 +1889,7 @@ bool AddStepOverBreakpoint()
if (!IsCallInstruction(inst)) if (!IsCallInstruction(inst))
{ {
Host::ReportFormattedDebuggerMessage(Host::TranslateString("DebuggerMessage", "0x%08X is not a call instruction."), Host::ReportFormattedDebuggerMessage(Host::TranslateString("DebuggerMessage", "0x%08X is not a call instruction."),
g_state.regs.pc); g_state.pc);
return false; return false;
} }
@ -1890,7 +1899,7 @@ bool AddStepOverBreakpoint()
if (IsBranchInstruction(inst)) if (IsBranchInstruction(inst))
{ {
Host::ReportFormattedDebuggerMessage( Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.regs.pc); Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.pc);
return false; return false;
} }
@ -1905,7 +1914,7 @@ bool AddStepOverBreakpoint()
bool AddStepOutBreakpoint(u32 max_instructions_to_search) bool AddStepOutBreakpoint(u32 max_instructions_to_search)
{ {
// find the branch-to-ra instruction. // find the branch-to-ra instruction.
u32 ret_pc = g_state.regs.pc; u32 ret_pc = g_state.pc;
for (u32 i = 0; i < max_instructions_to_search; i++) for (u32 i = 0; i < max_instructions_to_search; i++)
{ {
ret_pc += sizeof(Instruction); ret_pc += sizeof(Instruction);
@ -1929,21 +1938,24 @@ bool AddStepOutBreakpoint(u32 max_instructions_to_search)
Host::ReportFormattedDebuggerMessage( Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "No return instruction found after %u instructions for step-out at %08X."), Host::TranslateString("DebuggerMessage", "No return instruction found after %u instructions for step-out at %08X."),
max_instructions_to_search, g_state.regs.pc); max_instructions_to_search, g_state.pc);
return false; return false;
} }
ALWAYS_INLINE_RELEASE static bool BreakpointCheck() ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
{ {
const u32 pc = g_state.regs.pc; const u32 pc = g_state.pc;
// single step - we want to break out after this instruction, so set a pending exit // single step - we want to break out after this instruction, so set a pending exit
// the bp check happens just before execution, so this is fine // the bp check happens just before execution, so this is fine
if (s_single_step) if (s_single_step)
{ {
ForceDispatcherExit(); if (s_single_step_done)
s_single_step = false; ExitExecution();
else
s_single_step_done = true;
s_last_breakpoint_check_pc = pc; s_last_breakpoint_check_pc = pc;
return false; return false;
} }
@ -2004,19 +2016,14 @@ ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
} }
template<PGXPMode pgxp_mode, bool debug> template<PGXPMode pgxp_mode, bool debug>
static void ExecuteImpl() [[noreturn]] static void ExecuteImpl()
{ {
g_using_interpreter = true; for (;;)
g_state.frame_done = false;
while (!g_state.frame_done)
{ {
TimingEvents::UpdateCPUDowncount(); TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount) while (g_state.pending_ticks < g_state.downcount)
{ {
if (HasPendingInterrupt() && !g_state.interrupt_delay)
DispatchInterrupt();
if constexpr (debug) if constexpr (debug)
{ {
Cop0ExecutionBreakpointCheck(); Cop0ExecutionBreakpointCheck();
@ -2028,12 +2035,11 @@ static void ExecuteImpl()
} }
} }
g_state.interrupt_delay = false;
g_state.pending_ticks++; g_state.pending_ticks++;
// now executing the instruction we previously fetched // now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits; g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc; g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot; g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false; g_state.next_instruction_is_branch_delay_slot = false;
@ -2065,46 +2071,74 @@ static void ExecuteImpl()
// next load delay // next load delay
UpdateLoadDelay(); UpdateLoadDelay();
} }
TimingEvents::RunEvents();
} }
} }
void Execute() static void ExecuteDebug()
{ {
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
{ {
if (g_settings.gpu_pgxp_cpu) if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>(); ExecuteImpl<PGXPMode::CPU, true>();
else else
ExecuteImpl<PGXPMode::Memory, false>(); ExecuteImpl<PGXPMode::Memory, true>();
} }
else else
{ {
ExecuteImpl<PGXPMode::Disabled, false>(); ExecuteImpl<PGXPMode::Disabled, true>();
} }
} }
void ExecuteDebug() void Execute()
{ {
if (g_settings.gpu_pgxp_enable) const CPUExecutionMode exec_mode = g_settings.cpu_execution_mode;
const bool use_debug_dispatcher = g_state.use_debug_dispatcher;
if (fastjmp_set(&s_jmp_buf) != 0)
{ {
if (g_settings.gpu_pgxp_cpu) // Before we return, set npc to pc so that we can switch from recs to int.
ExecuteImpl<PGXPMode::CPU, true>(); if (exec_mode != CPUExecutionMode::Interpreter && !use_debug_dispatcher)
else g_state.npc = g_state.pc;
ExecuteImpl<PGXPMode::Memory, true>();
return;
} }
else
if (use_debug_dispatcher)
{ {
ExecuteImpl<PGXPMode::Disabled, true>(); ExecuteDebug();
return;
}
switch (exec_mode)
{
case CPUExecutionMode::Recompiler:
case CPUExecutionMode::CachedInterpreter:
CodeCache::Execute();
break;
case CPUExecutionMode::Interpreter:
default:
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
else
ExecuteImpl<PGXPMode::Memory, false>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
}
}
break;
} }
} }
void SingleStep() void SingleStep()
{ {
s_single_step = true; if (fastjmp_set(&s_jmp_buf) == 0)
ExecuteDebug(); ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.regs.pc); Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.pc);
} }
namespace CodeCache { namespace CodeCache {
@ -2113,8 +2147,8 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block) void InterpretCachedBlock(const CodeBlock& block)
{ {
// set up the state so we've already fetched the instruction // set up the state so we've already fetched the instruction
DebugAssert(g_state.regs.pc == block.GetPC()); DebugAssert(g_state.pc == block.GetPC());
g_state.regs.npc = block.GetPC() + 4; g_state.npc = block.GetPC() + 4;
for (const CodeBlockInstruction& cbi : block.instructions) for (const CodeBlockInstruction& cbi : block.instructions)
{ {
@ -2129,8 +2163,8 @@ void InterpretCachedBlock(const CodeBlock& block)
g_state.exception_raised = false; g_state.exception_raised = false;
// update pc // update pc
g_state.regs.pc = g_state.regs.npc; g_state.pc = g_state.npc;
g_state.regs.npc += 4; g_state.npc += 4;
// execute the instruction we previously fetched // execute the instruction we previously fetched
ExecuteInstruction<pgxp_mode, false>(); ExecuteInstruction<pgxp_mode, false>();
@ -2153,7 +2187,7 @@ template void InterpretCachedBlock<PGXPMode::CPU>(const CodeBlock& block);
template<PGXPMode pgxp_mode> template<PGXPMode pgxp_mode>
void InterpretUncachedBlock() void InterpretUncachedBlock()
{ {
g_state.regs.npc = g_state.regs.pc; g_state.npc = g_state.pc;
if (!FetchInstructionForInterpreterFallback()) if (!FetchInstructionForInterpreterFallback())
return; return;
@ -2166,7 +2200,7 @@ void InterpretUncachedBlock()
// now executing the instruction we previously fetched // now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits; g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc; g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot; g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false; g_state.next_instruction_is_branch_delay_slot = false;
@ -2182,7 +2216,7 @@ void InterpretUncachedBlock()
} }
else else
{ {
g_state.regs.pc = g_state.regs.npc; g_state.pc = g_state.npc;
} }
// execute the instruction we previously fetched // execute the instruction we previously fetched

View file

@ -56,7 +56,9 @@ struct State
Registers regs = {}; Registers regs = {};
Cop0Registers cop0_regs = {}; Cop0Registers cop0_regs = {};
Instruction next_instruction = {};
u32 pc; // at execution time: the address of the next instruction to execute (already fetched)
u32 npc; // at execution time: the address of the next instruction to fetch
// address of the instruction currently being executed // address of the instruction currently being executed
Instruction current_instruction = {}; Instruction current_instruction = {};
@ -66,15 +68,14 @@ struct State
bool next_instruction_is_branch_delay_slot = false; bool next_instruction_is_branch_delay_slot = false;
bool branch_was_taken = false; bool branch_was_taken = false;
bool exception_raised = false; bool exception_raised = false;
bool interrupt_delay = false;
bool frame_done = false;
// load delays // load delays
Reg load_delay_reg = Reg::count; Reg load_delay_reg = Reg::count;
u32 load_delay_value = 0;
Reg next_load_delay_reg = Reg::count; Reg next_load_delay_reg = Reg::count;
u32 load_delay_value = 0;
u32 next_load_delay_value = 0; u32 next_load_delay_value = 0;
Instruction next_instruction = {};
CacheControl cache_control{0}; CacheControl cache_control{0};
// GTE registers are stored here so we can access them on ARM with a single instruction // GTE registers are stored here so we can access them on ARM with a single instruction
@ -95,7 +96,6 @@ struct State
}; };
extern State g_state; extern State g_state;
extern bool g_using_interpreter;
void Initialize(); void Initialize();
void Shutdown(); void Shutdown();
@ -106,38 +106,37 @@ void UpdateFastmemBase();
/// Executes interpreter loop. /// Executes interpreter loop.
void Execute(); void Execute();
void ExecuteDebug();
void SingleStep(); void SingleStep();
// Forces an early exit from the CPU dispatcher. // Forces an early exit from the CPU dispatcher.
void ForceDispatcherExit(); void ExitExecution();
ALWAYS_INLINE Registers& GetRegs() ALWAYS_INLINE static Registers& GetRegs()
{ {
return g_state.regs; return g_state.regs;
} }
ALWAYS_INLINE TickCount GetPendingTicks() ALWAYS_INLINE static TickCount GetPendingTicks()
{ {
return g_state.pending_ticks; return g_state.pending_ticks;
} }
ALWAYS_INLINE void ResetPendingTicks() ALWAYS_INLINE static void ResetPendingTicks()
{ {
g_state.gte_completion_tick = g_state.gte_completion_tick =
(g_state.pending_ticks < g_state.gte_completion_tick) ? (g_state.gte_completion_tick - g_state.pending_ticks) : 0; (g_state.pending_ticks < g_state.gte_completion_tick) ? (g_state.gte_completion_tick - g_state.pending_ticks) : 0;
g_state.pending_ticks = 0; g_state.pending_ticks = 0;
} }
ALWAYS_INLINE void AddPendingTicks(TickCount ticks) ALWAYS_INLINE static void AddPendingTicks(TickCount ticks)
{ {
g_state.pending_ticks += ticks; g_state.pending_ticks += ticks;
} }
// state helpers // state helpers
ALWAYS_INLINE bool InUserMode() ALWAYS_INLINE static bool InUserMode()
{ {
return g_state.cop0_regs.sr.KUc; return g_state.cop0_regs.sr.KUc;
} }
ALWAYS_INLINE bool InKernelMode() ALWAYS_INLINE static bool InKernelMode()
{ {
return !g_state.cop0_regs.sr.KUc; return !g_state.cop0_regs.sr.KUc;
} }

View file

@ -12,13 +12,13 @@ void RaiseException(Exception excode);
void RaiseException(u32 CAUSE_bits, u32 EPC); void RaiseException(u32 CAUSE_bits, u32 EPC);
void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits); void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits);
ALWAYS_INLINE bool HasPendingInterrupt() ALWAYS_INLINE static bool HasPendingInterrupt()
{ {
return g_state.cop0_regs.sr.IEc && return g_state.cop0_regs.sr.IEc &&
(((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0); (((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
} }
ALWAYS_INLINE void CheckForPendingInterrupt() ALWAYS_INLINE static void CheckForPendingInterrupt()
{ {
if (HasPendingInterrupt()) if (HasPendingInterrupt())
g_state.downcount = 0; g_state.downcount = 0;
@ -28,36 +28,36 @@ void DispatchInterrupt();
void UpdateDebugDispatcherFlag(); void UpdateDebugDispatcherFlag();
// icache stuff // icache stuff
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address) ALWAYS_INLINE static bool IsCachedAddress(VirtualMemoryAddress address)
{ {
// KUSEG, KSEG0 // KUSEG, KSEG0
return (address >> 29) <= 4; return (address >> 29) <= 4;
} }
ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address) ALWAYS_INLINE static u32 GetICacheLine(VirtualMemoryAddress address)
{ {
return ((address >> 4) & 0xFFu); return ((address >> 4) & 0xFFu);
} }
ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address) ALWAYS_INLINE static u32 GetICacheLineOffset(VirtualMemoryAddress address)
{ {
return (address & (ICACHE_LINE_SIZE - 1)); return (address & (ICACHE_LINE_SIZE - 1));
} }
ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address) ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{ {
return (address & ICACHE_TAG_ADDRESS_MASK); return (address & ICACHE_TAG_ADDRESS_MASK);
} }
ALWAYS_INLINE u32 GetICacheFillTagForAddress(VirtualMemoryAddress address) ALWAYS_INLINE static u32 GetICacheFillTagForAddress(VirtualMemoryAddress address)
{ {
static const u32 invalid_bits[4] = {0, 1, 3, 7}; static const u32 invalid_bits[4] = {0, 1, 3, 7};
return GetICacheTagForAddress(address) | invalid_bits[(address >> 2) & 0x03u]; return GetICacheTagForAddress(address) | invalid_bits[(address >> 2) & 0x03u];
} }
ALWAYS_INLINE u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address) ALWAYS_INLINE static u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address)
{ {
static const u32 mask[4] = {ICACHE_TAG_ADDRESS_MASK | 1, ICACHE_TAG_ADDRESS_MASK | 2, ICACHE_TAG_ADDRESS_MASK | 4, static const u32 mask[4] = {ICACHE_TAG_ADDRESS_MASK | 1, ICACHE_TAG_ADDRESS_MASK | 2, ICACHE_TAG_ADDRESS_MASK | 4,
ICACHE_TAG_ADDRESS_MASK | 8}; ICACHE_TAG_ADDRESS_MASK | 8};
return mask[(address >> 2) & 0x03u]; return mask[(address >> 2) & 0x03u];
} }
ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address) ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address)
{ {
const u32 line = GetICacheLine(address); const u32 line = GetICacheLine(address);
return ((g_state.icache_tags[line] & GetICacheTagMaskForAddress(address)) == GetICacheTagForAddress(address)); return ((g_state.icache_tags[line] & GetICacheTagMaskForAddress(address)) == GetICacheTagForAddress(address));
@ -68,7 +68,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address);
u32 FillICache(VirtualMemoryAddress address); u32 FillICache(VirtualMemoryAddress address);
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks); void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address) ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address)
{ {
switch ((address >> 29)) switch ((address >> 29))
{ {
@ -91,12 +91,12 @@ ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address)
} }
} }
ALWAYS_INLINE PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address) ALWAYS_INLINE static constexpr PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address)
{ {
return (address & PHYSICAL_MEMORY_ADDRESS_MASK); return (address & PHYSICAL_MEMORY_ADDRESS_MASK);
} }
ALWAYS_INLINE VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment) ALWAYS_INLINE static VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment)
{ {
static constexpr std::array<VirtualMemoryAddress, 4> bases = {{0x00000000, 0x80000000, 0xA0000000, 0xE0000000}}; static constexpr std::array<VirtualMemoryAddress, 4> bases = {{0x00000000, 0x80000000, 0xA0000000, 0xE0000000}};
return bases[static_cast<u32>(segment)] | address; return bases[static_cast<u32>(segment)] | address;
@ -115,12 +115,12 @@ bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks); void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size); void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
ALWAYS_INLINE void AddGTETicks(TickCount ticks) ALWAYS_INLINE static void AddGTETicks(TickCount ticks)
{ {
g_state.gte_completion_tick = g_state.pending_ticks + ticks + 1; g_state.gte_completion_tick = g_state.pending_ticks + ticks + 1;
} }
ALWAYS_INLINE void StallUntilGTEComplete() ALWAYS_INLINE static void StallUntilGTEComplete()
{ {
g_state.pending_ticks = g_state.pending_ticks =
(g_state.gte_completion_tick > g_state.pending_ticks) ? g_state.gte_completion_tick : g_state.pending_ticks; (g_state.gte_completion_tick > g_state.pending_ticks) ? g_state.gte_completion_tick : g_state.pending_ticks;

View file

@ -1156,7 +1156,7 @@ Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */)
void CodeGenerator::WriteNewPC(const Value& value, bool commit) void CodeGenerator::WriteNewPC(const Value& value, bool commit)
{ {
// TODO: This _could_ be moved into the register cache, but would it gain anything? // TODO: This _could_ be moved into the register cache, but would it gain anything?
EmitStoreGuestRegister(Reg::pc, value); EmitStoreCPUStructField(offsetof(CPU::State, pc), value);
if (commit) if (commit)
{ {
m_pc_valid = value.IsConstant(); m_pc_valid = value.IsConstant();
@ -1450,7 +1450,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_8); result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_8);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb)); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb));
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address); EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec) if (address_spec)
{ {
@ -1468,7 +1468,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh)); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh));
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address); EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec) if (address_spec)
{ {
@ -1483,7 +1483,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
{ {
result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32); result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32);
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address); EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec) if (address_spec)
value_spec = SpeculativeReadMemory(*address_spec); value_spec = SpeculativeReadMemory(*address_spec);
@ -1522,10 +1522,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sb: case InstructionOp::sb:
{ {
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
{ EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_8), address);
}
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_8, value); EmitStoreGuestMemory(cbi, address, address_spec, RegSize_8, value);
@ -1553,10 +1550,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sh: case InstructionOp::sh:
{ {
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
{ EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_16), address);
}
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_16, value); EmitStoreGuestMemory(cbi, address, address_spec, RegSize_16, value);
@ -1584,7 +1578,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sw: case InstructionOp::sw:
{ {
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address); EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, value); EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, value);
@ -1688,7 +1682,7 @@ bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi)
shift.ReleaseAndClear(); shift.ReleaseAndClear();
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), mem, address); EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, mem);
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem)); m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem));
@ -1751,7 +1745,7 @@ bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi)
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, mem); EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, mem);
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), mem, address); EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, mem);
InstructionEpilogue(cbi); InstructionEpilogue(cbi);
return true; return true;
@ -2950,7 +2944,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
DoGTERegisterWrite(reg, value); DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), address, value);
} }
else else
{ {
@ -2958,7 +2952,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
EmitStoreGuestMemory(cbi, address, spec_address, RegSize_32, value); EmitStoreGuestMemory(cbi, address, spec_address, RegSize_32, value);
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), address, value);
SpeculativeValue spec_base = SpeculativeReadReg(cbi.instruction.i.rs); SpeculativeValue spec_base = SpeculativeReadReg(cbi.instruction.i.rs);
if (spec_base) if (spec_base)
@ -2988,11 +2982,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
// PGXP done first here before ownership is transferred. // PGXP done first here before ownership is transferred.
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
{ EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(cbi.instruction.bits), value);
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? PGXP::CPU_CFC2 : PGXP::CPU_MFC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value)); m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value));
SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt); SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt);
@ -3014,11 +3004,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
DoGTERegisterWrite(reg, value); DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable) if (g_settings.gpu_pgxp_enable)
{ EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(cbi.instruction.bits), value);
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? PGXP::CPU_CTC2 : PGXP::CPU_MTC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
InstructionEpilogue(cbi); InstructionEpilogue(cbi);
return true; return true;

View file

@ -32,9 +32,6 @@ constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u32 FUNCTION_STACK_SIZE = constexpr u32 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE; FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
static s32 GetPCDisplacement(const void* current, const void* target) static s32 GetPCDisplacement(const void* current, const void* target)
{ {
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4)); Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -201,10 +198,7 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_ret
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE); m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
if (emit_return) if (emit_return)
{
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr); m_emit->bx(a32::lr);
}
} }
void CodeGenerator::EmitExceptionExit() void CodeGenerator::EmitExceptionExit()
@ -219,7 +213,6 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false); m_register_cache.PopCalleeSavedRegisters(false);
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE); m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr); m_emit->bx(a32::lr);
} }
@ -2072,64 +2065,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(RCPUPTR, &g_state); EmitLoadGlobalAddress(RCPUPTR, &g_state);
a32::Label frame_done_loop; a32::Label event_test;
a32::Label exit_dispatcher; m_emit->b(&event_test);
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, frame_done)));
m_emit->tst(a32::r0, 1);
m_emit->b(a32::ne, &exit_dispatcher);
// r0 <- sr
a32::Label no_interrupt;
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tst(a32::r0, 1);
m_emit->b(a32::eq, &no_interrupt);
// r1 <- cause
// r0 (sr) & cause
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a32::r0, a32::r0, a32::r1);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a32::r0, 0xFF00);
m_emit->b(a32::eq, &no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// r0 <- head event->downcount
// downcount <- r0
EmitLoadGlobalAddress(0, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0));
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0, offsetof(TimingEvent, m_downcount)));
m_emit->str(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
// main dispatch loop // main dispatch loop
a32::Label main_loop; a32::Label main_loop;
m_emit->Bind(&main_loop); m_emit->Bind(&main_loop);
s_dispatcher_return_address = GetCurrentCodePointer();
// r0 <- pending_ticks
// r1 <- downcount
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
// while pending_ticks < downcount
a32::Label downcount_hit;
m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::ge, &downcount_hit);
// time to lookup the block // time to lookup the block
// r0 <- pc // r0 <- pc
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, regs.pc))); m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pc)));
// r1 <- s_fast_map[pc >> 16] // r1 <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer()); EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer());
@ -2140,21 +2085,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0)); m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0));
m_emit->blx(a32::r0); m_emit->blx(a32::r0);
// end while // r0 <- pending_ticks
m_emit->Bind(&downcount_hit); // r1 <- downcount
// check events then for frame done
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks))); m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(1, TimingEvents::GetHeadEventPtr()); m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
m_emit->ldr(a32::r1, a32::MemOperand(a32::r1));
m_emit->ldr(a32::r1, a32::MemOperand(a32::r1, offsetof(TimingEvent, m_downcount))); // while downcount < pending_ticks
a32::Label downcount_hit;
m_emit->cmp(a32::r0, a32::r1); m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::lt, &frame_done_loop); m_emit->b(a32::lt, &main_loop);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&frame_done_loop);
// all done // end while
m_emit->Bind(&exit_dispatcher); m_emit->Bind(&event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&main_loop);
RestoreStackAfterCall(stack_adjust); RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);

View file

@ -30,9 +30,6 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE = constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE; FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
static s64 GetPCDisplacement(const void* current, const void* target) static s64 GetPCDisplacement(const void* current, const void* target)
{ {
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4)); Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -256,7 +253,6 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false); m_register_cache.PopCalleeSavedRegisters(false);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE); m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret(); m_emit->Ret();
} }
@ -2278,62 +2274,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(RCPUPTR, &g_state); EmitLoadGlobalAddress(RCPUPTR, &g_state);
a64::Label frame_done_loop; a64::Label event_test;
a64::Label exit_dispatcher; m_emit->b(&event_test);
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
m_emit->tbnz(a64::w8, 0, &exit_dispatcher);
// x8 <- sr
a64::Label no_interrupt;
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tbz(a64::w8, 0, &no_interrupt);
// x9 <- cause
// x8 (sr) & cause
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a64::w8, a64::w8, a64::w9);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a64::w8, 0xFF00);
m_emit->b(&no_interrupt, a64::eq);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// x8 <- head event->downcount
// downcount <- x8
EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// main dispatch loop // main dispatch loop
a64::Label main_loop; a64::Label main_loop;
m_emit->Bind(&main_loop); m_emit->Bind(&main_loop);
s_dispatcher_return_address = GetCurrentCodePointer();
// w8 <- pending_ticks
// w9 <- downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// while pending_ticks < downcount
a64::Label downcount_hit;
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&downcount_hit, a64::ge);
// time to lookup the block // time to lookup the block
// w8 <- pc // w8 <- pc
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc))); m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pc)));
// x9 <- s_fast_map[pc >> 16] // x9 <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer()); EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer());
@ -2345,21 +2295,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3)); m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
m_emit->blr(a64::x8); m_emit->blr(a64::x8);
// end while // w8 <- pending_ticks
m_emit->Bind(&downcount_hit); // w9 <- downcount
// check events then for frame done
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks))); m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(9, TimingEvents::GetHeadEventPtr()); m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
m_emit->ldr(a64::x9, a64::MemOperand(a64::x9));
m_emit->ldr(a64::w9, a64::MemOperand(a64::x9, offsetof(TimingEvent, m_downcount))); // while downcount < pending_ticks
m_emit->cmp(a64::w8, a64::w9); m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&frame_done_loop, a64::lt); m_emit->b(&main_loop, a64::lt);
m_emit->Bind(&event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents)); EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&frame_done_loop); m_emit->b(&main_loop);
// all done // all done
m_emit->Bind(&exit_dispatcher);
RestoreStackAfterCall(stack_adjust); RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);
m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE); m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);

View file

@ -3024,59 +3024,17 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state); EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
Xbyak::Label frame_done_loop; Xbyak::Label event_test;
Xbyak::Label exit_dispatcher; m_emit->jmp(event_test);
m_emit->L(frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);
// eax <- sr
Xbyak::Label no_interrupt;
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]);
// if Iec == 0 then goto no_interrupt
m_emit->test(m_emit->eax, 1);
m_emit->jz(no_interrupt);
// sr & cause
m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->test(m_emit->eax, 0xFF00);
m_emit->jz(no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->L(no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// eax <- head event->downcount
// downcount <- eax
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax);
// main dispatch loop // main dispatch loop
Xbyak::Label main_loop; Xbyak::Label main_loop;
m_emit->align(16); m_emit->align(16);
m_emit->L(main_loop); m_emit->L(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jge(downcount_hit);
// time to lookup the block // time to lookup the block
// eax <- pc // eax <- pc
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]); m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pc)]);
// rcx <- s_fast_map[pc >> 16] // rcx <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer()); EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer());
@ -3087,22 +3045,19 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
// call(rcx[pc * 2]) (fast_map[pc >> 2]) // call(rcx[pc * 2]) (fast_map[pc >> 2])
m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]); m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]);
m_emit->jmp(main_loop); // eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// end while // while eax < downcount
m_emit->L(downcount_hit); Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jl(main_loop);
// check events then for frame done m_emit->L(event_test);
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
m_emit->jg(frame_done_loop);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents)); EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->jmp(frame_done_loop); m_emit->jmp(main_loop);
// all done // all done
m_emit->L(exit_dispatcher);
RestoreStackAfterCall(stack_adjust); RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret(); m_emit->ret();

View file

@ -130,6 +130,13 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Alignment of code storage. // Alignment of code storage.
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096; constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
#elif defined(CPU_RISCV64)
using HostReg = unsigned;
// Alignment of code storage. // Alignment of code storage.
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
#else #else
using HostReg = int; using HostReg = int;

View file

@ -60,12 +60,8 @@ enum class Reg : u8
sp, sp,
fp, fp,
ra, ra,
// not accessible to instructions
hi, hi,
lo, lo,
pc,
npc,
count count
}; };
@ -213,6 +209,7 @@ union Instruction
} }
ALWAYS_INLINE Cop0Instruction Cop0Op() const { return static_cast<Cop0Instruction>(bits & UINT32_C(0x3F)); } ALWAYS_INLINE Cop0Instruction Cop0Op() const { return static_cast<Cop0Instruction>(bits & UINT32_C(0x3F)); }
ALWAYS_INLINE u32 Cop2Index() const { return ((bits >> 11) & 0x1F) | ((bits >> 17) & 0x20); }
} cop; } cop;
bool IsCop2Instruction() const bool IsCop2Instruction() const
@ -240,7 +237,7 @@ struct Registers
{ {
union union
{ {
u32 r[static_cast<u8>(Reg::count)]; u32 r[static_cast<u8>(Reg::count) + 1]; // +1 for the dummy load delay write slot
struct struct
{ {
@ -276,12 +273,8 @@ struct Registers
u32 sp; // r29 u32 sp; // r29
u32 fp; // r30 u32 fp; // r30
u32 ra; // r31 u32 ra; // r31
// not accessible to instructions
u32 hi; u32 hi;
u32 lo; u32 lo;
u32 pc; // at execution time: the address of the next instruction to execute (already fetched)
u32 npc; // at execution time: the address of the next instruction to fetch
}; };
}; };
}; };

View file

@ -107,7 +107,7 @@ static const std::array<u32*, 38> REGISTERS {
&CPU::g_state.regs.hi, &CPU::g_state.regs.hi,
&CPU::g_state.cop0_regs.BadVaddr, &CPU::g_state.cop0_regs.BadVaddr,
&CPU::g_state.cop0_regs.cause.bits, &CPU::g_state.cop0_regs.cause.bits,
&CPU::g_state.regs.pc, &CPU::g_state.pc,
}; };
/// Number of registers in GDB remote protocol for MIPS III. /// Number of registers in GDB remote protocol for MIPS III.

View file

@ -901,9 +901,10 @@ void GPU::CRTCTickEvent(TickCount ticks)
InterruptController::InterruptRequest(InterruptController::IRQ::VBLANK); InterruptController::InterruptRequest(InterruptController::IRQ::VBLANK);
// flush any pending draws and "scan out" the image // flush any pending draws and "scan out" the image
// TODO: move present in here I guess
FlushRender(); FlushRender();
UpdateDisplay(); UpdateDisplay();
System::FrameDone(); TimingEvents::SetFrameDone();
// switch fields early. this is needed so we draw to the correct one. // switch fields early. this is needed so we draw to the correct one.
if (m_GPUSTAT.InInterleaved480iMode()) if (m_GPUSTAT.InInterleaved480iMode())

View file

@ -4,13 +4,13 @@
#include "gte.h" #include "gte.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/bitutils.h" #include "common/bitutils.h"
#include "util/state_wrapper.h"
#include "cpu_core.h" #include "cpu_core.h"
#include "cpu_core_private.h" #include "cpu_core_private.h"
#include "host_display.h" #include "host_display.h"
#include "pgxp.h" #include "pgxp.h"
#include "settings.h" #include "settings.h"
#include "timing_event.h" #include "timing_event.h"
#include "util/state_wrapper.h"
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <numeric> #include <numeric>
@ -471,11 +471,12 @@ ALWAYS_INLINE static u32 UNRDivide(u32 lhs, u32 rhs)
return std::min<u32>(0x1FFFF, result); return std::min<u32>(0x1FFFF, result);
} }
static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) static void MulMatVec(const s16* M_, const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{ {
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \ #define dot3(i) \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][0]) * s64(Vx)) + (s64(M[i][1]) * s64(Vy))) + \ TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M(i, 0)) * s64(Vx)) + (s64(M(i, 1)) * s64(Vy))) + \
(s64(M[i][2]) * s64(Vz)), \ (s64(M(i, 2)) * s64(Vz)), \
shift, lm) shift, lm)
dot3(0); dot3(0);
@ -483,15 +484,17 @@ static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 V
dot3(2); dot3(2);
#undef dot3 #undef dot3
#undef M
} }
static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) static void MulMatVec(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{ {
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \ #define dot3(i) \
TruncateAndSetMACAndIR<i + 1>( \ TruncateAndSetMACAndIR<i + 1>( \
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx))) + \ SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx))) + \
(s64(M[i][1]) * s64(Vy))) + \ (s64(M(i, 1)) * s64(Vy))) + \
(s64(M[i][2]) * s64(Vz)), \ (s64(M(i, 2)) * s64(Vz)), \
shift, lm) shift, lm)
dot3(0); dot3(0);
@ -499,19 +502,20 @@ static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16
dot3(2); dot3(2);
#undef dot3 #undef dot3
#undef M
} }
static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, static void MulMatVecBuggy(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
bool lm)
{ {
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \ #define dot3(i) \
do \ do \
{ \ { \
TruncateAndSetIR<i + 1>(static_cast<s32>(SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>( \ TruncateAndSetIR<i + 1>(static_cast<s32>(SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>( \
(s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx)))) >> \ (s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx)))) >> \
shift), \ shift), \
false); \ false); \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][1]) * s64(Vy))) + (s64(M[i][2]) * s64(Vz)), \ TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M(i, 1)) * s64(Vy))) + (s64(M(i, 2)) * s64(Vz)), \
shift, lm); \ shift, lm); \
} while (0) } while (0)
@ -520,82 +524,50 @@ static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, cons
dot3(2); dot3(2);
#undef dot3 #undef dot3
#undef M
} }
static void Execute_MVMVA(Instruction inst) static void Execute_MVMVA(Instruction inst)
{ {
REGS.FLAG.Clear(); REGS.FLAG.Clear();
// TODO: Remove memcpy.. static constexpr const s16* M_lookup[4] = {&REGS.RT[0][0], &REGS.LLM[0][0], &REGS.LCM[0][0], nullptr};
s16 M[3][3]; static constexpr const s16* V_lookup[4][3] = {
switch (inst.mvmva_multiply_matrix) {&REGS.V0[0], &REGS.V0[1], &REGS.V0[2]},
{&REGS.V1[0], &REGS.V1[1], &REGS.V1[2]},
{&REGS.V2[0], &REGS.V2[1], &REGS.V2[2]},
{&REGS.IR1, &REGS.IR2, &REGS.IR3},
};
static constexpr const s32 zero_T[3] = {};
static constexpr const s32* T_lookup[4] = {REGS.TR, REGS.BK, REGS.FC, zero_T};
const s16* M = M_lookup[inst.mvmva_multiply_matrix];
const s16* const* const V = V_lookup[inst.mvmva_multiply_vector];
const s32* const T = T_lookup[inst.mvmva_translation_vector];
s16 buggy_M[3][3];
if (!M)
{ {
case 0: // buggy
std::memcpy(M, REGS.RT, sizeof(s16) * 3 * 3); buggy_M[0][0] = -static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
break; buggy_M[0][1] = static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
case 1: buggy_M[0][2] = REGS.IR0;
std::memcpy(M, REGS.LLM, sizeof(s16) * 3 * 3); buggy_M[1][0] = REGS.RT[0][2];
break; buggy_M[1][1] = REGS.RT[0][2];
case 2: buggy_M[1][2] = REGS.RT[0][2];
std::memcpy(M, REGS.LCM, sizeof(s16) * 3 * 3); buggy_M[2][0] = REGS.RT[1][1];
break; buggy_M[2][1] = REGS.RT[1][1];
default: buggy_M[2][2] = REGS.RT[1][1];
{ M = &buggy_M[0][0];
// buggy
M[0][0] = -static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
M[0][1] = static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
M[0][2] = REGS.IR0;
M[1][0] = REGS.RT[0][2];
M[1][1] = REGS.RT[0][2];
M[1][2] = REGS.RT[0][2];
M[2][0] = REGS.RT[1][1];
M[2][1] = REGS.RT[1][1];
M[2][2] = REGS.RT[1][1];
}
break;
} }
s16 Vx, Vy, Vz; const s16 Vx = *V[0];
switch (inst.mvmva_multiply_vector) const s16 Vy = *V[1];
{ const s16 Vz = *V[2];
case 0: if (inst.mvmva_translation_vector != 2)
Vx = REGS.V0[0]; MulMatVec(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
Vy = REGS.V0[1]; else
Vz = REGS.V0[2]; MulMatVecBuggy(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 1:
Vx = REGS.V1[0];
Vy = REGS.V1[1];
Vz = REGS.V1[2];
break;
case 2:
Vx = REGS.V2[0];
Vy = REGS.V2[1];
Vz = REGS.V2[2];
break;
default:
Vx = REGS.IR1;
Vy = REGS.IR2;
Vz = REGS.IR3;
break;
}
static const s32 zero_T[3] = {};
switch (inst.mvmva_translation_vector)
{
case 0:
MulMatVec(M, REGS.TR, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 1:
MulMatVec(M, REGS.BK, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 2:
MulMatVecBuggy(M, REGS.FC, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
default:
MulMatVec(M, zero_T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
}
REGS.FLAG.UpdateError(); REGS.FLAG.UpdateError();
} }
@ -874,10 +846,10 @@ static ALWAYS_INLINE void InterpolateColor(s64 in_MAC1, s64 in_MAC2, s64 in_MAC3
static void NCS(const s16 V[3], u8 shift, bool lm) static void NCS(const s16 V[3], u8 shift, bool lm)
{ {
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm); MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] // Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
PushRGBFromMAC(); PushRGBFromMAC();
@ -909,10 +881,10 @@ static void Execute_NCT(Instruction inst)
static void NCCS(const s16 V[3], u8 shift, bool lm) static void NCCS(const s16 V[3], u8 shift, bool lm)
{ {
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm); MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx // [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
@ -950,10 +922,10 @@ static void Execute_NCCT(Instruction inst)
static void NCDS(const s16 V[3], u8 shift, bool lm) static void NCDS(const s16 V[3], u8 shift, bool lm)
{ {
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm); MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// No need to assign these to MAC[1-3], as it'll never overflow. // No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
@ -999,7 +971,7 @@ static void Execute_CC(Instruction inst)
const bool lm = inst.lm; const bool lm = inst.lm;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) // [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12)
@ -1021,7 +993,7 @@ static void Execute_CDP(Instruction inst)
const bool lm = inst.lm; const bool lm = inst.lm;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// No need to assign these to MAC[1-3], as it'll never overflow. // No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4

View file

@ -108,8 +108,7 @@ static PGXP_value CP0_reg[32];
#define CPU_Lo CPU_reg[33] #define CPU_Lo CPU_reg[33]
// GTE registers // GTE registers
static PGXP_value GTE_data_reg[32]; static PGXP_value GTE_regs[64];
static PGXP_value GTE_ctrl_reg[32];
static PGXP_value* Mem = nullptr; static PGXP_value* Mem = nullptr;
static PGXP_value* vertexCache = nullptr; static PGXP_value* vertexCache = nullptr;
@ -274,8 +273,7 @@ void Initialize()
std::memset(CPU_reg, 0, sizeof(CPU_reg)); std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg)); std::memset(CP0_reg, 0, sizeof(CP0_reg));
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg)); std::memset(GTE_regs, 0, sizeof(GTE_regs));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
if (!Mem) if (!Mem)
{ {
@ -306,8 +304,7 @@ void Reset()
std::memset(CPU_reg, 0, sizeof(CPU_reg)); std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg)); std::memset(CP0_reg, 0, sizeof(CP0_reg));
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg)); std::memset(GTE_regs, 0, sizeof(GTE_regs));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
if (Mem) if (Mem)
std::memset(Mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE); std::memset(Mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE);
@ -329,8 +326,7 @@ void Shutdown()
Mem = nullptr; Mem = nullptr;
} }
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg)); std::memset(GTE_regs, 0, sizeof(GTE_regs));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(CPU_reg, 0, sizeof(CPU_reg)); std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg)); std::memset(CP0_reg, 0, sizeof(CP0_reg));
@ -344,18 +340,19 @@ void Shutdown()
#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register #define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register
#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register #define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register
#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register #define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register
#define cop2idx(_instr) (((_instr >> 11) & 0x1F) | ((_instr >> 17) & 0x20))
#define SX0 (GTE_data_reg[12].x) #define SX0 (GTE_regs[12].x)
#define SY0 (GTE_data_reg[12].y) #define SY0 (GTE_regs[12].y)
#define SX1 (GTE_data_reg[13].x) #define SX1 (GTE_regs[13].x)
#define SY1 (GTE_data_reg[13].y) #define SY1 (GTE_regs[13].y)
#define SX2 (GTE_data_reg[14].x) #define SX2 (GTE_regs[14].x)
#define SY2 (GTE_data_reg[14].y) #define SY2 (GTE_regs[14].y)
#define SXY0 (GTE_data_reg[12]) #define SXY0 (GTE_regs[12])
#define SXY1 (GTE_data_reg[13]) #define SXY1 (GTE_regs[13])
#define SXY2 (GTE_data_reg[14]) #define SXY2 (GTE_regs[14])
#define SXYP (GTE_data_reg[15]) #define SXYP (GTE_regs[15])
void GTE_PushSXYZ2f(float x, float y, float z, u32 v) void GTE_PushSXYZ2f(float x, float y, float z, u32 v)
{ {
@ -428,49 +425,35 @@ static void PGXP_MTC2_int(PGXP_value value, u32 reg)
return; return;
} }
GTE_data_reg[reg] = value; GTE_regs[reg] = value;
} }
//////////////////////////////////// ////////////////////////////////////
// Data transfer tracking // Data transfer tracking
//////////////////////////////////// ////////////////////////////////////
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal) void CPU_MFC2(u32 instr, u32 rdVal)
{ {
// CPU[Rt] = GTE_D[Rd] // CPU[Rt] = GTE_D[Rd]
Validate(&GTE_data_reg[rd(instr)], rdVal); const u32 idx = cop2idx(instr);
CPU_reg[rt(instr)] = GTE_data_reg[rd(instr)]; Validate(&GTE_regs[idx], rdVal);
CPU_reg[rt(instr)].value = rtVal; CPU_reg[rt(instr)] = GTE_regs[idx];
CPU_reg[rt(instr)].value = rdVal;
} }
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal) void CPU_MTC2(u32 instr, u32 rtVal)
{ {
// GTE_D[Rd] = CPU[Rt] // GTE_D[Rd] = CPU[Rt]
const u32 idx = cop2idx(instr);
Validate(&CPU_reg[rt(instr)], rtVal); Validate(&CPU_reg[rt(instr)], rtVal);
PGXP_MTC2_int(CPU_reg[rt(instr)], rd(instr)); PGXP_MTC2_int(CPU_reg[rt(instr)], idx);
GTE_data_reg[rd(instr)].value = rdVal; GTE_regs[idx].value = rtVal;
}
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal)
{
// CPU[Rt] = GTE_C[Rd]
Validate(&GTE_ctrl_reg[rd(instr)], rdVal);
CPU_reg[rt(instr)] = GTE_ctrl_reg[rd(instr)];
CPU_reg[rt(instr)].value = rtVal;
}
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal)
{
// GTE_C[Rd] = CPU[Rt]
Validate(&CPU_reg[rt(instr)], rtVal);
GTE_ctrl_reg[rd(instr)] = CPU_reg[rt(instr)];
GTE_ctrl_reg[rd(instr)].value = rdVal;
} }
//////////////////////////////////// ////////////////////////////////////
// Memory Access // Memory Access
//////////////////////////////////// ////////////////////////////////////
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr) void CPU_LWC2(u32 instr, u32 addr, u32 rtVal)
{ {
// GTE_D[Rt] = Mem[addr] // GTE_D[Rt] = Mem[addr]
PGXP_value val; PGXP_value val;
@ -478,11 +461,11 @@ void CPU_LWC2(u32 instr, u32 rtVal, u32 addr)
PGXP_MTC2_int(val, rt(instr)); PGXP_MTC2_int(val, rt(instr));
} }
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr) void CPU_SWC2(u32 instr, u32 addr, u32 rtVal)
{ {
// Mem[addr] = GTE_D[Rt] // Mem[addr] = GTE_D[Rt]
Validate(&GTE_data_reg[rt(instr)], rtVal); Validate(&GTE_regs[rt(instr)], rtVal);
WriteMem(&GTE_data_reg[rt(instr)], addr); WriteMem(&GTE_regs[rt(instr)], addr);
} }
ALWAYS_INLINE_RELEASE void PGXP_CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex) ALWAYS_INLINE_RELEASE void PGXP_CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex)
@ -575,29 +558,29 @@ bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, f
#define imm_sext(_instr) \ #define imm_sext(_instr) \
static_cast<s32>(static_cast<s16>(_instr & 0xFFFF)) // The immediate part of the instruction register static_cast<s32>(static_cast<s16>(_instr & 0xFFFF)) // The immediate part of the instruction register
void CPU_LW(u32 instr, u32 rtVal, u32 addr) void CPU_LW(u32 instr, u32 addr, u32 rtVal)
{ {
// Rt = Mem[Rs + Im] // Rt = Mem[Rs + Im]
ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal); ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal);
} }
void CPU_LBx(u32 instr, u32 rtVal, u32 addr) void CPU_LBx(u32 instr, u32 addr, u32 rtVal)
{ {
CPU_reg[rt(instr)] = PGXP_value_invalid; CPU_reg[rt(instr)] = PGXP_value_invalid;
} }
void CPU_LHx(u32 instr, u32 rtVal, u32 addr) void CPU_LHx(u32 instr, u32 addr, u32 rtVal)
{ {
// Rt = Mem[Rs + Im] (sign/zero extended) // Rt = Mem[Rs + Im] (sign/zero extended)
ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, rtVal, 1); ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, rtVal, 1);
} }
void CPU_SB(u32 instr, u8 rtVal, u32 addr) void CPU_SB(u32 instr, u32 addr, u32 rtVal)
{ {
WriteMem(&PGXP_value_invalid, addr); WriteMem(&PGXP_value_invalid, addr);
} }
void CPU_SH(u32 instr, u16 rtVal, u32 addr) void CPU_SH(u32 instr, u32 addr, u32 rtVal)
{ {
PGXP_value* val = &CPU_reg[rt(instr)]; PGXP_value* val = &CPU_reg[rt(instr)];
@ -606,7 +589,7 @@ void CPU_SH(u32 instr, u16 rtVal, u32 addr)
WriteMem16(val, addr); WriteMem16(val, addr);
} }
void CPU_SW(u32 instr, u32 rtVal, u32 addr) void CPU_SW(u32 instr, u32 addr, u32 rtVal)
{ {
// Mem[Rs + Im] = Rt // Mem[Rs + Im] = Rt
PGXP_value* val = &CPU_reg[rt(instr)]; PGXP_value* val = &CPU_reg[rt(instr)];
@ -1587,10 +1570,10 @@ void CPU_MFHI(u32 instr, u32 hiVal)
CPU_reg[rd(instr)] = CPU_Hi; CPU_reg[rd(instr)] = CPU_Hi;
} }
void CPU_MTHI(u32 instr, u32 rdVal) void CPU_MTHI(u32 instr, u32 rsVal)
{ {
// Hi = Rd // Hi = Rd
Validate(&CPU_reg[rd(instr)], rdVal); Validate(&CPU_reg[rs(instr)], rsVal);
CPU_Hi = CPU_reg[rd(instr)]; CPU_Hi = CPU_reg[rd(instr)];
} }
@ -1603,10 +1586,10 @@ void CPU_MFLO(u32 instr, u32 loVal)
CPU_reg[rd(instr)] = CPU_Lo; CPU_reg[rd(instr)] = CPU_Lo;
} }
void CPU_MTLO(u32 instr, u32 rdVal) void CPU_MTLO(u32 instr, u32 rsVal)
{ {
// Lo = Rd // Lo = Rd
Validate(&CPU_reg[rd(instr)], rdVal); Validate(&CPU_reg[rs(instr)], rsVal);
CPU_Lo = CPU_reg[rd(instr)]; CPU_Lo = CPU_reg[rd(instr)];
} }

View file

@ -34,24 +34,22 @@ int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2);
float GTE_NCLIP(); float GTE_NCLIP();
// Data transfer tracking // Data transfer tracking
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE data reg to GPR reg (MFC2) void CPU_MFC2(u32 instr, u32 rdVal); // copy GTE data reg to GPR reg (MFC2)
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE data reg (MTC2) void CPU_MTC2(u32 instr, u32 rtVal); // copy GPR reg to GTE data reg (MTC2)
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE ctrl reg to GPR reg (CFC2)
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE ctrl reg (CTC2)
// Memory Access // Memory Access
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr); // copy memory to GTE reg void CPU_LWC2(u32 instr, u32 addr, u32 rtVal); // copy memory to GTE reg
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr); // copy GTE reg to memory void CPU_SWC2(u32 instr, u32 addr, u32 rtVal); // copy GTE reg to memory
bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
float* out_w); float* out_w);
// -- CPU functions // -- CPU functions
void CPU_LW(u32 instr, u32 rtVal, u32 addr); void CPU_LW(u32 instr, u32 addr, u32 rtVal);
void CPU_LHx(u32 instr, u32 rtVal, u32 addr); void CPU_LHx(u32 instr, u32 addr, u32 rtVal);
void CPU_LBx(u32 instr, u32 rtVal, u32 addr); void CPU_LBx(u32 instr, u32 addr, u32 rtVal);
void CPU_SB(u32 instr, u8 rtVal, u32 addr); void CPU_SB(u32 instr, u32 addr, u32 rtVal);
void CPU_SH(u32 instr, u16 rtVal, u32 addr); void CPU_SH(u32 instr, u32 addr, u32 rtVal);
void CPU_SW(u32 instr, u32 rtVal, u32 addr); void CPU_SW(u32 instr, u32 addr, u32 rtVal);
void CPU_MOVE(u32 rd_and_rs, u32 rsVal); void CPU_MOVE(u32 rd_and_rs, u32 rsVal);
// Arithmetic with immediate value // Arithmetic with immediate value
@ -93,9 +91,9 @@ void CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal);
// Move registers // Move registers
void CPU_MFHI(u32 instr, u32 hiVal); void CPU_MFHI(u32 instr, u32 hiVal);
void CPU_MTHI(u32 instr, u32 rdVal); void CPU_MTHI(u32 instr, u32 rsVal);
void CPU_MFLO(u32 instr, u32 loVal); void CPU_MFLO(u32 instr, u32 loVal);
void CPU_MTLO(u32 instr, u32 rdVal); void CPU_MTLO(u32 instr, u32 rsVal);
// CP0 Data transfer tracking // CP0 Data transfer tracking
void CPU_MFC0(u32 instr, u32 rdVal); void CPU_MFC0(u32 instr, u32 rdVal);

View file

@ -5,7 +5,7 @@
#include "types.h" #include "types.h"
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
static constexpr u32 SAVE_STATE_VERSION = 58; static constexpr u32 SAVE_STATE_VERSION = 59;
static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42; static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42;
static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION); static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);

View file

@ -103,20 +103,21 @@ static void DestroySystem();
static std::string GetMediaPathFromSaveState(const char* path); static std::string GetMediaPathFromSaveState(const char* path);
static bool DoLoadState(ByteStream* stream, bool force_software_renderer, bool update_display); static bool DoLoadState(ByteStream* stream, bool force_software_renderer, bool update_display);
static bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state); static bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state);
static void DoRunFrame();
static bool CreateGPU(GPURenderer renderer); static bool CreateGPU(GPURenderer renderer);
static bool SaveUndoLoadState(); static bool SaveUndoLoadState();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
static void Throttle();
static void SetRewinding(bool enabled); static void SetRewinding(bool enabled);
static bool SaveRewindState(); static bool SaveRewindState();
static void DoRewind(); static void DoRewind();
static void SaveRunaheadState(); static void SaveRunaheadState();
static void DoRunahead(); static bool DoRunahead();
static void DoMemorySaveStates();
static bool Initialize(bool force_software_renderer); static bool Initialize(bool force_software_renderer);
static bool FastForwardToFirstFrame();
static bool UpdateGameSettingsLayer(); static bool UpdateGameSettingsLayer();
static void UpdateRunningGame(const char* path, CDImage* image, bool booting); static void UpdateRunningGame(const char* path, CDImage* image, bool booting);
@ -149,12 +150,16 @@ static std::string s_running_game_serial;
static std::string s_running_game_title; static std::string s_running_game_title;
static System::GameHash s_running_game_hash; static System::GameHash s_running_game_hash;
static bool s_running_unknown_game; static bool s_running_unknown_game;
static bool s_was_fast_booted;
static float s_throttle_frequency = 60.0f; static float s_throttle_frequency = 60.0f;
static float s_target_speed = 1.0f; static float s_target_speed = 1.0f;
static Common::Timer::Value s_frame_period = 0; static Common::Timer::Value s_frame_period = 0;
static Common::Timer::Value s_next_frame_time = 0; static Common::Timer::Value s_next_frame_time = 0;
static bool s_last_frame_skipped = false;
static bool s_system_executing = false;
static bool s_system_interrupted = false;
static bool s_frame_step_request = false; static bool s_frame_step_request = false;
static bool s_fast_forward_enabled = false; static bool s_fast_forward_enabled = false;
static bool s_turbo_enabled = false; static bool s_turbo_enabled = false;
@ -208,6 +213,7 @@ static bool s_rewinding_first_save = false;
static std::deque<MemorySaveState> s_runahead_states; static std::deque<MemorySaveState> s_runahead_states;
static bool s_runahead_replay_pending = false; static bool s_runahead_replay_pending = false;
static u32 s_runahead_frames = 0; static u32 s_runahead_frames = 0;
static u32 s_runahead_replay_frames = 0;
static TinyString GetTimestampStringForFileName() static TinyString GetTimestampStringForFileName()
{ {
@ -227,9 +233,6 @@ void System::SetState(State new_state)
Assert(s_state == State::Paused || s_state == State::Running); Assert(s_state == State::Paused || s_state == State::Running);
Assert(new_state == State::Paused || new_state == State::Running); Assert(new_state == State::Paused || new_state == State::Running);
s_state = new_state; s_state = new_state;
if (new_state == State::Paused)
CPU::ForceDispatcherExit();
} }
bool System::IsRunning() bool System::IsRunning()
@ -237,6 +240,11 @@ bool System::IsRunning()
return s_state == State::Running; return s_state == State::Running;
} }
bool System::IsExecutionInterrupted()
{
return s_state != State::Running || s_system_interrupted;
}
bool System::IsPaused() bool System::IsPaused()
{ {
return s_state == State::Paused; return s_state == State::Paused;
@ -304,18 +312,6 @@ u32 System::GetInternalFrameNumber()
return s_internal_frame_number; return s_internal_frame_number;
} }
void System::FrameDone()
{
s_frame_number++;
CPU::g_state.frame_done = true;
CPU::g_state.downcount = 0;
}
void System::IncrementInternalFrameNumber()
{
s_internal_frame_number++;
}
const std::string& System::GetDiscPath() const std::string& System::GetDiscPath()
{ {
return s_running_game_path; return s_running_game_path;
@ -340,6 +336,11 @@ bool System::IsRunningUnknownGame()
return s_running_unknown_game; return s_running_unknown_game;
} }
bool System::WasFastBooted()
{
return s_was_fast_booted;
}
const BIOS::ImageInfo* System::GetBIOSImageInfo() const BIOS::ImageInfo* System::GetBIOSImageInfo()
{ {
return s_bios_image_info; return s_bios_image_info;
@ -529,7 +530,7 @@ bool System::GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash
pos++; pos++;
} }
} }
if (out_id) if (out_id)
{ {
if (id.empty()) if (id.empty())
@ -644,7 +645,7 @@ std::string System::GetExecutableNameForImage(CDImage* cdi, bool strip_subdirect
} }
bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name,
std::vector<u8>* out_executable_data) std::vector<u8>* out_executable_data)
{ {
ISOReader iso; ISOReader iso;
if (!iso.Open(cdi, 1)) if (!iso.Open(cdi, 1))
@ -653,7 +654,8 @@ bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_n
return ReadExecutableFromImage(iso, out_executable_name, out_executable_data); return ReadExecutableFromImage(iso, out_executable_name, out_executable_data);
} }
bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name, std::vector<u8>* out_executable_data) bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name,
std::vector<u8>* out_executable_data)
{ {
const std::string executable_path = GetExecutableNameForImage(iso, false); const std::string executable_path = GetExecutableNameForImage(iso, false);
Log_DevPrintf("Executable path: '%s'", executable_path.c_str()); Log_DevPrintf("Executable path: '%s'", executable_path.c_str());
@ -886,7 +888,11 @@ void System::ApplySettings(bool display_osd_messages)
Host::CheckForSettingsChanges(old_config); Host::CheckForSettingsChanges(old_config);
if (IsValid()) if (IsValid())
{
ResetPerformanceCounters(); ResetPerformanceCounters();
if (s_system_executing)
s_system_interrupted = true;
}
} }
bool System::ReloadGameSettings(bool display_osd_messages) bool System::ReloadGameSettings(bool display_osd_messages)
@ -1304,9 +1310,15 @@ bool System::BootSystem(SystemBootParameters parameters)
g_settings.bios_patch_fast_boot)) g_settings.bios_patch_fast_boot))
{ {
if (s_bios_image_info && s_bios_image_info->patch_compatible) if (s_bios_image_info && s_bios_image_info->patch_compatible)
{
// TODO: Fast boot without patches...
BIOS::PatchBIOSFastBoot(Bus::g_bios, Bus::BIOS_SIZE); BIOS::PatchBIOSFastBoot(Bus::g_bios, Bus::BIOS_SIZE);
s_was_fast_booted = true;
}
else else
{
Log_ErrorPrintf("Not patching fast boot, as BIOS is not patch compatible."); Log_ErrorPrintf("Not patching fast boot, as BIOS is not patch compatible.");
}
} }
// Good to go. // Good to go.
@ -1346,6 +1358,9 @@ bool System::BootSystem(SystemBootParameters parameters)
if (parameters.load_image_to_ram || g_settings.cdrom_load_image_to_ram) if (parameters.load_image_to_ram || g_settings.cdrom_load_image_to_ram)
CDROM::PrecacheMedia(); CDROM::PrecacheMedia();
if (parameters.fast_forward_to_first_frame)
FastForwardToFirstFrame();
if (g_settings.audio_dump_on_boot) if (g_settings.audio_dump_on_boot)
StartDumpingAudio(); StartDumpingAudio();
@ -1370,6 +1385,10 @@ bool System::Initialize(bool force_software_renderer)
s_turbo_enabled = false; s_turbo_enabled = false;
s_fast_forward_enabled = false; s_fast_forward_enabled = false;
s_rewind_load_frequency = -1;
s_rewind_load_counter = -1;
s_rewinding_first_save = true;
s_average_frame_time_accumulator = 0.0f; s_average_frame_time_accumulator = 0.0f;
s_minimum_frame_time_accumulator = 0.0f; s_minimum_frame_time_accumulator = 0.0f;
s_maximum_frame_time_accumulator = 0.0f; s_maximum_frame_time_accumulator = 0.0f;
@ -1488,6 +1507,7 @@ bool System::Initialize(bool force_software_renderer)
void System::DestroySystem() void System::DestroySystem()
{ {
DebugAssert(!s_system_executing);
if (s_state == State::Shutdown) if (s_state == State::Shutdown)
return; return;
@ -1528,6 +1548,10 @@ void System::DestroySystem()
s_bios_hash = {}; s_bios_hash = {};
s_bios_image_info = nullptr; s_bios_image_info = nullptr;
s_was_fast_booted = false;
s_cheat_list.reset();
s_state = State::Shutdown;
Host::OnSystemDestroyed(); Host::OnSystemDestroyed();
} }
@ -1539,8 +1563,6 @@ void System::ClearRunningGame()
s_running_game_title.clear(); s_running_game_title.clear();
s_running_game_hash = 0; s_running_game_hash = 0;
s_running_unknown_game = false; s_running_unknown_game = false;
s_cheat_list.reset();
s_state = State::Shutdown;
Host::OnGameChanged(s_running_game_path, s_running_game_serial, s_running_game_title); Host::OnGameChanged(s_running_game_path, s_running_game_serial, s_running_game_title);
@ -1549,25 +1571,124 @@ void System::ClearRunningGame()
#endif #endif
} }
bool System::FastForwardToFirstFrame()
{
// If we're taking more than 60 seconds to load the game, oof..
static constexpr u32 MAX_FRAMES_TO_SKIP = 30 * 60;
const u32 current_frame_number = s_frame_number;
const u32 current_internal_frame_number = s_internal_frame_number;
SPU::SetAudioOutputMuted(true);
while (s_internal_frame_number == current_internal_frame_number &&
(s_frame_number - current_frame_number) <= MAX_FRAMES_TO_SKIP)
{
Panic("Fixme");
// System::RunFrame();
}
SPU::SetAudioOutputMuted(false);
return (s_internal_frame_number != current_internal_frame_number);
}
void System::Execute() void System::Execute()
{ {
while (System::IsRunning()) for (;;)
{ {
if (s_display_all_frames) switch (s_state)
System::RunFrame(); {
else case State::Running:
System::RunFrames(); {
s_system_executing = true;
// this can shut us down // TODO: Purge reset/restore
Host::PumpMessagesOnCPUThread(); g_gpu->RestoreGraphicsAPIState();
if (!IsValid())
return; if (s_rewind_load_counter >= 0)
DoRewind();
else
CPU::Execute();
g_gpu->ResetGraphicsAPIState();
s_system_executing = false;
continue;
}
case State::Stopping:
{
DestroySystem();
return;
}
case State::Paused:
default:
return;
}
}
}
void System::FrameDone()
{
s_frame_number++;
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
SPU::GeneratePendingSamples();
if (s_cheat_list)
s_cheat_list->Apply();
if (s_frame_step_request) if (s_frame_step_request)
{
s_frame_step_request = false;
PauseSystem(true);
}
// Save states for rewind and runahead.
if (s_rewind_save_counter >= 0)
{
if (s_rewind_save_counter == 0)
{ {
s_frame_step_request = false; SaveRewindState();
PauseSystem(true); s_rewind_save_counter = s_rewind_save_frequency;
} }
else
{
s_rewind_save_counter--;
}
}
else if (s_runahead_frames > 0)
{
// We don't want to poll during replay, because otherwise we'll lose frames.
if (s_runahead_replay_frames == 0)
{
// For runahead, poll input early, that way we can use the remainder of this frame to replay.
// *technically* this means higher input latency (by less than a frame), but runahead itself
// counter-acts that.
Host::PumpMessagesOnCPUThread();
if (IsExecutionInterrupted())
{
s_system_interrupted = false;
CPU::ExitExecution();
return;
}
}
if (DoRunahead())
{
// running ahead, get it done as soon as possible
return;
}
SaveRunaheadState();
}
const Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time < s_next_frame_time || s_display_all_frames || s_last_frame_skipped)
{
s_last_frame_skipped = false;
// TODO: Purge reset/restore
g_gpu->ResetGraphicsAPIState();
const bool skip_present = g_host_display->ShouldSkipDisplayingFrame(); const bool skip_present = g_host_display->ShouldSkipDisplayingFrame();
Host::RenderDisplay(skip_present); Host::RenderDisplay(skip_present);
@ -1577,14 +1698,109 @@ void System::Execute()
s_presents_since_last_update++; s_presents_since_last_update++;
} }
if (s_throttler_enabled) g_gpu->RestoreGraphicsAPIState();
System::Throttle(); }
else if (current_time >= s_next_frame_time)
{
Log_DebugPrintf("Skipping displaying frame");
s_last_frame_skipped = true;
}
if (s_throttler_enabled && !IsExecutionInterrupted())
Throttle();
// Input poll already done above
if (s_runahead_frames == 0)
{
Host::PumpMessagesOnCPUThread();
if (IsExecutionInterrupted())
{
s_system_interrupted = false;
CPU::ExitExecution();
return;
}
}
// Update perf counters *after* throttling, we want to measure from start-of-frame
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
// amounts of computation happening in each frame).
System::UpdatePerformanceCounters();
}
void System::SetThrottleFrequency(float frequency)
{
if (s_throttle_frequency == frequency)
return;
s_throttle_frequency = frequency;
UpdateThrottlePeriod();
}
void System::UpdateThrottlePeriod()
{
if (s_target_speed > std::numeric_limits<double>::epsilon())
{
const double target_speed = std::max(static_cast<double>(s_target_speed), std::numeric_limits<double>::epsilon());
s_frame_period =
Common::Timer::ConvertSecondsToValue(1.0 / (static_cast<double>(s_throttle_frequency) * target_speed));
}
else
{
s_frame_period = 1;
}
ResetThrottler();
}
void System::ResetThrottler()
{
s_next_frame_time = Common::Timer::GetCurrentValue() + s_frame_period;
}
// Update perf counters *after* throttling, we want to measure from start-of-frame void System::Throttle()
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different {
// amounts of computation happening in each frame). // If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
System::UpdatePerformanceCounters(); // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
s_next_frame_time += (diff / s_frame_period) * s_frame_period + s_frame_period;
return;
} }
// Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery.
// Linux also seems to do a much better job of waking up at the requested time.
#if !defined(__linux__) && !defined(__ANDROID__)
Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames);
#else
Common::Timer::SleepUntil(s_next_frame_time, false);
#endif
s_next_frame_time += s_frame_period;
}
void System::SingleStepCPU()
{
s_frame_timer.Reset();
s_system_executing = true;
g_gpu->RestoreGraphicsAPIState();
CPU::SingleStep();
SPU::GeneratePendingSamples();
g_gpu->ResetGraphicsAPIState();
s_system_executing = false;
}
void System::IncrementInternalFrameNumber()
{
s_internal_frame_number++;
} }
void System::RecreateSystem() void System::RecreateSystem()
@ -2163,159 +2379,11 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 *
return true; return true;
} }
void System::SingleStepCPU()
{
const u32 old_frame_number = s_frame_number;
s_frame_timer.Reset();
g_gpu->RestoreGraphicsAPIState();
CPU::SingleStep();
SPU::GeneratePendingSamples();
if (s_frame_number != old_frame_number && s_cheat_list)
s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState();
}
void System::DoRunFrame()
{
g_gpu->RestoreGraphicsAPIState();
if (CPU::g_state.use_debug_dispatcher)
{
CPU::ExecuteDebug();
}
else
{
switch (g_settings.cpu_execution_mode)
{
case CPUExecutionMode::Recompiler:
#ifdef WITH_RECOMPILER
CPU::CodeCache::ExecuteRecompiler();
#else
CPU::CodeCache::Execute();
#endif
break;
case CPUExecutionMode::CachedInterpreter:
CPU::CodeCache::Execute();
break;
case CPUExecutionMode::Interpreter:
default:
CPU::Execute();
break;
}
}
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
SPU::GeneratePendingSamples();
if (s_cheat_list)
s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState();
}
void System::RunFrame()
{
if (s_rewind_load_counter >= 0)
{
DoRewind();
return;
}
if (s_runahead_frames > 0)
DoRunahead();
DoRunFrame();
s_next_frame_time += s_frame_period;
if (s_memory_saves_enabled)
DoMemorySaveStates();
}
float System::GetTargetSpeed() float System::GetTargetSpeed()
{ {
return s_target_speed; return s_target_speed;
} }
// Stores the new throttle frequency and recomputes the frame period from it.
// UpdateThrottlePeriod() also resets the throttler as part of that recomputation.
void System::SetThrottleFrequency(float frequency)
{
s_throttle_frequency = frequency;
UpdateThrottlePeriod();
}
// Recomputes s_frame_period from s_throttle_frequency and s_target_speed, then resets the throttler.
// A target speed at or below double epsilon yields the minimum period of 1 timer unit.
void System::UpdateThrottlePeriod()
{
  const double speed = static_cast<double>(s_target_speed);
  if (speed > std::numeric_limits<double>::epsilon())
  {
    // period = 1 / (throttle frequency scaled by the target speed), converted to timer units.
    const double effective_frequency = static_cast<double>(s_throttle_frequency) * speed;
    s_frame_period = Common::Timer::ConvertSecondsToValue(1.0 / effective_frequency);
  }
  else
  {
    s_frame_period = 1;
  }

  ResetThrottler();
}
// Re-bases the throttle deadline on the current timer value, discarding any accumulated lag or lead.
void System::ResetThrottler()
{
s_next_frame_time = Common::Timer::GetCurrentValue();
}
// Throttles emulation: sleeps until s_next_frame_time, or, when that deadline has already passed,
// moves the deadline forward instead of sleeping.
void System::Throttle()
{
// If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
// running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
// The lag is rounded down to a whole number of frame periods before being added
// (assuming Value is an integer type - TODO confirm), so the deadline stays on the period grid.
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
s_next_frame_time += (diff / s_frame_period) * s_frame_period;
return;
}
// Only platforms other than Linux and Android pass display_all_frames as the second SleepUntil()
// argument (presumably the "spinwait if we undersleep" flag - confirm against Common::Timer).
// On Android we don't want to burn battery, and Linux seems to do a much better job of waking up
// at the requested time, so both always pass false.
#if !defined(__linux__) && !defined(__ANDROID__)
Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames);
#else
Common::Timer::SleepUntil(s_next_frame_time, false);
#endif
}
void System::RunFrames()
{
// If we're running more than this in a single loop... we're in for a bad time.
const u32 max_frames_to_run = 2;
u32 frames_run = 0;
Common::Timer::Value value = Common::Timer::GetCurrentValue();
while (frames_run < max_frames_to_run)
{
if (value < s_next_frame_time)
break;
RunFrame();
frames_run++;
value = Common::Timer::GetCurrentValue();
}
if (frames_run != 1)
Log_VerbosePrintf("Ran %u frames in a single host frame", frames_run);
}
void System::UpdatePerformanceCounters() void System::UpdatePerformanceCounters()
{ {
const float frame_time = static_cast<float>(s_frame_timer.GetTimeMillisecondsAndReset()); const float frame_time = static_cast<float>(s_frame_timer.GetTimeMillisecondsAndReset());
@ -3625,18 +3693,22 @@ void System::SetRewinding(bool enabled)
{ {
if (enabled) if (enabled)
{ {
const bool was_enabled = IsRewinding();
// Try to rewind at the replay speed, or one per second maximum. // Try to rewind at the replay speed, or one per second maximum.
const float load_frequency = std::min(g_settings.rewind_save_frequency, 1.0f); const float load_frequency = std::min(g_settings.rewind_save_frequency, 1.0f);
s_rewind_load_frequency = static_cast<s32>(std::ceil(load_frequency * s_throttle_frequency)); s_rewind_load_frequency = static_cast<s32>(std::ceil(load_frequency * s_throttle_frequency));
s_rewind_load_counter = 0; s_rewind_load_counter = 0;
if (!was_enabled && s_system_executing)
s_system_interrupted = true;
} }
else else
{ {
s_rewind_load_frequency = -1; s_rewind_load_frequency = -1;
s_rewind_load_counter = -1; s_rewind_load_counter = -1;
s_rewinding_first_save = true;
} }
s_rewinding_first_save = true;
} }
void System::DoRewind() void System::DoRewind()
@ -3655,6 +3727,15 @@ void System::DoRewind()
} }
s_next_frame_time += s_frame_period; s_next_frame_time += s_frame_period;
// TODO: Purge reset/restore
g_gpu->ResetGraphicsAPIState();
Host::RenderDisplay(false);
g_gpu->RestoreGraphicsAPIState();
Host::PumpMessagesOnCPUThread();
Throttle();
} }
void System::SaveRunaheadState() void System::SaveRunaheadState()
@ -3676,84 +3757,70 @@ void System::SaveRunaheadState()
s_runahead_states.push_back(std::move(mss)); s_runahead_states.push_back(std::move(mss));
} }
void System::DoRunahead() bool System::DoRunahead()
{ {
#ifdef PROFILE_MEMORY_SAVE_STATES #ifdef PROFILE_MEMORY_SAVE_STATES
Common::Timer timer; static Common::Timer replay_timer;
Log_DevPrintf("runahead starting at frame %u", s_frame_number);
#endif #endif
if (s_runahead_replay_pending) if (s_runahead_replay_pending)
{ {
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead starting at frame %u", s_frame_number);
replay_timer.Reset();
#endif
// we need to replay and catch up - load the state, // we need to replay and catch up - load the state,
s_runahead_replay_pending = false; s_runahead_replay_pending = false;
if (s_runahead_states.empty() || !LoadMemoryState(s_runahead_states.front())) if (s_runahead_states.empty() || !LoadMemoryState(s_runahead_states.front()))
{ {
s_runahead_states.clear(); s_runahead_states.clear();
return; return false;
} }
// figure out how many frames we need to run to catch up
s_runahead_replay_frames = static_cast<u32>(s_runahead_states.size());
// and throw away all the states, forcing us to catch up below // and throw away all the states, forcing us to catch up below
// TODO: can we leave one frame here and run, avoiding the extra save?
s_runahead_states.clear(); s_runahead_states.clear();
#ifdef PROFILE_MEMORY_SAVE_STATES // run the frames with no audio
Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds());
#endif
}
// run the frames with no audio
s32 frames_to_run = static_cast<s32>(s_runahead_frames) - static_cast<s32>(s_runahead_states.size());
if (frames_to_run > 0)
{
Common::Timer timer2;
#ifdef PROFILE_MEMORY_SAVE_STATES
const s32 temp = frames_to_run;
#endif
SPU::SetAudioOutputMuted(true); SPU::SetAudioOutputMuted(true);
while (frames_to_run > 0)
{
DoRunFrame();
SaveRunaheadState();
frames_to_run--;
}
SPU::SetAudioOutputMuted(false);
#ifdef PROFILE_MEMORY_SAVE_STATES #ifdef PROFILE_MEMORY_SAVE_STATES
Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", temp, timer2.GetTimeMilliseconds()); Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds());
#endif #endif
// we don't want to save the frame we just loaded. but we are "one frame ahead", because the frame we just tossed
// was never saved, so return but don't decrement the counter
return true;
} }
else else if (s_runahead_replay_frames == 0)
{ {
// save this frame return false;
}
s_runahead_replay_frames--;
if (s_runahead_replay_frames > 0)
{
// keep running ahead
SaveRunaheadState(); SaveRunaheadState();
return true;
} }
#ifdef PROFILE_MEMORY_SAVE_STATES #ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds()); Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", s_runahead_frames,
replay_timer.GetTimeMilliseconds());
#endif #endif
}
void System::DoMemorySaveStates() // we're all caught up. this frame gets saved in DoMemoryStates().
{ SPU::SetAudioOutputMuted(false);
if (s_rewind_save_counter >= 0)
{
if (s_rewind_save_counter == 0)
{
SaveRewindState();
s_rewind_save_counter = s_rewind_save_frequency;
}
else
{
s_rewind_save_counter--;
}
}
if (s_runahead_frames > 0) #ifdef PROFILE_MEMORY_SAVE_STATES
SaveRunaheadState(); Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds());
#endif
return false;
} }
void System::SetRunaheadReplayFlag() void System::SetRunaheadReplayFlag()
@ -3776,7 +3843,10 @@ void System::ShutdownSystem(bool save_resume_state)
if (save_resume_state) if (save_resume_state)
SaveResumeState(); SaveResumeState();
DestroySystem(); if (s_system_executing)
s_state = State::Stopping;
else
DestroySystem();
} }
bool System::CanUndoLoadState() bool System::CanUndoLoadState()

View file

@ -42,6 +42,7 @@ struct SystemBootParameters
u32 media_playlist_index = 0; u32 media_playlist_index = 0;
bool load_image_to_ram = false; bool load_image_to_ram = false;
bool force_software_renderer = false; bool force_software_renderer = false;
bool fast_forward_to_first_frame = false;
}; };
struct SaveStateInfo struct SaveStateInfo
@ -85,7 +86,8 @@ enum class State
Shutdown, Shutdown,
Starting, Starting,
Running, Running,
Paused Paused,
Stopping,
}; };
using GameHash = u64; using GameHash = u64;
@ -110,7 +112,6 @@ ConsoleRegion GetConsoleRegionForDiscRegion(DiscRegion region);
std::string GetExecutableNameForImage(CDImage* cdi, bool strip_subdirectories); std::string GetExecutableNameForImage(CDImage* cdi, bool strip_subdirectories);
bool ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, std::vector<u8>* out_executable_data); bool ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, std::vector<u8>* out_executable_data);
bool IsValidGameImage(CDImage* cdi);
std::string GetGameHashId(GameHash hash); std::string GetGameHashId(GameHash hash);
bool GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash* out_hash); bool GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash* out_hash);
DiscRegion GetRegionForSerial(std::string_view serial); DiscRegion GetRegionForSerial(std::string_view serial);
@ -129,6 +130,7 @@ std::string GetInputProfilePath(const std::string_view& name);
State GetState(); State GetState();
void SetState(State new_state); void SetState(State new_state);
bool IsRunning(); bool IsRunning();
bool IsExecutionInterrupted();
bool IsPaused(); bool IsPaused();
bool IsShutdown(); bool IsShutdown();
bool IsValid(); bool IsValid();
@ -176,14 +178,15 @@ bool InjectEXEFromBuffer(const void* buffer, u32 buffer_size, bool patch_loader
u32 GetFrameNumber(); u32 GetFrameNumber();
u32 GetInternalFrameNumber(); u32 GetInternalFrameNumber();
void FrameDone();
void IncrementInternalFrameNumber(); void IncrementInternalFrameNumber();
void FrameDone();
const std::string& GetDiscPath(); const std::string& GetDiscPath();
const std::string& GetGameSerial(); const std::string& GetGameSerial();
const std::string& GetGameTitle(); const std::string& GetGameTitle();
GameHash GetGameHash(); GameHash GetGameHash();
bool IsRunningUnknownGame(); bool IsRunningUnknownGame();
bool WasFastBooted();
const BIOS::ImageInfo* GetBIOSImageInfo(); const BIOS::ImageInfo* GetBIOSImageInfo();
const BIOS::Hash& GetBIOSHash(); const BIOS::Hash& GetBIOSHash();
@ -237,8 +240,6 @@ void RecreateSystem();
bool RecreateGPU(GPURenderer renderer, bool force_recreate_display = false, bool update_display = true); bool RecreateGPU(GPURenderer renderer, bool force_recreate_display = false, bool update_display = true);
void SingleStepCPU(); void SingleStepCPU();
void RunFrame();
void RunFrames();
/// Sets target emulation speed. /// Sets target emulation speed.
float GetTargetSpeed(); float GetTargetSpeed();
@ -250,9 +251,6 @@ void SetThrottleFrequency(float frequency);
void UpdateThrottlePeriod(); void UpdateThrottlePeriod();
void ResetThrottler(); void ResetThrottler();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
void Throttle();
void UpdatePerformanceCounters(); void UpdatePerformanceCounters();
void ResetPerformanceCounters(); void ResetPerformanceCounters();

View file

@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
static TimingEvent* s_current_event = nullptr; static TimingEvent* s_current_event = nullptr;
static u32 s_active_event_count = 0; static u32 s_active_event_count = 0;
static u32 s_global_tick_counter = 0; static u32 s_global_tick_counter = 0;
static bool s_frame_done = false;
u32 GetGlobalTickCounter() u32 GetGlobalTickCounter()
{ {
@ -51,10 +52,7 @@ std::unique_ptr<TimingEvent> CreateTimingEvent(std::string name, TickCount perio
void UpdateCPUDowncount() void UpdateCPUDowncount()
{ {
if (!CPU::g_state.frame_done && (!CPU::HasPendingInterrupt() || CPU::g_using_interpreter)) CPU::g_state.downcount = CPU::HasPendingInterrupt() ? 0 : s_active_events_head->GetDowncount();
{
CPU::g_state.downcount = s_active_events_head->GetDowncount();
}
} }
TimingEvent** GetHeadEventPtr() TimingEvent** GetHeadEventPtr()
@ -260,48 +258,76 @@ static TimingEvent* FindActiveEvent(const char* name)
return nullptr; return nullptr;
} }
// Returns true while an event callback is being dispatched: RunEvents() points s_current_event at
// the event it is invoking and clears it back to nullptr once dispatch has finished.
bool IsRunningEvents()
{
return (s_current_event != nullptr);
}
// Flags the current frame as finished. RunEvents() checks s_frame_done, clears it and calls
// System::FrameDone() when it is set.
void SetFrameDone()
{
s_frame_done = true;
// NOTE(review): zeroing the downcount presumably makes the CPU leave its execution loop and
// re-enter RunEvents() as soon as possible - confirm against the CPU dispatchers.
CPU::g_state.downcount = 0;
}
void RunEvents() void RunEvents()
{ {
DebugAssert(!s_current_event); DebugAssert(!s_current_event);
TickCount pending_ticks = CPU::GetPendingTicks(); do
CPU::ResetPendingTicks();
while (pending_ticks > 0)
{ {
const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount()); if (CPU::HasPendingInterrupt())
s_global_tick_counter += static_cast<u32>(time); CPU::DispatchInterrupt();
pending_ticks -= time;
// Apply downcount to all events. TickCount pending_ticks = CPU::GetPendingTicks();
// This will result in a negative downcount for those events which are late. if (pending_ticks >= s_active_events_head->GetDowncount())
for (TimingEvent* event = s_active_events_head; event; event = event->next)
{ {
event->m_downcount -= time; CPU::ResetPendingTicks();
event->m_time_since_last_run += time;
do
{
const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount());
s_global_tick_counter += static_cast<u32>(time);
pending_ticks -= time;
// Apply downcount to all events.
// This will result in a negative downcount for those events which are late.
for (TimingEvent* event = s_active_events_head; event; event = event->next)
{
event->m_downcount -= time;
event->m_time_since_last_run += time;
}
// Now we can actually run the callbacks.
while (s_active_events_head->m_downcount <= 0)
{
// move it to the end, since that'll likely be its new position
TimingEvent* event = s_active_events_head;
s_current_event = event;
// Factor late time into the time for the next invocation.
const TickCount ticks_late = -event->m_downcount;
const TickCount ticks_to_execute = event->m_time_since_last_run;
event->m_downcount += event->m_interval;
event->m_time_since_last_run = 0;
// The cycles_late is only an indicator, it doesn't modify the cycles to execute.
event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late);
if (event->m_active)
SortEvent(event);
}
} while (pending_ticks > 0);
s_current_event = nullptr;
} }
// Now we can actually run the callbacks. if (s_frame_done)
while (s_active_events_head->m_downcount <= 0)
{ {
// move it to the end, since that'll likely be its new position s_frame_done = false;
TimingEvent* event = s_active_events_head; System::FrameDone();
s_current_event = event;
// Factor late time into the time for the next invocation.
const TickCount ticks_late = -event->m_downcount;
const TickCount ticks_to_execute = event->m_time_since_last_run;
event->m_downcount += event->m_interval;
event->m_time_since_last_run = 0;
// The cycles_late is only an indicator, it doesn't modify the cycles to execute.
event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late);
if (event->m_active)
SortEvent(event);
} }
}
s_current_event = nullptr; UpdateCPUDowncount();
UpdateCPUDowncount(); } while (CPU::GetPendingTicks() >= CPU::g_state.downcount);
} }
bool DoState(StateWrapper& sw) bool DoState(StateWrapper& sw)
@ -347,7 +373,7 @@ bool DoState(StateWrapper& sw)
sw.Do(&last_event_run_time); sw.Do(&last_event_run_time);
} }
Log_DevPrintf("Loaded %u events from save state.", event_count); Log_DebugPrintf("Loaded %u events from save state.", event_count);
SortEvents(); SortEvents();
} }
else else
@ -364,7 +390,7 @@ bool DoState(StateWrapper& sw)
sw.Do(&event->m_interval); sw.Do(&event->m_interval);
} }
Log_DevPrintf("Wrote %u events to save state.", s_active_event_count); Log_DebugPrintf("Wrote %u events to save state.", s_active_event_count);
} }
return !sw.HasError(); return !sw.HasError();
@ -407,6 +433,8 @@ void TimingEvent::Delay(TickCount ticks)
DebugAssert(TimingEvents::s_current_event != this); DebugAssert(TimingEvents::s_current_event != this);
TimingEvents::SortEvent(this); TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
} }
void TimingEvent::Schedule(TickCount ticks) void TimingEvent::Schedule(TickCount ticks)
@ -426,7 +454,11 @@ void TimingEvent::Schedule(TickCount ticks)
// Event is already active, so we leave the time since last run alone, and just modify the downcount. // Event is already active, so we leave the time since last run alone, and just modify the downcount.
// If this is a call from an IO handler for example, re-sort the event queue. // If this is a call from an IO handler for example, re-sort the event queue.
if (TimingEvents::s_current_event != this) if (TimingEvents::s_current_event != this)
{
TimingEvents::SortEvent(this); TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
} }
} }
@ -451,7 +483,11 @@ void TimingEvent::Reset()
m_downcount = m_interval; m_downcount = m_interval;
m_time_since_last_run = 0; m_time_since_last_run = 0;
if (TimingEvents::s_current_event != this) if (TimingEvents::s_current_event != this)
{
TimingEvents::SortEvent(this); TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
} }
void TimingEvent::InvokeEarly(bool force /* = false */) void TimingEvent::InvokeEarly(bool force /* = false */)
@ -471,6 +507,8 @@ void TimingEvent::InvokeEarly(bool force /* = false */)
// Since we've changed the downcount, we need to re-sort the events. // Since we've changed the downcount, we need to re-sort the events.
DebugAssert(TimingEvents::s_current_event != this); DebugAssert(TimingEvents::s_current_event != this);
TimingEvents::SortEvent(this); TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
} }
void TimingEvent::Activate() void TimingEvent::Activate()

View file

@ -93,6 +93,8 @@ std::unique_ptr<TimingEvent> CreateTimingEvent(std::string name, TickCount perio
/// Serialization. /// Serialization.
bool DoState(StateWrapper& sw); bool DoState(StateWrapper& sw);
bool IsRunningEvents();
void SetFrameDone();
void RunEvents(); void RunEvents();
void UpdateCPUDowncount(); void UpdateCPUDowncount();

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com> // SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "win32_nogui_platform.h" #include "win32_nogui_platform.h"
@ -161,7 +161,7 @@ std::optional<WindowInfo> Win32NoGUIPlatform::GetPlatformWindowInfo()
return std::nullopt; return std::nullopt;
RECT rc = {}; RECT rc = {};
GetWindowRect(m_hwnd, &rc); GetClientRect(m_hwnd, &rc);
WindowInfo wi; WindowInfo wi;
wi.surface_width = static_cast<u32>(rc.right - rc.left); wi.surface_width = static_cast<u32>(rc.right - rc.left);
@ -338,7 +338,7 @@ LRESULT CALLBACK Win32NoGUIPlatform::WndProc(HWND hwnd, UINT msg, WPARAM wParam,
const WCHAR utf16[1] = {static_cast<WCHAR>(wParam)}; const WCHAR utf16[1] = {static_cast<WCHAR>(wParam)};
char utf8[8] = {}; char utf8[8] = {};
const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, utf16, static_cast<int>(std::size(utf16)), utf8, const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, utf16, static_cast<int>(std::size(utf16)), utf8,
static_cast<int>(sizeof(utf8)) - 1, nullptr, nullptr); static_cast<int>(sizeof(utf8) - 1), nullptr, nullptr);
if (utf8_len > 0) if (utf8_len > 0)
{ {
utf8[utf8_len] = 0; utf8[utf8_len] = 0;
@ -439,4 +439,4 @@ std::unique_ptr<NoGUIPlatform> NoGUIPlatform::CreateWin32Platform()
return {}; return {};
return ret; return ret;
} }

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com> // SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once #pragma once

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com> // SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "x11_nogui_platform.h" #include "x11_nogui_platform.h"
@ -24,7 +24,7 @@ X11NoGUIPlatform::~X11NoGUIPlatform()
bool X11NoGUIPlatform::Initialize() bool X11NoGUIPlatform::Initialize()
{ {
const int res = XInitThreads(); const int res = XInitThreads();
if (res != 0) if (res == 0)
Log_WarningPrintf("XInitThreads() returned %d, things might not be stable.", res); Log_WarningPrintf("XInitThreads() returned %d, things might not be stable.", res);
m_display = XOpenDisplay(nullptr); m_display = XOpenDisplay(nullptr);
@ -34,6 +34,7 @@ bool X11NoGUIPlatform::Initialize()
return false; return false;
} }
InitializeKeyMap();
return true; return true;
} }
@ -68,7 +69,7 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title)
window_height, 0, 0, BlackPixel(m_display, 0)); window_height, 0, 0, BlackPixel(m_display, 0));
if (!m_window) if (!m_window)
{ {
Log_ErrorPrintf("Failed to create X window"); Log_ErrorPrint("Failed to create X window");
return false; return false;
} }
@ -92,7 +93,6 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title)
XMapRaised(m_display, m_window); XMapRaised(m_display, m_window);
XFlush(m_display); XFlush(m_display);
XSync(m_display, True); XSync(m_display, True);
InitializeKeyMap();
} }
ProcessXEvents(); ProcessXEvents();
@ -144,15 +144,15 @@ void X11NoGUIPlatform::InitializeKeyMap()
if (keysym == NoSymbol) if (keysym == NoSymbol)
continue; continue;
KeySym upper_sym; KeySym upper_keysym;
XConvertCase(keysym, &keysym, &upper_sym); XConvertCase(keysym, &keysym, &upper_keysym);
// Would this fail? // Would this fail?
const char* keyname = XKeysymToString(keysym); const char* keyname = XKeysymToString(upper_keysym);
if (!keyname) if (!keyname)
continue; continue;
m_key_map.emplace(static_cast<s32>(keysym), keyname); m_key_map.emplace(static_cast<s32>(upper_keysym), keyname);
} }
} }
@ -160,7 +160,7 @@ std::optional<u32> X11NoGUIPlatform::ConvertHostKeyboardStringToCode(const std::
{ {
for (const auto& it : m_key_map) for (const auto& it : m_key_map)
{ {
if (StringUtil::Strncasecmp(it.second.c_str(), str.data(), str.length()) == 0) if (str == it.second)
return it.first; return it.first;
} }
@ -175,7 +175,7 @@ std::optional<std::string> X11NoGUIPlatform::ConvertHostKeyboardCodeToString(u32
void X11NoGUIPlatform::ProcessXEvents() void X11NoGUIPlatform::ProcessXEvents()
{ {
XLockDisplay(m_display); XDisplayLocker locker(m_display);
for (int num_events = XPending(m_display); num_events > 0; num_events--) for (int num_events = XPending(m_display); num_events > 0; num_events--)
{ {
@ -186,19 +186,30 @@ void X11NoGUIPlatform::ProcessXEvents()
case KeyPress: case KeyPress:
case KeyRelease: case KeyRelease:
{ {
const KeySym sym = XLookupKeysym(&event.xkey, 0); KeySym sym = XLookupKeysym(&event.xkey, 0);
if (sym != NoSymbol) if (sym != NoSymbol)
NoGUIHost::ProcessPlatformKeyEvent(static_cast<s32>(sym), (event.type == KeyPress)); {
KeySym upper_sym = sym;
XConvertCase(sym, &sym, &upper_sym);
NoGUIHost::ProcessPlatformKeyEvent(static_cast<s32>(upper_sym), (event.type == KeyPress));
}
} }
break; break;
case ButtonPress: case ButtonPress:
case ButtonRelease: case ButtonRelease:
{ {
if (event.xbutton.button >= Button1) if (event.xbutton.button >= Button4 && event.xbutton.button <= Button5)
{ {
NoGUIHost::ProcessPlatformMouseButtonEvent(static_cast<s32>(event.xbutton.button - Button1), // Button 4/5 are mouse wheel events on X, apparently...
event.type == ButtonPress); NoGUIHost::ProcessPlatformMouseWheelEvent(0.0f, (event.xbutton.button == Button4) ? 1.0f : -1.0f);
}
else if (event.xbutton.button >= Button1)
{
// Swap middle and right buttons.
const u32 xbutton = event.xbutton.button;
const u32 mapped_button = (xbutton == Button3) ? 1 : (xbutton == Button2 ? 2 : (xbutton - Button1));
NoGUIHost::ProcessPlatformMouseButtonEvent(mapped_button, event.type == ButtonPress);
} }
} }
break; break;
@ -241,8 +252,6 @@ void X11NoGUIPlatform::ProcessXEvents()
break; break;
} }
} }
XUnlockDisplay(m_display);
} }
void X11NoGUIPlatform::RunMessageLoop() void X11NoGUIPlatform::RunMessageLoop()

View file

@ -58,13 +58,13 @@ void DebuggerWindow::refreshAll()
m_stack_model->invalidateView(); m_stack_model->invalidateView();
m_ui.memoryView->repaint(); m_ui.memoryView->repaint();
m_code_model->setPC(CPU::g_state.regs.pc); m_code_model->setPC(CPU::g_state.pc);
scrollToPC(); scrollToPC();
} }
void DebuggerWindow::scrollToPC() void DebuggerWindow::scrollToPC()
{ {
return scrollToCodeAddress(CPU::g_state.regs.pc); return scrollToCodeAddress(CPU::g_state.pc);
} }
void DebuggerWindow::scrollToCodeAddress(VirtualMemoryAddress address) void DebuggerWindow::scrollToCodeAddress(VirtualMemoryAddress address)

View file

@ -95,10 +95,10 @@ bool RegTestHost::InitializeConfig()
si.SetStringValue("MemoryCards", "Card2Type", Settings::GetMemoryCardTypeName(MemoryCardType::None)); si.SetStringValue("MemoryCards", "Card2Type", Settings::GetMemoryCardTypeName(MemoryCardType::None));
si.SetStringValue("ControllerPorts", "MultitapMode", Settings::GetMultitapModeName(MultitapMode::Disabled)); si.SetStringValue("ControllerPorts", "MultitapMode", Settings::GetMultitapModeName(MultitapMode::Disabled));
si.SetStringValue("Audio", "Backend", Settings::GetAudioBackendName(AudioBackend::Null)); si.SetStringValue("Audio", "Backend", Settings::GetAudioBackendName(AudioBackend::Null));
si.SetStringValue("Logging", "LogLevel", Settings::GetLogLevelName(LOGLEVEL_VERBOSE));
si.SetBoolValue("Logging", "LogToConsole", true); si.SetBoolValue("Logging", "LogToConsole", true);
si.SetBoolValue("Main", "ApplyGameSettings", false); // don't want game settings interfering si.SetBoolValue("Main", "ApplyGameSettings", false); // don't want game settings interfering
si.SetBoolValue("BIOS", "PatchFastBoot", true); // no point validating the bios intro.. si.SetBoolValue("BIOS", "PatchFastBoot", true); // no point validating the bios intro..
si.SetFloatValue("Main", "EmulationSpeed", 0.0f);
// disable all sources // disable all sources
for (u32 i = 0; i < static_cast<u32>(InputSourceType::Count); i++) for (u32 i = 0; i < static_cast<u32>(InputSourceType::Count); i++)
@ -251,7 +251,9 @@ void Host::OnGameChanged(const std::string& disc_path, const std::string& game_s
void Host::PumpMessagesOnCPUThread() void Host::PumpMessagesOnCPUThread()
{ {
// s_frames_to_run--;
if (s_frames_to_run == 0)
System::ShutdownSystem(false);
} }
void Host::RunOnCPUThread(std::function<void()> function, bool block /* = false */) void Host::RunOnCPUThread(std::function<void()> function, bool block /* = false */)
@ -496,6 +498,7 @@ bool RegTestHost::ParseCommandLineParameters(int argc, char* argv[], std::option
} }
Log::SetConsoleOutputParams(true, nullptr, level.value()); Log::SetConsoleOutputParams(true, nullptr, level.value());
s_base_settings_interface->SetStringValue("Logging", "LogLevel", Settings::GetLogLevelName(level.value()));
continue; continue;
} }
else if (CHECK_ARG_PARAM("-renderer")) else if (CHECK_ARG_PARAM("-renderer"))
@ -577,16 +580,7 @@ int main(int argc, char* argv[])
} }
Log_InfoPrintf("Running for %d frames...", s_frames_to_run); Log_InfoPrintf("Running for %d frames...", s_frames_to_run);
System::Execute();
for (u32 frame = 0; frame < s_frames_to_run; frame++)
{
System::RunFrame();
Host::RenderDisplay(false);
System::UpdatePerformanceCounters();
}
Log_InfoPrintf("All done, shutting down system.");
System::ShutdownSystem(false);
Log_InfoPrintf("Exiting with success."); Log_InfoPrintf("Exiting with success.");
result = 0; result = 0;

View file

@ -2690,7 +2690,7 @@ void FullscreenUI::DrawInterfaceSettingsPage()
DrawToggleSetting(bsi, ICON_FA_SPINNER " Show GPU Usage", DrawToggleSetting(bsi, ICON_FA_SPINNER " Show GPU Usage",
"Shows the host's GPU usage in the top-right corner of the display.", "Display", "ShowGPU", false); "Shows the host's GPU usage in the top-right corner of the display.", "Display", "ShowGPU", false);
DrawToggleSetting(bsi, ICON_FA_RULER_HORIZONTAL " Show Frame Times", DrawToggleSetting(bsi, ICON_FA_RULER_HORIZONTAL " Show Frame Times",
"Shows a visual history of frame times in the upper-left corner of the display.", "EmuCore/GS", "Shows a visual history of frame times in the upper-left corner of the display.", "Display",
"ShowFrameTimes", false); "ShowFrameTimes", false);
DrawToggleSetting(bsi, ICON_FA_RULER_VERTICAL " Show Resolution", DrawToggleSetting(bsi, ICON_FA_RULER_VERTICAL " Show Resolution",
"Shows the current rendering resolution of the system in the top-right corner of the display.", "Shows the current rendering resolution of the system in the top-right corner of the display.",

View file

@ -209,7 +209,7 @@ void JitCodeBuffer::CommitCode(u32 length)
if (length == 0) if (length == 0)
return; return;
#if defined(CPU_AARCH32) || defined(CPU_AARCH64) #if defined(CPU_AARCH32) || defined(CPU_AARCH64) || defined(CPU_RISCV64)
// ARM instruction and data caches are not coherent, we need to flush after every block. // ARM instruction and data caches are not coherent, we need to flush after every block.
FlushInstructionCache(m_free_code_ptr, length); FlushInstructionCache(m_free_code_ptr, length);
#endif #endif
@ -224,7 +224,7 @@ void JitCodeBuffer::CommitFarCode(u32 length)
if (length == 0) if (length == 0)
return; return;
#if defined(CPU_AARCH32) || defined(CPU_AARCH64) #if defined(CPU_AARCH32) || defined(CPU_AARCH64) || defined(CPU_RISCV64)
// ARM instruction and data caches are not coherent, we need to flush after every block. // ARM instruction and data caches are not coherent, we need to flush after every block.
FlushInstructionCache(m_free_far_code_ptr, length); FlushInstructionCache(m_free_far_code_ptr, length);
#endif #endif

View file

@ -21,6 +21,15 @@ public:
ALWAYS_INLINE u8* GetCodePointer() const { return m_code_ptr; } ALWAYS_INLINE u8* GetCodePointer() const { return m_code_ptr; }
ALWAYS_INLINE u32 GetTotalSize() const { return m_total_size; } ALWAYS_INLINE u32 GetTotalSize() const { return m_total_size; }
// Returns m_code_used as a percentage (scaled by 100) of m_code_size.
// NOTE(review): there is no guard for m_code_size == 0; the float division would then yield
// NaN/inf rather than trapping - confirm the buffer is always allocated before this is called.
ALWAYS_INLINE float GetUsedPct() const
{
return (static_cast<float>(m_code_used) / static_cast<float>(m_code_size)) * 100.0f;
}
// Returns m_far_code_used as a percentage (scaled by 100) of m_far_code_size.
// NOTE(review): there is no guard for m_far_code_size == 0; the float division would then yield
// NaN/inf rather than trapping - confirm whether a zero-sized far region is possible.
ALWAYS_INLINE float GetFarUsedPct() const
{
return (static_cast<float>(m_far_code_used) / static_cast<float>(m_far_code_size)) * 100.0f;
}
// Returns the sum of the near (m_code_used) and far (m_far_code_used) usage counters.
ALWAYS_INLINE u32 GetTotalUsed() const { return m_code_used + m_far_code_used; }
ALWAYS_INLINE u8* GetFreeCodePointer() const { return m_free_code_ptr; } ALWAYS_INLINE u8* GetFreeCodePointer() const { return m_free_code_ptr; }
ALWAYS_INLINE u32 GetFreeCodeSpace() const { return static_cast<u32>(m_code_size - m_code_used); } ALWAYS_INLINE u32 GetFreeCodeSpace() const { return static_cast<u32>(m_code_size - m_code_used); }

View file

@ -81,6 +81,14 @@ static bool IsStoreInstruction(const void* ptr)
return false; return false;
} }
} }
#elif defined(CPU_RISCV64)
// Reports whether the 32-bit word at ptr has the low seven bits 0b0100011, i.e. the RISC-V
// base-ISA STORE major opcode (SB/SH/SW/SD).
// NOTE(review): only this one pattern is accepted, so floating-point stores (a different major
// opcode) and 16-bit compressed stores would be reported as non-stores - confirm that the code
// whose faults reach this check never contains them.
static bool IsStoreInstruction(const void* ptr)
{
u32 bits;
// memcpy rather than a pointer cast, so no alignment or aliasing assumption is made about ptr.
std::memcpy(&bits, ptr, sizeof(bits));
return ((bits & 0x7Fu) == 0b0100011u);
}
#endif #endif
#if defined(_WIN32) && (defined(CPU_X64) || defined(CPU_AARCH64)) #if defined(_WIN32) && (defined(CPU_X64) || defined(CPU_AARCH64))
@ -143,6 +151,9 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
#elif defined(CPU_AARCH64) #elif defined(CPU_AARCH64)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc); void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc);
const bool is_write = IsStoreInstruction(exception_pc); const bool is_write = IsStoreInstruction(exception_pc);
#elif defined(CPU_RISCV64)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.__gregs[REG_PC]);
const bool is_write = IsStoreInstruction(exception_pc);
#else #else
void* const exception_pc = nullptr; void* const exception_pc = nullptr;
const bool is_write = false; const bool is_write = false;