mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-18 22:35:39 +00:00
CPU/CodeCache: Rewrite using new-rec's block management
This commit is contained in:
parent
f82d08e223
commit
79e1ae3e54
|
@ -38,6 +38,8 @@ add_library(common
|
|||
minizip_helpers.cpp
|
||||
minizip_helpers.h
|
||||
path.h
|
||||
perf_scope.cpp
|
||||
perf_scope.h
|
||||
progress_callback.cpp
|
||||
progress_callback.h
|
||||
rectangle.h
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
<ClInclude Include="memory_settings_interface.h" />
|
||||
<ClInclude Include="md5_digest.h" />
|
||||
<ClInclude Include="path.h" />
|
||||
<ClInclude Include="perf_scope.h" />
|
||||
<ClInclude Include="progress_callback.h" />
|
||||
<ClInclude Include="rectangle.h" />
|
||||
<ClInclude Include="scoped_guard.h" />
|
||||
|
@ -59,6 +60,7 @@
|
|||
<ClCompile Include="memory_settings_interface.cpp" />
|
||||
<ClCompile Include="md5_digest.cpp" />
|
||||
<ClCompile Include="minizip_helpers.cpp" />
|
||||
<ClCompile Include="perf_scope.cpp" />
|
||||
<ClCompile Include="progress_callback.cpp" />
|
||||
<ClCompile Include="sha1_digest.cpp" />
|
||||
<ClCompile Include="small_string.cpp" />
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
<ClInclude Include="fastjmp.h" />
|
||||
<ClInclude Include="memmap.h" />
|
||||
<ClInclude Include="intrin.h" />
|
||||
<ClInclude Include="perf_scope.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="small_string.cpp" />
|
||||
|
@ -69,6 +70,7 @@
|
|||
<ClCompile Include="sha1_digest.cpp" />
|
||||
<ClCompile Include="fastjmp.cpp" />
|
||||
<ClCompile Include="memmap.cpp" />
|
||||
<ClCompile Include="perf_scope.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Natvis Include="bitfield.natvis" />
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#endif
|
||||
|
||||
template<typename T>
|
||||
static inline void MemsetPtrs(T* ptr, T value, u32 count)
|
||||
ALWAYS_INLINE_RELEASE static void MemsetPtrs(T* ptr, T value, u32 count)
|
||||
{
|
||||
static_assert(std::is_pointer_v<T>, "T is pointer type");
|
||||
static_assert(sizeof(T) == sizeof(void*), "T isn't a fat pointer");
|
||||
|
|
198
src/common/perf_scope.cpp
Normal file
198
src/common/perf_scope.cpp
Normal file
|
@ -0,0 +1,198 @@
|
|||
|
||||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>, PCSX2 Team
|
||||
// SPDX-License-Identifier: GPL-3.0
|
||||
|
||||
#include "perf_scope.h"
|
||||
#include "assert.h"
|
||||
#include "string_util.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <atomic>
|
||||
#include <ctime>
|
||||
#include <elf.h>
|
||||
#include <mutex>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// #define ProfileWithPerf
|
||||
// #define ProfileWithPerfJitDump
|
||||
|
||||
// Perf is only supported on linux
|
||||
#if defined(__linux__) && defined(ProfileWithPerf)
|
||||
|
||||
static std::FILE* s_map_file = nullptr;
|
||||
static bool s_map_file_opened = false;
|
||||
static std::mutex s_mutex;
|
||||
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
std::unique_lock lock(s_mutex);
|
||||
|
||||
if (!s_map_file)
|
||||
{
|
||||
if (s_map_file_opened)
|
||||
return;
|
||||
|
||||
char file[256];
|
||||
snprintf(file, std::size(file), "/tmp/perf-%d.map", getpid());
|
||||
s_map_file = std::fopen(file, "wb");
|
||||
s_map_file_opened = true;
|
||||
if (!s_map_file)
|
||||
return;
|
||||
}
|
||||
|
||||
std::fprintf(s_map_file, "%" PRIx64 " %zx %s\n", static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), size, symbol);
|
||||
std::fflush(s_map_file);
|
||||
}
|
||||
|
||||
#elif defined(__linux__) && defined(ProfileWithPerfJitDump)
|
||||
enum : u32
|
||||
{
|
||||
JIT_CODE_LOAD = 0,
|
||||
JIT_CODE_MOVE = 1,
|
||||
JIT_CODE_DEBUG_INFO = 2,
|
||||
JIT_CODE_CLOSE = 3,
|
||||
JIT_CODE_UNWINDING_INFO = 4
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
struct JITDUMP_HEADER
|
||||
{
|
||||
u32 magic = 0x4A695444; // JiTD
|
||||
u32 version = 1;
|
||||
u32 header_size = sizeof(JITDUMP_HEADER);
|
||||
u32 elf_mach;
|
||||
u32 pad1 = 0;
|
||||
u32 pid;
|
||||
u64 timestamp;
|
||||
u64 flags = 0;
|
||||
};
|
||||
struct JITDUMP_RECORD_HEADER
|
||||
{
|
||||
u32 id;
|
||||
u32 total_size;
|
||||
u64 timestamp;
|
||||
};
|
||||
struct JITDUMP_CODE_LOAD
|
||||
{
|
||||
JITDUMP_RECORD_HEADER header;
|
||||
u32 pid;
|
||||
u32 tid;
|
||||
u64 vma;
|
||||
u64 code_addr;
|
||||
u64 code_size;
|
||||
u64 code_index;
|
||||
// name
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static u64 JitDumpTimestamp()
|
||||
{
|
||||
struct timespec ts = {};
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (static_cast<u64>(ts.tv_sec) * 1000000000ULL) + static_cast<u64>(ts.tv_nsec);
|
||||
}
|
||||
|
||||
static FILE* s_jitdump_file = nullptr;
|
||||
static bool s_jitdump_file_opened = false;
|
||||
static std::mutex s_jitdump_mutex;
|
||||
static u32 s_jitdump_record_id;
|
||||
|
||||
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
const u32 namelen = std::strlen(symbol) + 1;
|
||||
|
||||
std::unique_lock lock(s_jitdump_mutex);
|
||||
if (!s_jitdump_file)
|
||||
{
|
||||
if (!s_jitdump_file_opened)
|
||||
{
|
||||
char file[256];
|
||||
snprintf(file, std::size(file), "jit-%d.dump", getpid());
|
||||
s_jitdump_file = fopen(file, "w+b");
|
||||
s_jitdump_file_opened = true;
|
||||
if (!s_jitdump_file)
|
||||
return;
|
||||
}
|
||||
|
||||
void* perf_marker = mmap(nullptr, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(s_jitdump_file), 0);
|
||||
AssertMsg(perf_marker != MAP_FAILED, "Map perf marker");
|
||||
|
||||
JITDUMP_HEADER jh = {};
|
||||
#if defined(__aarch64__)
|
||||
jh.elf_mach = EM_AARCH64;
|
||||
#else
|
||||
jh.elf_mach = EM_X86_64;
|
||||
#endif
|
||||
jh.pid = getpid();
|
||||
jh.timestamp = JitDumpTimestamp();
|
||||
std::fwrite(&jh, sizeof(jh), 1, s_jitdump_file);
|
||||
}
|
||||
|
||||
JITDUMP_CODE_LOAD cl = {};
|
||||
cl.header.id = JIT_CODE_LOAD;
|
||||
cl.header.total_size = sizeof(cl) + namelen + static_cast<u32>(size);
|
||||
cl.header.timestamp = JitDumpTimestamp();
|
||||
cl.pid = getpid();
|
||||
cl.tid = syscall(SYS_gettid);
|
||||
cl.vma = 0;
|
||||
cl.code_addr = static_cast<u64>(reinterpret_cast<uintptr_t>(ptr));
|
||||
cl.code_size = static_cast<u64>(size);
|
||||
cl.code_index = s_jitdump_record_id++;
|
||||
std::fwrite(&cl, sizeof(cl), 1, s_jitdump_file);
|
||||
std::fwrite(symbol, namelen, 1, s_jitdump_file);
|
||||
std::fwrite(ptr, size, 1, s_jitdump_file);
|
||||
std::fflush(s_jitdump_file);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && (defined(ProfileWithPerf) || defined(ProfileWithPerfJitDump))
|
||||
|
||||
void PerfScope::Register(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
char full_symbol[128];
|
||||
if (HasPrefix())
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s_%s", m_prefix, symbol);
|
||||
else
|
||||
StringUtil::Strlcpy(full_symbol, symbol, std::size(full_symbol));
|
||||
RegisterMethod(ptr, size, full_symbol);
|
||||
}
|
||||
|
||||
void PerfScope::RegisterPC(const void* ptr, size_t size, u32 pc)
|
||||
{
|
||||
char full_symbol[128];
|
||||
if (HasPrefix())
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s_%08X", m_prefix, pc);
|
||||
else
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%08X", pc);
|
||||
RegisterMethod(ptr, size, full_symbol);
|
||||
}
|
||||
|
||||
void PerfScope::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
|
||||
{
|
||||
char full_symbol[128];
|
||||
if (HasPrefix())
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s_%s%016" PRIX64, m_prefix, prefix, key);
|
||||
else
|
||||
std::snprintf(full_symbol, std::size(full_symbol), "%s%016" PRIX64, prefix, key);
|
||||
RegisterMethod(ptr, size, full_symbol);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void PerfScope::Register(const void* ptr, size_t size, const char* symbol)
|
||||
{
|
||||
}
|
||||
void PerfScope::RegisterPC(const void* ptr, size_t size, u32 pc)
|
||||
{
|
||||
}
|
||||
void PerfScope::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
20
src/common/perf_scope.h
Normal file
20
src/common/perf_scope.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>, PCSX2 Team
|
||||
// SPDX-License-Identifier: GPL-3.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
class PerfScope
|
||||
{
|
||||
public:
|
||||
constexpr PerfScope(const char* prefix) : m_prefix(prefix) {}
|
||||
bool HasPrefix() const { return (m_prefix && m_prefix[0]); }
|
||||
|
||||
void Register(const void* ptr, size_t size, const char* symbol);
|
||||
void RegisterPC(const void* ptr, size_t size, u32 pc);
|
||||
void RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key);
|
||||
|
||||
private:
|
||||
const char* m_prefix;
|
||||
};
|
|
@ -19,6 +19,7 @@ add_library(core
|
|||
controller.h
|
||||
cpu_code_cache.cpp
|
||||
cpu_code_cache.h
|
||||
cpu_code_cache_private.h
|
||||
cpu_core.cpp
|
||||
cpu_core.h
|
||||
cpu_core_private.h
|
||||
|
|
|
@ -85,8 +85,8 @@ enum : TickCount
|
|||
|
||||
enum : u32
|
||||
{
|
||||
RAM_2MB_CODE_PAGE_COUNT = (RAM_2MB_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE,
|
||||
RAM_8MB_CODE_PAGE_COUNT = (RAM_8MB_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE,
|
||||
RAM_2MB_CODE_PAGE_COUNT = (RAM_2MB_SIZE + (HOST_PAGE_SIZE - 1)) / HOST_PAGE_SIZE,
|
||||
RAM_8MB_CODE_PAGE_COUNT = (RAM_8MB_SIZE + (HOST_PAGE_SIZE - 1)) / HOST_PAGE_SIZE,
|
||||
|
||||
MEMORY_LUT_PAGE_SIZE = 4096,
|
||||
MEMORY_LUT_PAGE_SHIFT = 12,
|
||||
|
|
|
@ -85,6 +85,7 @@
|
|||
<ClInclude Include="cdrom_async_reader.h" />
|
||||
<ClInclude Include="cheats.h" />
|
||||
<ClInclude Include="achievements.h" />
|
||||
<ClInclude Include="cpu_code_cache_private.h" />
|
||||
<ClInclude Include="cpu_core.h" />
|
||||
<ClInclude Include="cpu_core_private.h" />
|
||||
<ClInclude Include="cpu_disasm.h" />
|
||||
|
@ -176,6 +177,9 @@
|
|||
<ProjectReference Include="..\..\dep\zstd\zstd.vcxproj">
|
||||
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\..\dep\zydis\zydis.vcxproj">
|
||||
<Project>{c51a346a-86b2-46df-9bb3-d0aa7e5d8699}</Project>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\scmversion\scmversion.vcxproj">
|
||||
<Project>{075ced82-6a20-46df-94c7-9624ac9ddbeb}</Project>
|
||||
</ProjectReference>
|
||||
|
|
|
@ -124,5 +124,6 @@
|
|||
<ClInclude Include="shader_cache_version.h" />
|
||||
<ClInclude Include="gpu_shadergen.h" />
|
||||
<ClInclude Include="pch.h" />
|
||||
<ClInclude Include="cpu_code_cache_private.h" />
|
||||
</ItemGroup>
|
||||
</Project>
|
File diff suppressed because it is too large
Load diff
|
@ -1,160 +1,42 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bus.h"
|
||||
#include "common/bitfield.h"
|
||||
#include "cpu_types.h"
|
||||
#include "util/jit_code_buffer.h"
|
||||
#include "util/page_fault_handler.h"
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
#include "cpu_recompiler_types.h"
|
||||
#endif
|
||||
namespace CPU::CodeCache {
|
||||
|
||||
namespace CPU {
|
||||
/// Returns true if any recompiler is in use.
|
||||
bool IsUsingAnyRecompiler();
|
||||
|
||||
union CodeBlockKey
|
||||
{
|
||||
u32 bits;
|
||||
/// Returns true if any recompiler and fastmem is in use.
|
||||
bool IsUsingFastmem();
|
||||
|
||||
BitField<u32, bool, 0, 1> user_mode;
|
||||
BitField<u32, u32, 2, 30> aligned_pc;
|
||||
/// Allocates resources, call once at startup.
|
||||
void ProcessStartup();
|
||||
|
||||
ALWAYS_INLINE u32 GetPC() const { return aligned_pc << 2; }
|
||||
ALWAYS_INLINE void SetPC(u32 pc) { aligned_pc = pc >> 2; }
|
||||
|
||||
ALWAYS_INLINE u32 GetPCPhysicalAddress() const { return (aligned_pc << 2) & PHYSICAL_MEMORY_ADDRESS_MASK; }
|
||||
|
||||
ALWAYS_INLINE CodeBlockKey() = default;
|
||||
|
||||
ALWAYS_INLINE CodeBlockKey(const CodeBlockKey& rhs) : bits(rhs.bits) {}
|
||||
|
||||
ALWAYS_INLINE CodeBlockKey& operator=(const CodeBlockKey& rhs)
|
||||
{
|
||||
bits = rhs.bits;
|
||||
return *this;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE bool operator==(const CodeBlockKey& rhs) const { return bits == rhs.bits; }
|
||||
ALWAYS_INLINE bool operator!=(const CodeBlockKey& rhs) const { return bits != rhs.bits; }
|
||||
ALWAYS_INLINE bool operator<(const CodeBlockKey& rhs) const { return bits < rhs.bits; }
|
||||
};
|
||||
|
||||
struct CodeBlockInstruction
|
||||
{
|
||||
Instruction instruction;
|
||||
u32 pc;
|
||||
|
||||
bool is_branch_instruction : 1;
|
||||
bool is_direct_branch_instruction : 1;
|
||||
bool is_unconditional_branch_instruction : 1;
|
||||
bool is_branch_delay_slot : 1;
|
||||
bool is_load_instruction : 1;
|
||||
bool is_store_instruction : 1;
|
||||
bool is_load_delay_slot : 1;
|
||||
bool is_last_instruction : 1;
|
||||
bool has_load_delay : 1;
|
||||
bool can_trap : 1;
|
||||
};
|
||||
|
||||
struct CodeBlock
|
||||
{
|
||||
using HostCodePointer = void (*)();
|
||||
|
||||
struct LinkInfo
|
||||
{
|
||||
CodeBlock* block;
|
||||
void* host_pc;
|
||||
void* host_resolve_pc;
|
||||
u32 host_pc_size;
|
||||
};
|
||||
|
||||
CodeBlock(const CodeBlockKey key_) : key(key_) {}
|
||||
|
||||
CodeBlockKey key;
|
||||
u32 host_code_size = 0;
|
||||
HostCodePointer host_code = nullptr;
|
||||
|
||||
std::vector<CodeBlockInstruction> instructions;
|
||||
std::vector<LinkInfo> link_predecessors;
|
||||
std::vector<LinkInfo> link_successors;
|
||||
|
||||
TickCount uncached_fetch_ticks = 0;
|
||||
u32 icache_line_count = 0;
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
std::vector<Recompiler::LoadStoreBackpatchInfo> loadstore_backpatch_info;
|
||||
#endif
|
||||
|
||||
bool contains_loadstore_instructions = false;
|
||||
bool contains_double_branches = false;
|
||||
bool invalidated = false;
|
||||
bool can_link = true;
|
||||
|
||||
u32 recompile_frame_number = 0;
|
||||
u32 recompile_count = 0;
|
||||
u32 invalidate_frame_number = 0;
|
||||
|
||||
u32 GetPC() const { return key.GetPC(); }
|
||||
u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }
|
||||
u32 GetStartPageIndex() const { return (key.GetPCPhysicalAddress() / HOST_PAGE_SIZE); }
|
||||
u32 GetEndPageIndex() const { return ((key.GetPCPhysicalAddress() + GetSizeInBytes()) / HOST_PAGE_SIZE); }
|
||||
bool IsInRAM() const
|
||||
{
|
||||
// TODO: Constant
|
||||
return key.GetPCPhysicalAddress() < 0x200000;
|
||||
}
|
||||
};
|
||||
|
||||
namespace CodeCache {
|
||||
|
||||
enum : u32
|
||||
{
|
||||
FAST_MAP_TABLE_COUNT = 0x10000,
|
||||
FAST_MAP_TABLE_SIZE = 0x10000 / 4, // 16384
|
||||
FAST_MAP_TABLE_SHIFT = 16,
|
||||
};
|
||||
|
||||
using FastMapTable = CodeBlock::HostCodePointer*;
|
||||
/// Frees resources, call once at shutdown.
|
||||
void ProcessShutdown();
|
||||
|
||||
/// Initializes resources for the system.
|
||||
void Initialize();
|
||||
|
||||
/// Frees resources used by the system.
|
||||
void Shutdown();
|
||||
|
||||
/// Runs the system.
|
||||
[[noreturn]] void Execute();
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
using DispatcherFunction = void (*)();
|
||||
using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer);
|
||||
|
||||
FastMapTable* GetFastMapPointer();
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
JitCodeBuffer& GetCodeBuffer();
|
||||
#endif
|
||||
|
||||
/// Flushes the code cache, forcing all blocks to be recompiled.
|
||||
void Flush();
|
||||
|
||||
/// Changes whether the recompiler is enabled.
|
||||
void Reinitialize();
|
||||
void Reset();
|
||||
|
||||
/// Invalidates all blocks which are in the range of the specified code page.
|
||||
void InvalidateBlocksWithPageIndex(u32 page_index);
|
||||
|
||||
/// Invalidates all blocks in the cache.
|
||||
void InvalidateAll();
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void InterpretCachedBlock(const CodeBlock& block);
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void InterpretUncachedBlock();
|
||||
void InvalidateAllRAMBlocks();
|
||||
|
||||
/// Invalidates any code pages which overlap the specified range.
|
||||
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
|
||||
|
@ -168,6 +50,4 @@ ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_c
|
|||
}
|
||||
}
|
||||
|
||||
}; // namespace CodeCache
|
||||
|
||||
} // namespace CPU
|
||||
} // namespace CPU::CodeCache
|
||||
|
|
279
src/core/cpu_code_cache_private.h
Normal file
279
src/core/cpu_code_cache_private.h
Normal file
|
@ -0,0 +1,279 @@
|
|||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bus.h"
|
||||
#include "common/bitfield.h"
|
||||
#include "common/perf_scope.h"
|
||||
#include "cpu_code_cache.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_types.h"
|
||||
|
||||
#include "util/jit_code_buffer.h"
|
||||
#include "util/page_fault_handler.h"
|
||||
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
// #include "cpu_recompiler_types.h"
|
||||
#endif
|
||||
|
||||
namespace CPU::CodeCache {
|
||||
|
||||
enum : u32
|
||||
{
|
||||
LUT_TABLE_COUNT = 0x10000,
|
||||
LUT_TABLE_SIZE = 0x10000 / sizeof(u32), // 16384, one for each PC
|
||||
LUT_TABLE_SHIFT = 16,
|
||||
|
||||
MAX_BLOCK_EXIT_LINKS = 2,
|
||||
};
|
||||
|
||||
using CodeLUT = const void**;
|
||||
using CodeLUTArray = std::array<CodeLUT, LUT_TABLE_COUNT>;
|
||||
using BlockLinkMap = std::unordered_multimap<u32, void*>; // TODO: try ordered?
|
||||
|
||||
enum RegInfoFlags : u8
|
||||
{
|
||||
RI_LIVE = (1 << 0),
|
||||
RI_USED = (1 << 1),
|
||||
RI_LASTUSE = (1 << 2),
|
||||
};
|
||||
|
||||
struct InstructionInfo
|
||||
{
|
||||
u32 pc; // TODO: Remove this, old recs still depend on it.
|
||||
|
||||
bool is_branch_instruction : 1;
|
||||
bool is_direct_branch_instruction : 1;
|
||||
bool is_unconditional_branch_instruction : 1;
|
||||
bool is_branch_delay_slot : 1;
|
||||
bool is_load_instruction : 1;
|
||||
bool is_store_instruction : 1;
|
||||
bool is_load_delay_slot : 1;
|
||||
bool is_last_instruction : 1;
|
||||
bool has_load_delay : 1;
|
||||
bool can_trap : 1;
|
||||
|
||||
u8 reg_flags[static_cast<u8>(Reg::count)];
|
||||
// Reg write_reg[3];
|
||||
Reg read_reg[3];
|
||||
|
||||
// If unset, values which are not live will not be written back to memory.
|
||||
// Tends to break stuff at the moment.
|
||||
static constexpr bool WRITE_DEAD_VALUES = true;
|
||||
|
||||
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
|
||||
/// In other words, the register is worth keeping in a host register/caching it.
|
||||
inline bool UsedTest(Reg reg) const { return (reg_flags[static_cast<u8>(reg)] & (RI_USED | RI_LASTUSE)) == RI_USED; }
|
||||
|
||||
/// Returns true if the value should be computed/written back.
|
||||
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
|
||||
inline bool LiveTest(Reg reg) const
|
||||
{
|
||||
return WRITE_DEAD_VALUES || ((reg_flags[static_cast<u8>(reg)] & RI_LIVE) != 0);
|
||||
}
|
||||
|
||||
/// Returns true if the register can be renamed into another.
|
||||
inline bool RenameTest(Reg reg) const { return (reg == Reg::zero || !UsedTest(reg) || !LiveTest(reg)); }
|
||||
|
||||
/// Returns true if this instruction reads this register.
|
||||
inline bool ReadsReg(Reg reg) const { return (read_reg[0] == reg || read_reg[1] == reg || read_reg[2] == reg); }
|
||||
};
|
||||
|
||||
enum class BlockState : u8
|
||||
{
|
||||
Valid,
|
||||
Invalidated,
|
||||
NeedsRecompile,
|
||||
FallbackToInterpreter
|
||||
};
|
||||
|
||||
enum class BlockFlags : u8
|
||||
{
|
||||
None = 0,
|
||||
ContainsLoadStoreInstructions = (1 << 0),
|
||||
SpansPages = (1 << 1),
|
||||
BranchDelaySpansPages = (1 << 2),
|
||||
};
|
||||
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(BlockFlags);
|
||||
|
||||
enum class PageProtectionMode : u8
|
||||
{
|
||||
WriteProtected,
|
||||
ManualCheck,
|
||||
Unprotected,
|
||||
};
|
||||
|
||||
struct BlockMetadata
|
||||
{
|
||||
TickCount uncached_fetch_ticks;
|
||||
u32 icache_line_count;
|
||||
BlockFlags flags;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4324) // C4324: 'CPU::CodeCache::Block': structure was padded due to alignment specifier)
|
||||
#endif
|
||||
|
||||
struct alignas(16) Block
|
||||
{
|
||||
u32 pc;
|
||||
u32 size; // in guest instructions
|
||||
const void* host_code;
|
||||
|
||||
// links to previous/next block within page
|
||||
Block* next_block_in_page;
|
||||
|
||||
BlockLinkMap::iterator exit_links[MAX_BLOCK_EXIT_LINKS];
|
||||
u8 num_exit_links;
|
||||
|
||||
// TODO: Move up so it's part of the same cache line
|
||||
BlockState state;
|
||||
BlockFlags flags;
|
||||
PageProtectionMode protection;
|
||||
|
||||
TickCount uncached_fetch_ticks;
|
||||
u32 icache_line_count;
|
||||
|
||||
u32 compile_frame;
|
||||
u8 compile_count;
|
||||
|
||||
// followed by Instruction * size, InstructionRegInfo * size
|
||||
ALWAYS_INLINE const Instruction* Instructions() const { return reinterpret_cast<const Instruction*>(this + 1); }
|
||||
ALWAYS_INLINE Instruction* Instructions() { return reinterpret_cast<Instruction*>(this + 1); }
|
||||
|
||||
ALWAYS_INLINE const InstructionInfo* InstructionsInfo() const
|
||||
{
|
||||
return reinterpret_cast<const InstructionInfo*>(Instructions() + size);
|
||||
}
|
||||
ALWAYS_INLINE InstructionInfo* InstructionsInfo()
|
||||
{
|
||||
return reinterpret_cast<InstructionInfo*>(Instructions() + size);
|
||||
}
|
||||
|
||||
// returns true if the block has a given flag
|
||||
ALWAYS_INLINE bool HasFlag(BlockFlags flag) const { return ((flags & flag) != BlockFlags::None); }
|
||||
|
||||
// returns the page index for the start of the block
|
||||
ALWAYS_INLINE u32 StartPageIndex() const { return Bus::GetRAMCodePageIndex(pc); }
|
||||
|
||||
// returns the page index for the last instruction in the block (inclusive)
|
||||
ALWAYS_INLINE u32 EndPageIndex() const { return Bus::GetRAMCodePageIndex(pc + ((size - 1) * sizeof(Instruction))); }
|
||||
|
||||
// returns true if the block spans multiple pages
|
||||
ALWAYS_INLINE bool SpansPages() const { return StartPageIndex() != EndPageIndex(); }
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
using BlockLUTArray = std::array<Block**, LUT_TABLE_COUNT>;
|
||||
|
||||
struct LoadstoreBackpatchInfo
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 gpr_bitmask;
|
||||
u16 cycles;
|
||||
u16 address_register : 5;
|
||||
u16 data_register : 5;
|
||||
u16 size : 2;
|
||||
u16 is_signed : 1;
|
||||
u16 is_load : 1;
|
||||
};
|
||||
|
||||
const void* thunk_address; // only needed for oldrec
|
||||
};
|
||||
|
||||
u32 guest_pc;
|
||||
u8 code_size;
|
||||
|
||||
MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); }
|
||||
u32 AccessSizeInBytes() const { return 1u << size; }
|
||||
};
|
||||
static_assert(sizeof(LoadstoreBackpatchInfo) == 16);
|
||||
|
||||
static inline bool AddressInRAM(VirtualMemoryAddress pc)
|
||||
{
|
||||
return VirtualAddressToPhysical(pc) < Bus::g_ram_size;
|
||||
}
|
||||
|
||||
struct PageProtectionInfo
|
||||
{
|
||||
Block* first_block_in_page;
|
||||
Block* last_block_in_page;
|
||||
|
||||
PageProtectionMode mode;
|
||||
u16 invalidate_count;
|
||||
u32 invalidate_frame;
|
||||
};
|
||||
static_assert(sizeof(PageProtectionInfo) == (sizeof(Block*) * 2 + 8));
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void InterpretCachedBlock(const Block* block);
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void InterpretUncachedBlock();
|
||||
|
||||
void LogCurrentState();
|
||||
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
#define ENABLE_RECOMPILER_SUPPORT 1
|
||||
|
||||
#if defined(_DEBUG) || false
|
||||
// Enable disassembly of host assembly code.
|
||||
#define ENABLE_HOST_DISASSEMBLY 1
|
||||
#endif
|
||||
|
||||
#if false
|
||||
// Enable profiling of JIT blocks.
|
||||
#define ENABLE_RECOMPILER_PROFILING 1
|
||||
#endif
|
||||
|
||||
JitCodeBuffer& GetCodeBuffer();
|
||||
const void* GetInterpretUncachedBlockFunction();
|
||||
|
||||
void CompileOrRevalidateBlock(u32 start_pc);
|
||||
void DiscardAndRecompileBlock(u32 start_pc);
|
||||
const void* CreateBlockLink(Block* from_block, void* code, u32 newpc);
|
||||
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address);
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load);
|
||||
|
||||
u32 EmitASMFunctions(void* code, u32 code_size);
|
||||
u32 EmitJump(void* code, const void* dst, bool flush_icache);
|
||||
|
||||
void DisassembleAndLogHostCode(const void* start, u32 size);
|
||||
u32 GetHostInstructionCount(const void* start, u32 size);
|
||||
|
||||
extern CodeLUTArray g_code_lut;
|
||||
|
||||
extern NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
|
||||
extern const void* g_compile_or_revalidate_block;
|
||||
extern const void* g_check_events_and_dispatch;
|
||||
extern const void* g_run_events_and_dispatch;
|
||||
extern const void* g_dispatcher;
|
||||
extern const void* g_block_dispatcher;
|
||||
extern const void* g_interpret_block;
|
||||
extern const void* g_discard_and_recompile_block;
|
||||
|
||||
#ifdef ENABLE_RECOMPILER_PROFILING
|
||||
|
||||
extern PerfScope MIPSPerfScope;
|
||||
|
||||
#endif // ENABLE_RECOMPILER_PROFILING
|
||||
|
||||
#endif // ENABLE_RECOMPILER
|
||||
|
||||
} // namespace CPU::CodeCache
|
|
@ -7,6 +7,7 @@
|
|||
#include "common/fastjmp.h"
|
||||
#include "common/file_system.h"
|
||||
#include "common/log.h"
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_disasm.h"
|
||||
#include "cpu_recompiler_thunks.h"
|
||||
|
@ -2262,20 +2263,24 @@ void CPU::SingleStep()
|
|||
}
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void CPU::CodeCache::InterpretCachedBlock(const CodeBlock& block)
|
||||
void CPU::CodeCache::InterpretCachedBlock(const Block* block)
|
||||
{
|
||||
// set up the state so we've already fetched the instruction
|
||||
DebugAssert(g_state.pc == block.GetPC());
|
||||
g_state.npc = block.GetPC() + 4;
|
||||
DebugAssert(g_state.pc == block->pc);
|
||||
g_state.npc = block->pc + 4;
|
||||
|
||||
for (const CodeBlockInstruction& cbi : block.instructions)
|
||||
const Instruction* instruction = block->Instructions();
|
||||
const Instruction* end_instruction = instruction + block->size;
|
||||
const CodeCache::InstructionInfo* info = block->InstructionsInfo();
|
||||
|
||||
do
|
||||
{
|
||||
g_state.pending_ticks++;
|
||||
|
||||
// now executing the instruction we previously fetched
|
||||
g_state.current_instruction.bits = cbi.instruction.bits;
|
||||
g_state.current_instruction_pc = cbi.pc;
|
||||
g_state.current_instruction_in_branch_delay_slot = cbi.is_branch_delay_slot;
|
||||
g_state.current_instruction.bits = instruction->bits;
|
||||
g_state.current_instruction_pc = info->pc;
|
||||
g_state.current_instruction_in_branch_delay_slot = info->is_branch_delay_slot; // TODO: let int set it instead
|
||||
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
|
||||
g_state.branch_was_taken = false;
|
||||
g_state.exception_raised = false;
|
||||
|
@ -2292,15 +2297,18 @@ void CPU::CodeCache::InterpretCachedBlock(const CodeBlock& block)
|
|||
|
||||
if (g_state.exception_raised)
|
||||
break;
|
||||
}
|
||||
|
||||
instruction++;
|
||||
info++;
|
||||
} while (instruction != end_instruction);
|
||||
|
||||
// cleanup so the interpreter can kick in if needed
|
||||
g_state.next_instruction_is_branch_delay_slot = false;
|
||||
}
|
||||
|
||||
template void CPU::CodeCache::InterpretCachedBlock<PGXPMode::Disabled>(const CodeBlock& block);
|
||||
template void CPU::CodeCache::InterpretCachedBlock<PGXPMode::Memory>(const CodeBlock& block);
|
||||
template void CPU::CodeCache::InterpretCachedBlock<PGXPMode::CPU>(const CodeBlock& block);
|
||||
template void CPU::CodeCache::InterpretCachedBlock<PGXPMode::Disabled>(const Block* block);
|
||||
template void CPU::CodeCache::InterpretCachedBlock<PGXPMode::Memory>(const Block* block);
|
||||
template void CPU::CodeCache::InterpretCachedBlock<PGXPMode::CPU>(const Block* block);
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
void CPU::CodeCache::InterpretUncachedBlock()
|
||||
|
@ -2989,6 +2997,8 @@ static void MemoryBreakpoint(MemoryAccessType type, MemoryAccessSize size, Virtu
|
|||
static constexpr const char* types[2] = { "read", "write" };
|
||||
|
||||
const u32 cycle = TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks;
|
||||
if (cycle == 3301006373)
|
||||
__debugbreak();
|
||||
|
||||
#if 0
|
||||
static std::FILE* fp = nullptr;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -9,7 +9,7 @@
|
|||
|
||||
#include "util/jit_code_buffer.h"
|
||||
|
||||
#include "cpu_code_cache.h"
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_recompiler_register_cache.h"
|
||||
#include "cpu_recompiler_thunks.h"
|
||||
#include "cpu_recompiler_types.h"
|
||||
|
@ -17,34 +17,56 @@
|
|||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
enum class Condition : u8
|
||||
{
|
||||
Always,
|
||||
NotEqual,
|
||||
Equal,
|
||||
Overflow,
|
||||
Greater,
|
||||
GreaterEqual,
|
||||
LessEqual,
|
||||
Less,
|
||||
Negative,
|
||||
PositiveOrZero,
|
||||
Above, // unsigned variant of Greater
|
||||
AboveEqual, // unsigned variant of GreaterEqual
|
||||
Below, // unsigned variant of Less
|
||||
BelowEqual, // unsigned variant of LessEqual
|
||||
|
||||
NotZero,
|
||||
Zero
|
||||
};
|
||||
|
||||
class CodeGenerator
|
||||
{
|
||||
public:
|
||||
using SpeculativeValue = std::optional<u32>;
|
||||
|
||||
struct CodeBlockInstruction
|
||||
{
|
||||
const Instruction* instruction;
|
||||
const CodeCache::InstructionInfo* info;
|
||||
};
|
||||
|
||||
CodeGenerator(JitCodeBuffer* code_buffer);
|
||||
~CodeGenerator();
|
||||
|
||||
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
|
||||
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
|
||||
|
||||
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
|
||||
static void BackpatchBranch(void* pc, u32 pc_size, void* target);
|
||||
static void BackpatchReturn(void* pc, u32 pc_size);
|
||||
static void BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi);
|
||||
|
||||
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
|
||||
CodeCache::DispatcherFunction CompileDispatcher();
|
||||
CodeCache::SingleBlockDispatcherFunction CompileSingleBlockDispatcher();
|
||||
const void* CompileBlock(CodeCache::Block* block, u32* out_host_code_size, u32* out_host_far_code_size);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Code Generation
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void EmitBeginBlock(bool allocate_registers = true);
|
||||
void EmitEndBlock(bool free_registers = true, bool emit_return = true);
|
||||
void EmitEndBlock(bool free_registers, const void* jump_to);
|
||||
void EmitExceptionExit();
|
||||
void EmitExceptionExitOnBool(const Value& value);
|
||||
void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
const void* FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size);
|
||||
|
||||
void EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size);
|
||||
void EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size);
|
||||
|
@ -77,6 +99,7 @@ public:
|
|||
void EmitMoveNextInterpreterLoadDelay();
|
||||
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
|
||||
void EmitICacheCheckAndUpdate();
|
||||
void EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size);
|
||||
void EmitStallUntilGTEComplete();
|
||||
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
||||
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
||||
|
@ -88,18 +111,19 @@ public:
|
|||
// Automatically generates an exception handler.
|
||||
Value GetFastmemLoadBase();
|
||||
Value GetFastmemStoreBase();
|
||||
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec,
|
||||
RegSize size);
|
||||
Value EmitLoadGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
|
||||
const SpeculativeValue& address_spec, RegSize size);
|
||||
void EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result);
|
||||
void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result);
|
||||
void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result,
|
||||
bool in_far_code);
|
||||
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec,
|
||||
RegSize size, const Value& value);
|
||||
void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value);
|
||||
void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value, bool in_far_code);
|
||||
void EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
|
||||
RegSize size, Value& result);
|
||||
void EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
|
||||
RegSize size, Value& result, bool in_far_code);
|
||||
void EmitStoreGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
|
||||
const SpeculativeValue& address_spec, RegSize size, const Value& value);
|
||||
void EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value);
|
||||
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value, bool in_far_code);
|
||||
void EmitUpdateFastmemBase();
|
||||
|
||||
// Unconditional branch to pointer. May allocate a scratch register.
|
||||
|
@ -179,7 +203,7 @@ public:
|
|||
Value NotValue(const Value& val);
|
||||
|
||||
// Raising exception if condition is true.
|
||||
void GenerateExceptionExit(const CodeBlockInstruction& cbi, Exception excode,
|
||||
void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode,
|
||||
Condition condition = Condition::Always);
|
||||
|
||||
private:
|
||||
|
@ -194,6 +218,7 @@ private:
|
|||
|
||||
void SwitchToFarCode();
|
||||
void SwitchToNearCode();
|
||||
void* GetStartNearCodePointer() const;
|
||||
void* GetCurrentCodePointer() const;
|
||||
void* GetCurrentNearCodePointer() const;
|
||||
void* GetCurrentFarCodePointer() const;
|
||||
|
@ -204,8 +229,9 @@ private:
|
|||
// branch target, memory address, etc
|
||||
void BlockPrologue();
|
||||
void BlockEpilogue();
|
||||
void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false);
|
||||
void InstructionEpilogue(const CodeBlockInstruction& cbi);
|
||||
void InstructionPrologue(Instruction instruction, const CodeCache::InstructionInfo& info, TickCount cycles,
|
||||
bool force_sync = false);
|
||||
void InstructionEpilogue(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
void TruncateBlockAtCurrentInstruction();
|
||||
void AddPendingCycles(bool commit);
|
||||
void AddGTETicks(TickCount ticks);
|
||||
|
@ -221,32 +247,33 @@ private:
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
// Instruction Code Generators
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
bool CompileInstruction(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Fallback(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Nop(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Bitwise(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Shift(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Load(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Store(const CodeBlockInstruction& cbi);
|
||||
bool Compile_LoadLeftRight(const CodeBlockInstruction& cbi);
|
||||
bool Compile_StoreLeftRight(const CodeBlockInstruction& cbi);
|
||||
bool Compile_MoveHiLo(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Add(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Subtract(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Multiply(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Divide(const CodeBlockInstruction& cbi);
|
||||
bool Compile_SignedDivide(const CodeBlockInstruction& cbi);
|
||||
bool Compile_SetLess(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Branch(const CodeBlockInstruction& cbi);
|
||||
bool Compile_lui(const CodeBlockInstruction& cbi);
|
||||
bool Compile_cop0(const CodeBlockInstruction& cbi);
|
||||
bool Compile_cop2(const CodeBlockInstruction& cbi);
|
||||
bool CompileInstruction(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Fallback(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Nop(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Bitwise(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Shift(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Load(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Store(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_LoadLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_StoreLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_MoveHiLo(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Add(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Multiply(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Divide(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_SignedDivide(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_SetLess(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_Branch(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_lui(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
bool Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info);
|
||||
|
||||
JitCodeBuffer* m_code_buffer;
|
||||
CodeBlock* m_block = nullptr;
|
||||
const CodeBlockInstruction* m_block_start = nullptr;
|
||||
const CodeBlockInstruction* m_block_end = nullptr;
|
||||
const CodeBlockInstruction* m_current_instruction = nullptr;
|
||||
|
||||
CodeCache::Block* m_block = nullptr;
|
||||
CodeBlockInstruction m_block_start = {};
|
||||
CodeBlockInstruction m_block_end = {};
|
||||
CodeBlockInstruction m_current_instruction = {};
|
||||
RegisterCache m_register_cache;
|
||||
CodeEmitter m_near_emitter;
|
||||
CodeEmitter m_far_emitter;
|
||||
|
@ -267,9 +294,6 @@ private:
|
|||
bool m_next_load_delay_dirty = false;
|
||||
bool m_gte_busy_cycles_dirty = false;
|
||||
|
||||
bool m_fastmem_load_base_in_register = false;
|
||||
bool m_fastmem_store_base_in_register = false;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Speculative Constants
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_recompiler_code_generator.h"
|
||||
|
@ -12,38 +14,230 @@
|
|||
#include "timing_event.h"
|
||||
Log_SetChannel(CPU::Recompiler);
|
||||
|
||||
// #include "vixl/aarch32/disasm-aarch32.h"
|
||||
// #include <iostream>
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
#include "vixl/aarch32/disasm-aarch32.h"
|
||||
#include <iostream>
|
||||
#endif
|
||||
|
||||
namespace a32 = vixl::aarch32;
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
constexpr HostReg RCPUPTR = 4;
|
||||
constexpr HostReg RRETURN = 0;
|
||||
constexpr HostReg RARG1 = 0;
|
||||
constexpr HostReg RARG2 = 1;
|
||||
constexpr HostReg RARG3 = 2;
|
||||
constexpr HostReg RARG4 = 3;
|
||||
constexpr HostReg RSCRATCH = 12;
|
||||
constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
|
||||
constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers
|
||||
constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
|
||||
constexpr u32 FUNCTION_STACK_SIZE =
|
||||
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
|
||||
constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
|
||||
} // namespace CPU::Recompiler
|
||||
|
||||
static s32 GetPCDisplacement(const void* current, const void* target)
|
||||
s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
|
||||
{
|
||||
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
|
||||
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
|
||||
return static_cast<s32>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)));
|
||||
}
|
||||
|
||||
static bool IsPCDisplacementInImmediateRange(s32 displacement)
|
||||
bool CPU::Recompiler::armIsPCDisplacementInImmediateRange(s32 displacement)
|
||||
{
|
||||
return (displacement >= -33554432 && displacement <= 33554428);
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm)
|
||||
{
|
||||
if (vixl::IsUintN(16, imm))
|
||||
{
|
||||
armAsm->mov(vixl::aarch32::al, rd, imm & 0xffff);
|
||||
return;
|
||||
}
|
||||
|
||||
armAsm->mov(vixl::aarch32::al, rd, imm & 0xffff);
|
||||
armAsm->movt(vixl::aarch32::al, rd, imm >> 16);
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
|
||||
const void* addr)
|
||||
{
|
||||
armEmitMov(armAsm, reg, static_cast<u32>(reinterpret_cast<uintptr_t>(addr)));
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
|
||||
{
|
||||
// TODO: pooling
|
||||
|
||||
const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr);
|
||||
if (!armIsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
armMoveAddressToReg(armAsm, RSCRATCH, ptr);
|
||||
armAsm->bx(RSCRATCH);
|
||||
}
|
||||
else
|
||||
{
|
||||
a32::Label label(displacement + armAsm->GetCursorOffset());
|
||||
armAsm->b(&label);
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
|
||||
{
|
||||
// TODO: pooling
|
||||
|
||||
const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr);
|
||||
if (!armIsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
armMoveAddressToReg(armAsm, RSCRATCH, ptr);
|
||||
armAsm->blx(RSCRATCH);
|
||||
}
|
||||
else
|
||||
{
|
||||
a32::Label label(displacement + armAsm->GetCursorOffset());
|
||||
armAsm->bl(&label);
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
|
||||
{
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
a32::PrintDisassembler dis(std::cout, 0);
|
||||
dis.SetCodeAddress(reinterpret_cast<uintptr_t>(start));
|
||||
dis.DisassembleA32Buffer(static_cast<const u32*>(start), size);
|
||||
#else
|
||||
Log_ErrorPrint("Not compiled with ENABLE_HOST_DISASSEMBLY.");
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
|
||||
{
|
||||
return size / a32::kA32InstructionSizeInBytes;
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
||||
{
|
||||
using namespace vixl::aarch32;
|
||||
using namespace CPU::Recompiler;
|
||||
|
||||
const s32 disp = armGetPCDisplacement(code, dst);
|
||||
DebugAssert(armIsPCDisplacementInImmediateRange(disp));
|
||||
|
||||
// A32 jumps are silly.
|
||||
{
|
||||
vixl::aarch32::Assembler emit(static_cast<vixl::byte*>(code), kA32InstructionSizeInBytes, a32::A32);
|
||||
a32::Label label(disp);
|
||||
emit.b(&label);
|
||||
}
|
||||
|
||||
if (flush_icache)
|
||||
JitCodeBuffer::FlushInstructionCache(code, kA32InstructionSizeInBytes);
|
||||
|
||||
return kA32InstructionSizeInBytes;
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
{
|
||||
using namespace vixl::aarch32;
|
||||
using namespace CPU::Recompiler;
|
||||
|
||||
#define PTR(x) a32::MemOperand(RSTATE, (s32)(((u8*)(x)) - ((u8*)&g_state)))
|
||||
|
||||
Assembler actual_asm(static_cast<u8*>(code), code_size);
|
||||
Assembler* armAsm = &actual_asm;
|
||||
|
||||
#ifdef VIXL_DEBUG
|
||||
vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
|
||||
#endif
|
||||
|
||||
Label dispatch;
|
||||
|
||||
g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>();
|
||||
{
|
||||
// reserve some space for saving caller-saved registers
|
||||
armAsm->sub(sp, sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
// Need the CPU state for basically everything :-)
|
||||
armMoveAddressToReg(armAsm, RSTATE, &g_state);
|
||||
}
|
||||
|
||||
// check events then for frame done
|
||||
g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
Label skip_event_check;
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
|
||||
armAsm->ldr(RARG2, PTR(&g_state.downcount));
|
||||
armAsm->cmp(RARG1, RARG2);
|
||||
armAsm->b(lt, &skip_event_check);
|
||||
|
||||
g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
|
||||
|
||||
armAsm->bind(&skip_event_check);
|
||||
}
|
||||
|
||||
// TODO: align?
|
||||
g_dispatcher = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armAsm->bind(&dispatch);
|
||||
|
||||
// x9 <- s_fast_map[pc >> 16]
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pc));
|
||||
armMoveAddressToReg(armAsm, RARG3, g_code_lut.data());
|
||||
armAsm->lsr(RARG2, RARG1, 16);
|
||||
armAsm->ldr(RARG2, MemOperand(RARG3, RARG2, LSL, 2));
|
||||
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
armAsm->ldr(RARG1, MemOperand(RARG2, RARG1));
|
||||
armAsm->blx(RARG1);
|
||||
}
|
||||
|
||||
g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pc));
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true);
|
||||
armAsm->b(&dispatch);
|
||||
}
|
||||
|
||||
g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pc));
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true);
|
||||
armAsm->b(&dispatch);
|
||||
}
|
||||
|
||||
g_interpret_block = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
|
||||
armAsm->b(&dispatch);
|
||||
}
|
||||
|
||||
armAsm->FinalizeCode();
|
||||
|
||||
#if 0
|
||||
// TODO: align?
|
||||
s_trampoline_targets.clear();
|
||||
s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
|
||||
s_trampoline_used = 0;
|
||||
#endif
|
||||
|
||||
#undef PTR
|
||||
return static_cast<u32>(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/;
|
||||
}
|
||||
|
||||
// Macros aren't used with old-rec.
|
||||
#undef RRET
|
||||
#undef RARG1
|
||||
#undef RARG2
|
||||
#undef RARG3
|
||||
#undef RARG4
|
||||
#undef RSCRATCH
|
||||
#undef RMEMBASE
|
||||
#undef RSTATE
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
constexpr HostReg RCPUPTR = 4;
|
||||
constexpr HostReg RMEMBASEPTR = 5;
|
||||
constexpr HostReg RRETURN = 0;
|
||||
constexpr HostReg RARG1 = 0;
|
||||
constexpr HostReg RARG2 = 1;
|
||||
constexpr HostReg RARG3 = 2;
|
||||
constexpr HostReg RARG4 = 3;
|
||||
constexpr HostReg RSCRATCH = 12;
|
||||
|
||||
static const a32::Register GetHostReg8(HostReg reg)
|
||||
{
|
||||
return a32::Register(reg);
|
||||
|
@ -82,6 +276,11 @@ static const a32::Register GetCPUPtrReg()
|
|||
return GetHostReg32(RCPUPTR);
|
||||
}
|
||||
|
||||
static const a32::Register GetFastmemBasePtrReg()
|
||||
{
|
||||
return GetHostReg32(RMEMBASEPTR);
|
||||
}
|
||||
|
||||
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
|
||||
: m_code_buffer(code_buffer), m_register_cache(*this),
|
||||
m_near_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeCodePointer()), code_buffer->GetFreeCodeSpace(),
|
||||
|
@ -136,6 +335,11 @@ void CodeGenerator::SwitchToNearCode()
|
|||
m_emit = &m_near_emitter;
|
||||
}
|
||||
|
||||
void* CodeGenerator::GetStartNearCodePointer() const
|
||||
{
|
||||
return static_cast<u8*>(m_code_buffer->GetFreeCodePointer());
|
||||
}
|
||||
|
||||
void* CodeGenerator::GetCurrentNearCodePointer() const
|
||||
{
|
||||
return static_cast<u8*>(m_code_buffer->GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
|
||||
|
@ -168,8 +372,6 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
|
|||
|
||||
void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
||||
{
|
||||
m_emit->sub(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
if (allocate_registers)
|
||||
{
|
||||
// Save the link register, since we'll be calling functions.
|
||||
|
@ -183,22 +385,31 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
|||
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* jump_to)
|
||||
{
|
||||
if (free_registers)
|
||||
{
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.FreeHostReg(14);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
}
|
||||
|
||||
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
if (emit_return)
|
||||
m_emit->bx(a32::lr);
|
||||
if (jump_to)
|
||||
armEmitJmp(m_emit, jump_to, true);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExit()
|
||||
|
@ -212,8 +423,7 @@ void CodeGenerator::EmitExceptionExit()
|
|||
|
||||
m_register_cache.PopCalleeSavedRegisters(false);
|
||||
|
||||
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->bx(a32::lr);
|
||||
armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
|
||||
|
@ -236,13 +446,14 @@ void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
|
|||
m_register_cache.PopState();
|
||||
}
|
||||
|
||||
void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
|
||||
const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
|
||||
{
|
||||
m_near_emitter.FinalizeCode();
|
||||
m_far_emitter.FinalizeCode();
|
||||
|
||||
*out_host_code = reinterpret_cast<CodeBlock::HostCodePointer>(m_code_buffer->GetFreeCodePointer());
|
||||
const void* code = m_code_buffer->GetFreeCodePointer();
|
||||
*out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
|
||||
*out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated());
|
||||
|
||||
m_code_buffer->CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
|
||||
m_code_buffer->CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));
|
||||
|
@ -252,11 +463,7 @@ void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32
|
|||
m_far_emitter = CodeEmitter(static_cast<vixl::byte*>(m_code_buffer->GetFreeFarCodePointer()),
|
||||
m_code_buffer->GetFreeFarCodeSpace(), a32::A32);
|
||||
|
||||
#if 0
|
||||
a32::PrintDisassembler dis(std::cout, 0);
|
||||
dis.SetCodeAddress(reinterpret_cast<uintptr_t>(*out_host_code));
|
||||
dis.DisassembleA32Buffer(reinterpret_cast<u32*>(*out_host_code), *out_host_code_size);
|
||||
#endif
|
||||
return code;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
|
||||
|
@ -847,8 +1054,6 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond
|
|||
|
||||
u32 CodeGenerator::PrepareStackForCall()
|
||||
{
|
||||
m_fastmem_load_base_in_register = false;
|
||||
m_fastmem_store_base_in_register = false;
|
||||
m_register_cache.PushCallerSavedRegisters();
|
||||
return 0;
|
||||
}
|
||||
|
@ -860,17 +1065,7 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
|
|||
|
||||
void CodeGenerator::EmitCall(const void* ptr)
|
||||
{
|
||||
const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
if (!IsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
m_emit->Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
|
||||
m_emit->blx(GetHostReg32(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
a32::Label label(displacement + m_emit->GetCursorOffset());
|
||||
m_emit->bl(&label);
|
||||
}
|
||||
armEmitCall(m_emit, ptr, false);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
|
||||
|
@ -1005,7 +1200,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
|
|||
|
||||
void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position)
|
||||
{
|
||||
const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 4));
|
||||
const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - (position * 4));
|
||||
m_emit->str(GetHostReg32(reg), addr);
|
||||
}
|
||||
|
||||
|
@ -1018,7 +1213,7 @@ void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position)
|
|||
|
||||
void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position)
|
||||
{
|
||||
const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 4));
|
||||
const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - (position * 4));
|
||||
m_emit->ldr(GetHostReg32(reg), addr);
|
||||
}
|
||||
|
||||
|
@ -1153,51 +1348,13 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
|||
}
|
||||
}
|
||||
|
||||
Value CodeGenerator::GetFastmemLoadBase()
|
||||
{
|
||||
Value val = Value::FromHostReg(&m_register_cache, RARG4, RegSize_32);
|
||||
if (!m_fastmem_load_base_in_register)
|
||||
{
|
||||
m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base)));
|
||||
m_fastmem_load_base_in_register = true;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
Value CodeGenerator::GetFastmemStoreBase()
|
||||
{
|
||||
Value val = Value::FromHostReg(&m_register_cache, RARG3, RegSize_32);
|
||||
if (!m_fastmem_store_base_in_register)
|
||||
{
|
||||
m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base)));
|
||||
m_emit->add(GetHostReg32(val), GetHostReg32(val), sizeof(u32*) * Bus::FASTMEM_LUT_NUM_PAGES);
|
||||
m_fastmem_store_base_in_register = true;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitUpdateFastmemBase()
|
||||
{
|
||||
if (m_fastmem_load_base_in_register)
|
||||
{
|
||||
Value val = Value::FromHostReg(&m_register_cache, RARG4, RegSize_32);
|
||||
m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base)));
|
||||
}
|
||||
|
||||
if (m_fastmem_store_base_in_register)
|
||||
{
|
||||
Value val = Value::FromHostReg(&m_register_cache, RARG3, RegSize_32);
|
||||
m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base)));
|
||||
m_emit->add(GetHostReg32(val), GetHostReg32(val), sizeof(u32*) * Bus::FASTMEM_LUT_NUM_PAGES);
|
||||
}
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
|
||||
{
|
||||
Value fastmem_base = GetFastmemLoadBase();
|
||||
|
||||
HostReg address_reg;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
|
@ -1212,7 +1369,7 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
|
|||
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
|
||||
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
|
||||
m_emit->ldr(GetHostReg32(RARG1),
|
||||
a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
|
||||
a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1234,18 +1391,9 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
bpi.fault_count = 0;
|
||||
|
||||
Value fastmem_base = GetFastmemLoadBase();
|
||||
|
||||
HostReg address_reg;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
|
@ -1258,25 +1406,25 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
|
||||
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
|
||||
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
|
||||
m_emit->ldr(GetHostReg32(RARG1),
|
||||
a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
|
||||
a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
|
||||
|
||||
m_register_cache.InhibitAllocation();
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
void* host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
|
||||
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
|
||||
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
|
||||
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1284,13 +1432,11 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
break;
|
||||
}
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
const bool old_store_fastmem_base = m_fastmem_store_base_in_register;
|
||||
const u32 host_code_size =
|
||||
static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
const void* host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||
|
@ -1298,27 +1444,22 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// restore fastmem base state for the next instruction
|
||||
if (old_store_fastmem_base)
|
||||
fastmem_base = GetFastmemStoreBase();
|
||||
fastmem_base = GetFastmemLoadBase();
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
SwitchToNearCode();
|
||||
m_register_cache.UninhibitAllocation();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
|
@ -1359,7 +1500,7 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
|
||||
EmitOr(result.host_reg, result.host_reg,
|
||||
Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
|
||||
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
|
||||
static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
|
@ -1392,16 +1533,9 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value)
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value)
|
||||
{
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
bpi.fault_count = 0;
|
||||
|
||||
Value fastmem_base = GetFastmemStoreBase();
|
||||
Value actual_value = GetValueInHostRegister(value);
|
||||
|
||||
HostReg address_reg;
|
||||
|
@ -1418,25 +1552,27 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
// TODO: if this gets backpatched, these instructions are wasted
|
||||
|
||||
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
|
||||
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
|
||||
m_emit->ldr(GetHostReg32(RARG1),
|
||||
a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
|
||||
a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
|
||||
|
||||
m_register_cache.InhibitAllocation();
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
void* host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->strb(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
|
||||
m_emit->strb(GetHostReg32(actual_value.host_reg),
|
||||
a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->strh(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
|
||||
m_emit->strh(GetHostReg32(actual_value.host_reg),
|
||||
a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->str(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
|
||||
m_emit->str(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1444,39 +1580,33 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
break;
|
||||
}
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
const bool old_load_fastmem_base = m_fastmem_load_base_in_register;
|
||||
const u32 host_code_size =
|
||||
static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
void* host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, actual_value, true);
|
||||
EmitStoreGuestMemorySlowmem(instruction, info, address, size, actual_value, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// restore fastmem base state for the next instruction
|
||||
if (old_load_fastmem_base)
|
||||
fastmem_base = GetFastmemLoadBase();
|
||||
fastmem_base = GetFastmemStoreBase();
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
SwitchToNearCode();
|
||||
m_register_cache.UninhibitAllocation();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value, bool in_far_code)
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value,
|
||||
bool in_far_code)
|
||||
{
|
||||
Value value_in_hr = GetValueInHostRegister(value);
|
||||
|
||||
|
@ -1520,7 +1650,7 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
|
||||
EmitOr(result.host_reg, result.host_reg,
|
||||
Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
|
||||
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
|
||||
static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
if (!in_far_code)
|
||||
|
@ -1552,18 +1682,18 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
|
||||
Log_DevFmt("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address);
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size, a32::A32);
|
||||
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(host_pc), lbi.code_size, a32::A32);
|
||||
|
||||
// check jump distance
|
||||
const s32 displacement = GetPCDisplacement(lbi.host_pc, lbi.host_slowmem_pc);
|
||||
if (!IsPCDisplacementInImmediateRange(displacement))
|
||||
const s32 displacement = armGetPCDisplacement(host_pc, lbi.thunk_address);
|
||||
if (!armIsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
emit.Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(lbi.host_slowmem_pc));
|
||||
armMoveAddressToReg(&emit, GetHostReg32(RSCRATCH), lbi.thunk_address);
|
||||
emit.bx(GetHostReg32(RSCRATCH));
|
||||
}
|
||||
else
|
||||
|
@ -1572,56 +1702,12 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
|||
emit.b(&label);
|
||||
}
|
||||
|
||||
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
const s32 nops = (static_cast<s32>(lbi.code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to return", pc);
|
||||
|
||||
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a32::A32);
|
||||
emit.bx(a32::lr);
|
||||
|
||||
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
|
||||
|
||||
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a32::A32);
|
||||
|
||||
// check jump distance
|
||||
const s32 displacement = GetPCDisplacement(pc, target);
|
||||
if (!IsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
emit.Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(target));
|
||||
emit.bx(GetHostReg32(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
a32::Label label(displacement + emit.GetCursorOffset());
|
||||
emit.b(&label);
|
||||
}
|
||||
|
||||
// shouldn't have any nops
|
||||
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
@ -1751,7 +1837,8 @@ void CodeGenerator::EmitICacheCheckAndUpdate()
|
|||
{
|
||||
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
|
||||
{
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1789,6 +1876,82 @@ void CodeGenerator::EmitICacheCheckAndUpdate()
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
|
||||
{
|
||||
// store it first to reduce code size, because we can offset
|
||||
armMoveAddressToReg(m_emit, GetHostReg32(RARG1), ram_ptr);
|
||||
armMoveAddressToReg(m_emit, GetHostReg32(RARG2), shadow_ptr);
|
||||
|
||||
u32 offset = 0;
|
||||
a32::Label block_changed;
|
||||
|
||||
#if 0
|
||||
/* TODO: Vectorize
|
||||
#include <arm_neon.h>
|
||||
#include <stdint.h>
|
||||
|
||||
bool foo(const void* a, const void* b)
|
||||
{
|
||||
uint8x16_t v1 = vld1q_u8((const uint8_t*)a);
|
||||
uint8x16_t v2 = vld1q_u8((const uint8_t*)b);
|
||||
uint8x16_t v3 = vld1q_u8((const uint8_t*)a + 16);
|
||||
uint8x16_t v4 = vld1q_u8((const uint8_t*)a + 16);
|
||||
uint8x16_t r = vceqq_u8(v1, v2);
|
||||
uint8x16_t r2 = vceqq_u8(v2, v3);
|
||||
uint8x16_t r3 = vandq_u8(r, r2);
|
||||
uint32x2_t rr = vpmin_u32(vget_low_u32(vreinterpretq_u32_u8(r3)), vget_high_u32(vreinterpretq_u32_u8(r3)));
|
||||
if ((vget_lane_u32(rr, 0) & vget_lane_u32(rr, 1)) != 0xFFFFFFFFu)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
bool first = true;
|
||||
|
||||
while (size >= 16)
|
||||
{
|
||||
const a32::VRegister vtmp = a32::v2.V4S();
|
||||
const a32::VRegister dst = first ? a32::v0.V4S() : a32::v1.V4S();
|
||||
m_emit->ldr(dst, a32::MemOperand(RXARG1, offset));
|
||||
m_emit->ldr(vtmp, a32::MemOperand(RXARG2, offset));
|
||||
m_emit->cmeq(dst, dst, vtmp);
|
||||
if (!first)
|
||||
m_emit->and_(dst.V16B(), dst.V16B(), vtmp.V16B());
|
||||
else
|
||||
first = false;
|
||||
|
||||
offset += 16;
|
||||
size -= 16;
|
||||
}
|
||||
|
||||
if (!first)
|
||||
{
|
||||
// TODO: make sure this doesn't choke on ffffffff
|
||||
m_emit->uminv(a32::s0, a32::v0.V4S());
|
||||
m_emit->fcmp(a32::s0, 0.0);
|
||||
m_emit->b(&block_changed, a32::eq);
|
||||
}
|
||||
#endif
|
||||
|
||||
while (size >= 4)
|
||||
{
|
||||
m_emit->ldr(GetHostReg32(RARG3), a32::MemOperand(GetHostReg32(RARG1), offset));
|
||||
m_emit->ldr(GetHostReg32(RARG4), a32::MemOperand(GetHostReg32(RARG2), offset));
|
||||
m_emit->cmp(GetHostReg32(RARG3), GetHostReg32(RARG4));
|
||||
m_emit->b(a32::ne, &block_changed);
|
||||
offset += 4;
|
||||
size -= 4;
|
||||
}
|
||||
|
||||
DebugAssert(size == 0);
|
||||
|
||||
a32::Label block_unchanged;
|
||||
m_emit->b(&block_unchanged);
|
||||
m_emit->bind(&block_changed);
|
||||
armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false);
|
||||
m_emit->bind(&block_unchanged);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||
{
|
||||
static_assert(offsetof(State, pending_ticks) + sizeof(u32) == offsetof(State, gte_completion_tick));
|
||||
|
@ -1809,8 +1972,8 @@ void CodeGenerator::EmitStallUntilGTEComplete()
|
|||
|
||||
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
||||
{
|
||||
const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address);
|
||||
if (IsPCDisplacementInImmediateRange(displacement))
|
||||
const s32 displacement = armGetPCDisplacement(GetCurrentCodePointer(), address);
|
||||
if (armIsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
a32::Label label(displacement + m_emit->GetCursorOffset());
|
||||
m_emit->b(&label);
|
||||
|
@ -2057,81 +2220,4 @@ void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
|
|||
m_emit->Mov(GetHostReg32(host_reg), reinterpret_cast<uintptr_t>(ptr));
|
||||
}
|
||||
|
||||
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
|
||||
{
|
||||
m_emit->sub(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
m_register_cache.ReserveCalleeSavedRegisters();
|
||||
const u32 stack_adjust = PrepareStackForCall();
|
||||
|
||||
EmitLoadGlobalAddress(RCPUPTR, &g_state);
|
||||
|
||||
a32::Label event_test;
|
||||
m_emit->b(&event_test);
|
||||
|
||||
// main dispatch loop
|
||||
a32::Label main_loop;
|
||||
m_emit->Bind(&main_loop);
|
||||
|
||||
// time to lookup the block
|
||||
// r0 <- pc
|
||||
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pc)));
|
||||
|
||||
// r1 <- s_fast_map[pc >> 16]
|
||||
EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer());
|
||||
m_emit->lsr(a32::r1, a32::r0, 16);
|
||||
m_emit->ldr(a32::r1, a32::MemOperand(a32::r2, a32::r1, a32::LSL, 2));
|
||||
|
||||
// blr(r1[pc]) (fast_map[pc >> 2])
|
||||
m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0));
|
||||
m_emit->blx(a32::r0);
|
||||
|
||||
// r0 <- pending_ticks
|
||||
// r1 <- downcount
|
||||
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
|
||||
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
|
||||
|
||||
// while downcount < pending_ticks
|
||||
a32::Label downcount_hit;
|
||||
m_emit->cmp(a32::r0, a32::r1);
|
||||
m_emit->b(a32::lt, &main_loop);
|
||||
|
||||
// end while
|
||||
m_emit->Bind(&event_test);
|
||||
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
|
||||
m_emit->b(&main_loop);
|
||||
|
||||
RestoreStackAfterCall(stack_adjust);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->bx(a32::lr);
|
||||
|
||||
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_DevPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return reinterpret_cast<CodeCache::DispatcherFunction>(ptr);
|
||||
}
|
||||
|
||||
CodeCache::SingleBlockDispatcherFunction CodeGenerator::CompileSingleBlockDispatcher()
|
||||
{
|
||||
m_emit->sub(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
m_register_cache.ReserveCalleeSavedRegisters();
|
||||
const u32 stack_adjust = PrepareStackForCall();
|
||||
|
||||
EmitLoadGlobalAddress(RCPUPTR, &g_state);
|
||||
|
||||
m_emit->blx(GetHostReg32(RARG1));
|
||||
|
||||
RestoreStackAfterCall(stack_adjust);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->bx(a32::lr);
|
||||
|
||||
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_DevPrintf("Single block dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return reinterpret_cast<CodeCache::SingleBlockDispatcherFunction>(ptr);
|
||||
}
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_recompiler_code_generator.h"
|
||||
|
@ -12,8 +13,399 @@
|
|||
#include "timing_event.h"
|
||||
Log_SetChannel(CPU::Recompiler);
|
||||
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
#include "vixl/aarch64/disasm-aarch64.h"
|
||||
#endif
|
||||
|
||||
namespace a64 = vixl::aarch64;
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers
|
||||
constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
|
||||
constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
|
||||
|
||||
static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
|
||||
static std::unordered_map<const void*, u32> s_trampoline_targets;
|
||||
static u8* s_trampoline_start_ptr = nullptr;
|
||||
static u32 s_trampoline_used = 0;
|
||||
} // namespace CPU::Recompiler
|
||||
|
||||
bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
|
||||
{
|
||||
// same on both linux and windows
|
||||
return (id <= 18);
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armEmitMov(a64::Assembler* armAsm, const a64::Register& rd, u64 imm)
|
||||
{
|
||||
DebugAssert(vixl::IsUint32(imm) || vixl::IsInt32(imm) || rd.Is64Bits());
|
||||
DebugAssert(rd.GetCode() != a64::sp.GetCode());
|
||||
|
||||
if (imm == 0)
|
||||
{
|
||||
armAsm->mov(rd, a64::Assembler::AppropriateZeroRegFor(rd));
|
||||
return;
|
||||
}
|
||||
|
||||
// The worst case for size is mov 64-bit immediate to sp:
|
||||
// * up to 4 instructions to materialise the constant
|
||||
// * 1 instruction to move to sp
|
||||
|
||||
// Immediates on Aarch64 can be produced using an initial value, and zero to
|
||||
// three move keep operations.
|
||||
//
|
||||
// Initial values can be generated with:
|
||||
// 1. 64-bit move zero (movz).
|
||||
// 2. 32-bit move inverted (movn).
|
||||
// 3. 64-bit move inverted.
|
||||
// 4. 32-bit orr immediate.
|
||||
// 5. 64-bit orr immediate.
|
||||
// Move-keep may then be used to modify each of the 16-bit half words.
|
||||
//
|
||||
// The code below supports all five initial value generators, and
|
||||
// applying move-keep operations to move-zero and move-inverted initial
|
||||
// values.
|
||||
|
||||
// Try to move the immediate in one instruction, and if that fails, switch to
|
||||
// using multiple instructions.
|
||||
const unsigned reg_size = rd.GetSizeInBits();
|
||||
|
||||
if (a64::Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP())
|
||||
{
|
||||
// Immediate can be represented in a move zero instruction. Movz can't write
|
||||
// to the stack pointer.
|
||||
armAsm->movz(rd, imm);
|
||||
return;
|
||||
}
|
||||
else if (a64::Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP())
|
||||
{
|
||||
// Immediate can be represented in a move negative instruction. Movn can't
|
||||
// write to the stack pointer.
|
||||
armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & a64::kWRegMask));
|
||||
return;
|
||||
}
|
||||
else if (a64::Assembler::IsImmLogical(imm, reg_size))
|
||||
{
|
||||
// Immediate can be represented in a logical orr instruction.
|
||||
DebugAssert(!rd.IsZero());
|
||||
armAsm->orr(rd, a64::Assembler::AppropriateZeroRegFor(rd), imm);
|
||||
return;
|
||||
}
|
||||
|
||||
// Generic immediate case. Imm will be represented by
|
||||
// [imm3, imm2, imm1, imm0], where each imm is 16 bits.
|
||||
// A move-zero or move-inverted is generated for the first non-zero or
|
||||
// non-0xffff immX, and a move-keep for subsequent non-zero immX.
|
||||
|
||||
uint64_t ignored_halfword = 0;
|
||||
bool invert_move = false;
|
||||
// If the number of 0xffff halfwords is greater than the number of 0x0000
|
||||
// halfwords, it's more efficient to use move-inverted.
|
||||
if (vixl::CountClearHalfWords(~imm, reg_size) > vixl::CountClearHalfWords(imm, reg_size))
|
||||
{
|
||||
ignored_halfword = 0xffff;
|
||||
invert_move = true;
|
||||
}
|
||||
|
||||
// Iterate through the halfwords. Use movn/movz for the first non-ignored
|
||||
// halfword, and movk for subsequent halfwords.
|
||||
DebugAssert((reg_size % 16) == 0);
|
||||
bool first_mov_done = false;
|
||||
for (unsigned i = 0; i < (reg_size / 16); i++)
|
||||
{
|
||||
uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
|
||||
if (imm16 != ignored_halfword)
|
||||
{
|
||||
if (!first_mov_done)
|
||||
{
|
||||
if (invert_move)
|
||||
armAsm->movn(rd, ~imm16 & 0xffff, 16 * i);
|
||||
else
|
||||
armAsm->movz(rd, imm16, 16 * i);
|
||||
first_mov_done = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Construct a wider constant.
|
||||
armAsm->movk(rd, imm16, 16 * i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DebugAssert(first_mov_done);
|
||||
}
|
||||
|
||||
s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
|
||||
{
|
||||
// pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
|
||||
// pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
|
||||
return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::XRegister& reg, const void* addr)
|
||||
{
|
||||
const void* cur = armAsm->GetCursorAddress<const void*>();
|
||||
const void* current_code_ptr_page =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
|
||||
const void* ptr_page =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
|
||||
const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
|
||||
const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
|
||||
if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmAddSub(page_offset))
|
||||
{
|
||||
armAsm->adrp(reg, page_displacement);
|
||||
armAsm->add(reg, reg, page_offset);
|
||||
}
|
||||
else if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64))
|
||||
{
|
||||
armAsm->adrp(reg, page_displacement);
|
||||
armAsm->orr(reg, reg, page_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
armEmitMov(armAsm, reg, reinterpret_cast<uintptr_t>(addr));
|
||||
}
|
||||
}
|
||||
void CPU::Recompiler::armEmitJmp(a64::Assembler* armAsm, const void* ptr, bool force_inline)
|
||||
{
|
||||
const void* cur = armAsm->GetCursorAddress<const void*>();
|
||||
s64 displacement = armGetPCDisplacement(cur, ptr);
|
||||
bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr && !force_inline)
|
||||
{
|
||||
if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
|
||||
{
|
||||
displacement = armGetPCDisplacement(cur, trampoline);
|
||||
use_blr = !vixl::IsInt26(displacement);
|
||||
}
|
||||
}
|
||||
|
||||
if (use_blr)
|
||||
{
|
||||
armMoveAddressToReg(armAsm, RXSCRATCH, ptr);
|
||||
armAsm->br(RXSCRATCH);
|
||||
}
|
||||
else
|
||||
{
|
||||
armAsm->b(displacement);
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armEmitCall(a64::Assembler* armAsm, const void* ptr, bool force_inline)
|
||||
{
|
||||
const void* cur = armAsm->GetCursorAddress<const void*>();
|
||||
s64 displacement = armGetPCDisplacement(cur, ptr);
|
||||
bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr && !force_inline)
|
||||
{
|
||||
if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
|
||||
{
|
||||
displacement = armGetPCDisplacement(cur, trampoline);
|
||||
use_blr = !vixl::IsInt26(displacement);
|
||||
}
|
||||
}
|
||||
|
||||
if (use_blr)
|
||||
{
|
||||
armMoveAddressToReg(armAsm, RXSCRATCH, ptr);
|
||||
armAsm->blr(RXSCRATCH);
|
||||
}
|
||||
else
|
||||
{
|
||||
armAsm->bl(displacement);
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition cond, const void* ptr)
|
||||
{
|
||||
const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
|
||||
reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
|
||||
// pxAssert(Common::IsAligned(jump_distance, 4));
|
||||
|
||||
if (a64::Instruction::IsValidImmPCOffset(a64::CondBranchType, jump_distance >> 2))
|
||||
{
|
||||
armAsm->b(jump_distance >> 2, cond);
|
||||
}
|
||||
else
|
||||
{
|
||||
a64::Label branch_not_taken;
|
||||
armAsm->b(&branch_not_taken, InvertCondition(cond));
|
||||
|
||||
const s64 new_jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
|
||||
reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
|
||||
armAsm->b(new_jump_distance >> 2);
|
||||
armAsm->bind(&branch_not_taken);
|
||||
}
|
||||
}
|
||||
|
||||
u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
|
||||
{
|
||||
auto it = s_trampoline_targets.find(target);
|
||||
if (it != s_trampoline_targets.end())
|
||||
return s_trampoline_start_ptr + it->second;
|
||||
|
||||
// align to 16 bytes?
|
||||
const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
|
||||
|
||||
// 4 movs plus a jump
|
||||
if (TRAMPOLINE_AREA_SIZE - offset < 20)
|
||||
{
|
||||
Panic("Ran out of space in constant pool");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
u8* start = s_trampoline_start_ptr + offset;
|
||||
a64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
|
||||
armMoveAddressToReg(&armAsm, RXSCRATCH, target);
|
||||
armAsm.br(RXSCRATCH);
|
||||
|
||||
const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
|
||||
DebugAssert(size < 20);
|
||||
s_trampoline_targets.emplace(target, offset);
|
||||
s_trampoline_used = offset + static_cast<u32>(size);
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(start, size);
|
||||
return start;
|
||||
}
|
||||
|
||||
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
|
||||
{
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
class MyDisassembler : public a64::Disassembler
|
||||
{
|
||||
protected:
|
||||
void ProcessOutput(const a64::Instruction* instr) override
|
||||
{
|
||||
Log_DebugPrintf("0x%016" PRIx64 " %08" PRIx32 "\t\t%s", reinterpret_cast<uint64_t>(instr),
|
||||
instr->GetInstructionBits(), GetOutput());
|
||||
}
|
||||
};
|
||||
|
||||
a64::Decoder decoder;
|
||||
MyDisassembler disas;
|
||||
decoder.AppendVisitor(&disas);
|
||||
decoder.Decode(static_cast<const a64::Instruction*>(start),
|
||||
reinterpret_cast<const a64::Instruction*>(static_cast<const u8*>(start) + size));
|
||||
#else
|
||||
Log_ErrorPrint("Not compiled with ENABLE_HOST_DISASSEMBLY.");
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
|
||||
{
|
||||
return size / a64::kInstructionSize;
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
||||
{
|
||||
using namespace a64;
|
||||
using namespace CPU::Recompiler;
|
||||
|
||||
const s64 disp = armGetPCDisplacement(code, dst);
|
||||
DebugAssert(vixl::IsInt26(disp));
|
||||
|
||||
const u32 new_code = B | Assembler::ImmUncondBranch(disp);
|
||||
std::memcpy(code, &new_code, sizeof(new_code));
|
||||
if (flush_icache)
|
||||
JitCodeBuffer::FlushInstructionCache(code, kInstructionSize);
|
||||
|
||||
return kInstructionSize;
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
{
|
||||
using namespace vixl::aarch64;
|
||||
using namespace CPU::Recompiler;
|
||||
|
||||
#define PTR(x) a64::MemOperand(RSTATE, (s64)(((u8*)(x)) - ((u8*)&g_state)))
|
||||
|
||||
Assembler actual_asm(static_cast<u8*>(code), code_size);
|
||||
Assembler* armAsm = &actual_asm;
|
||||
|
||||
#ifdef VIXL_DEBUG
|
||||
vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
|
||||
#endif
|
||||
|
||||
Label dispatch;
|
||||
|
||||
g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>();
|
||||
{
|
||||
// reserve some space for saving caller-saved registers
|
||||
armAsm->sub(sp, sp, CPU::Recompiler::FUNCTION_STACK_SIZE);
|
||||
|
||||
// Need the CPU state for basically everything :-)
|
||||
armMoveAddressToReg(armAsm, RSTATE, &g_state);
|
||||
|
||||
// Fastmem setup, oldrec doesn't need it
|
||||
if (IsUsingFastmem() && g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler)
|
||||
armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base));
|
||||
|
||||
// Fall through to event dispatcher
|
||||
}
|
||||
|
||||
// check events then for frame done
|
||||
g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
Label skip_event_check;
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
|
||||
armAsm->ldr(RWARG2, PTR(&g_state.downcount));
|
||||
armAsm->cmp(RWARG1, RWARG2);
|
||||
armAsm->b(&skip_event_check, lt);
|
||||
|
||||
g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
|
||||
|
||||
armAsm->bind(&skip_event_check);
|
||||
}
|
||||
|
||||
// TODO: align?
|
||||
g_dispatcher = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armAsm->bind(&dispatch);
|
||||
|
||||
// x9 <- s_fast_map[pc >> 16]
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pc));
|
||||
armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data());
|
||||
armAsm->lsr(RWARG2, RWARG1, 16);
|
||||
armAsm->lsr(RWARG1, RWARG1, 2);
|
||||
armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3));
|
||||
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
armAsm->ldr(RXARG1, MemOperand(RXARG2, RXARG1, LSL, 3));
|
||||
armAsm->blr(RXARG1);
|
||||
}
|
||||
|
||||
g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pc));
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true);
|
||||
armAsm->b(&dispatch);
|
||||
}
|
||||
|
||||
g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pc));
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true);
|
||||
armAsm->b(&dispatch);
|
||||
}
|
||||
|
||||
g_interpret_block = armAsm->GetCursorAddress<const void*>();
|
||||
{
|
||||
armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
|
||||
armAsm->b(&dispatch);
|
||||
}
|
||||
|
||||
armAsm->FinalizeCode();
|
||||
|
||||
// TODO: align?
|
||||
s_trampoline_targets.clear();
|
||||
s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
|
||||
s_trampoline_used = 0;
|
||||
|
||||
#undef PTR
|
||||
return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
|
||||
}
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
constexpr HostReg RCPUPTR = 19;
|
||||
|
@ -24,18 +416,6 @@ constexpr HostReg RARG2 = 1;
|
|||
constexpr HostReg RARG3 = 2;
|
||||
constexpr HostReg RARG4 = 3;
|
||||
constexpr HostReg RSCRATCH = 8;
|
||||
constexpr u64 FUNCTION_CALL_SHADOW_SPACE = 32;
|
||||
constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers
|
||||
constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
|
||||
constexpr u64 FUNCTION_STACK_SIZE =
|
||||
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
|
||||
|
||||
static s64 GetPCDisplacement(const void* current, const void* target)
|
||||
{
|
||||
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
|
||||
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
|
||||
return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
|
||||
}
|
||||
|
||||
static const a64::WRegister GetHostReg8(HostReg reg)
|
||||
{
|
||||
|
@ -158,6 +538,11 @@ void CodeGenerator::SwitchToNearCode()
|
|||
m_emit = &m_near_emitter;
|
||||
}
|
||||
|
||||
void* CodeGenerator::GetStartNearCodePointer() const
|
||||
{
|
||||
return static_cast<u8*>(m_code_buffer->GetFreeCodePointer());
|
||||
}
|
||||
|
||||
void* CodeGenerator::GetCurrentNearCodePointer() const
|
||||
{
|
||||
return static_cast<u8*>(m_code_buffer->GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
|
||||
|
@ -196,8 +581,6 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
|
|||
|
||||
void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
||||
{
|
||||
m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
if (allocate_registers)
|
||||
{
|
||||
// Save the link register, since we'll be calling functions.
|
||||
|
@ -213,7 +596,7 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
|||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
|
@ -222,11 +605,11 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to)
|
||||
{
|
||||
if (free_registers)
|
||||
{
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
|
@ -235,10 +618,8 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_ret
|
|||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
}
|
||||
|
||||
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
if (emit_return)
|
||||
m_emit->Ret();
|
||||
if (jump_to)
|
||||
armEmitJmp(m_emit, jump_to, true);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExit()
|
||||
|
@ -252,8 +633,7 @@ void CodeGenerator::EmitExceptionExit()
|
|||
|
||||
m_register_cache.PopCalleeSavedRegisters(false);
|
||||
|
||||
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->Ret();
|
||||
armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
|
||||
|
@ -275,19 +655,22 @@ void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
|
|||
m_register_cache.PopState();
|
||||
}
|
||||
|
||||
void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
|
||||
const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
|
||||
{
|
||||
m_near_emitter.FinalizeCode();
|
||||
m_far_emitter.FinalizeCode();
|
||||
|
||||
*out_host_code = reinterpret_cast<CodeBlock::HostCodePointer>(m_code_buffer->GetFreeCodePointer());
|
||||
const void* code = m_code_buffer->GetFreeCodePointer();
|
||||
*out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
|
||||
*out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated());
|
||||
|
||||
m_code_buffer->CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
|
||||
m_code_buffer->CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));
|
||||
|
||||
m_near_emitter.Reset();
|
||||
m_far_emitter.Reset();
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
|
||||
|
@ -1028,7 +1411,7 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
|
|||
|
||||
void CodeGenerator::EmitCall(const void* ptr)
|
||||
{
|
||||
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const s64 displacement = armGetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr)
|
||||
{
|
||||
|
@ -1173,25 +1556,25 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
|
|||
|
||||
void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position)
|
||||
{
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 8));
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8));
|
||||
m_emit->str(GetHostReg64(reg), addr);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position)
|
||||
{
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - ((position + 1) * 8));
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - ((position + 1) * 8));
|
||||
m_emit->stp(GetHostReg64(reg2), GetHostReg64(reg), addr);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position)
|
||||
{
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 8));
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8));
|
||||
m_emit->ldr(GetHostReg64(reg), addr);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position)
|
||||
{
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 8));
|
||||
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8));
|
||||
m_emit->ldp(GetHostReg64(reg2), GetHostReg64(reg), addr);
|
||||
}
|
||||
|
||||
|
@ -1399,15 +1782,11 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
bpi.fault_count = 0;
|
||||
void* host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
HostReg address_reg;
|
||||
if (address.IsConstant())
|
||||
|
@ -1424,7 +1803,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
|
||||
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
|
||||
{
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1451,7 +1830,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_MASK);
|
||||
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
|
||||
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1473,11 +1852,11 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
const u32 host_code_size =
|
||||
static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
const void* host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||
|
@ -1485,7 +1864,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
@ -1496,11 +1875,11 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
SwitchToNearCode();
|
||||
m_register_cache.UninhibitAllocation();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
|
@ -1540,7 +1919,7 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
|
||||
EmitOr(result.host_reg, result.host_reg,
|
||||
Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
|
||||
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
|
||||
static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
|
@ -1573,17 +1952,13 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value)
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value)
|
||||
{
|
||||
Value value_in_hr = GetValueInHostRegister(value);
|
||||
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
bpi.fault_count = 0;
|
||||
void* host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
HostReg address_reg;
|
||||
if (address.IsConstant())
|
||||
|
@ -1599,7 +1974,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
m_register_cache.InhibitAllocation();
|
||||
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
|
||||
{
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1627,7 +2002,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->add(GetHostReg64(RARG3), GetFastmemBasePtrReg(), Bus::FASTMEM_LUT_NUM_PAGES * sizeof(u32*));
|
||||
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetHostReg64(RARG3), GetHostReg32(RARG1), a64::LSL, 3));
|
||||
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1649,17 +2024,17 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
const u32 host_code_size =
|
||||
static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
void* host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value_in_hr, true);
|
||||
EmitStoreGuestMemorySlowmem(instruction, info, address, size, value_in_hr, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
@ -1670,11 +2045,12 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
SwitchToNearCode();
|
||||
m_register_cache.UninhibitAllocation();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value, bool in_far_code)
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value,
|
||||
bool in_far_code)
|
||||
{
|
||||
Value value_in_hr = GetValueInHostRegister(value);
|
||||
|
||||
|
@ -1717,7 +2093,7 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
|
||||
EmitOr(result.host_reg, result.host_reg,
|
||||
Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
|
||||
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
|
||||
static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
if (!in_far_code)
|
||||
|
@ -1754,64 +2130,26 @@ void CodeGenerator::EmitUpdateFastmemBase()
|
|||
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
|
||||
Log_DevFmt("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address);
|
||||
|
||||
// check jump distance
|
||||
const s64 jump_distance =
|
||||
static_cast<s64>(reinterpret_cast<intptr_t>(lbi.host_slowmem_pc) - reinterpret_cast<intptr_t>(lbi.host_pc));
|
||||
static_cast<s64>(reinterpret_cast<intptr_t>(lbi.thunk_address) - reinterpret_cast<intptr_t>(host_pc));
|
||||
Assert(Common::IsAligned(jump_distance, 4));
|
||||
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size,
|
||||
a64::PositionDependentCode);
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(host_pc), lbi.code_size, a64::PositionDependentCode);
|
||||
emit.b(jump_distance >> 2);
|
||||
|
||||
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
const s32 nops = (static_cast<s32>(lbi.code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to return", pc);
|
||||
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a64::PositionDependentCode);
|
||||
emit.ret();
|
||||
|
||||
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
|
||||
|
||||
// check jump distance
|
||||
const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(pc));
|
||||
Assert(Common::IsAligned(jump_distance, 4));
|
||||
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
|
||||
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a64::PositionDependentCode);
|
||||
emit.b(jump_distance >> 2);
|
||||
|
||||
// shouldn't have any nops
|
||||
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
@ -1980,6 +2318,69 @@ void CodeGenerator::EmitICacheCheckAndUpdate()
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
|
||||
{
|
||||
// store it first to reduce code size, because we can offset
|
||||
armMoveAddressToReg(m_emit, RXARG1, ram_ptr);
|
||||
armMoveAddressToReg(m_emit, RXARG2, shadow_ptr);
|
||||
|
||||
bool first = true;
|
||||
u32 offset = 0;
|
||||
a64::Label block_changed;
|
||||
|
||||
while (size >= 16)
|
||||
{
|
||||
const a64::VRegister vtmp = a64::v2.V4S();
|
||||
const a64::VRegister dst = first ? a64::v0.V4S() : a64::v1.V4S();
|
||||
m_emit->ldr(dst, a64::MemOperand(RXARG1, offset));
|
||||
m_emit->ldr(vtmp, a64::MemOperand(RXARG2, offset));
|
||||
m_emit->cmeq(dst, dst, vtmp);
|
||||
if (!first)
|
||||
m_emit->and_(dst.V16B(), dst.V16B(), vtmp.V16B());
|
||||
else
|
||||
first = false;
|
||||
|
||||
offset += 16;
|
||||
size -= 16;
|
||||
}
|
||||
|
||||
if (!first)
|
||||
{
|
||||
// TODO: make sure this doesn't choke on ffffffff
|
||||
m_emit->uminv(a64::s0, a64::v0.V4S());
|
||||
m_emit->fcmp(a64::s0, 0.0);
|
||||
m_emit->b(&block_changed, a64::eq);
|
||||
}
|
||||
|
||||
while (size >= 8)
|
||||
{
|
||||
m_emit->ldr(RXARG3, a64::MemOperand(RXARG1, offset));
|
||||
m_emit->ldr(RXSCRATCH, a64::MemOperand(RXARG2, offset));
|
||||
m_emit->cmp(RXARG3, RXSCRATCH);
|
||||
m_emit->b(&block_changed, a64::ne);
|
||||
offset += 8;
|
||||
size -= 8;
|
||||
}
|
||||
|
||||
while (size >= 4)
|
||||
{
|
||||
m_emit->ldr(RWARG3, a64::MemOperand(RXARG1, offset));
|
||||
m_emit->ldr(RWSCRATCH, a64::MemOperand(RXARG2, offset));
|
||||
m_emit->cmp(RWARG3, RWSCRATCH);
|
||||
m_emit->b(&block_changed, a64::ne);
|
||||
offset += 4;
|
||||
size -= 4;
|
||||
}
|
||||
|
||||
DebugAssert(size == 0);
|
||||
|
||||
a64::Label block_unchanged;
|
||||
m_emit->b(&block_unchanged);
|
||||
m_emit->bind(&block_changed);
|
||||
armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false);
|
||||
m_emit->bind(&block_unchanged);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||
{
|
||||
static_assert(offsetof(State, pending_ticks) + sizeof(u32) == offsetof(State, gte_completion_tick));
|
||||
|
@ -2253,7 +2654,7 @@ void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
|
|||
reinterpret_cast<uintptr_t>(GetCurrentCodePointer()) & ~static_cast<uintptr_t>(0xFFF));
|
||||
const void* ptr_page =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(ptr) & ~static_cast<uintptr_t>(0xFFF));
|
||||
const s64 page_displacement = GetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
|
||||
const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
|
||||
const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFu);
|
||||
if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64))
|
||||
{
|
||||
|
@ -2266,81 +2667,4 @@ void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
|
|||
}
|
||||
}
|
||||
|
||||
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
|
||||
{
|
||||
m_emit->sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
m_register_cache.ReserveCalleeSavedRegisters();
|
||||
const u32 stack_adjust = PrepareStackForCall();
|
||||
|
||||
EmitLoadGlobalAddress(RCPUPTR, &g_state);
|
||||
|
||||
a64::Label event_test;
|
||||
m_emit->b(&event_test);
|
||||
|
||||
// main dispatch loop
|
||||
a64::Label main_loop;
|
||||
m_emit->Bind(&main_loop);
|
||||
|
||||
// time to lookup the block
|
||||
// w8 <- pc
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pc)));
|
||||
|
||||
// x9 <- s_fast_map[pc >> 16]
|
||||
EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer());
|
||||
m_emit->lsr(a64::w9, a64::w8, 16);
|
||||
m_emit->lsr(a64::w8, a64::w8, 2);
|
||||
m_emit->ldr(a64::x9, a64::MemOperand(a64::x10, a64::x9, a64::LSL, 3));
|
||||
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
|
||||
m_emit->blr(a64::x8);
|
||||
|
||||
// w8 <- pending_ticks
|
||||
// w9 <- downcount
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
|
||||
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
|
||||
|
||||
// while downcount < pending_ticks
|
||||
m_emit->cmp(a64::w8, a64::w9);
|
||||
m_emit->b(&main_loop, a64::lt);
|
||||
|
||||
m_emit->Bind(&event_test);
|
||||
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
|
||||
m_emit->b(&main_loop);
|
||||
|
||||
// all done
|
||||
RestoreStackAfterCall(stack_adjust);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->ret();
|
||||
|
||||
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_DevPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return reinterpret_cast<CodeCache::DispatcherFunction>(ptr);
|
||||
}
|
||||
|
||||
CodeCache::SingleBlockDispatcherFunction CodeGenerator::CompileSingleBlockDispatcher()
|
||||
{
|
||||
m_emit->sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
m_register_cache.ReserveCalleeSavedRegisters();
|
||||
const u32 stack_adjust = PrepareStackForCall();
|
||||
|
||||
EmitLoadGlobalAddress(RCPUPTR, &g_state);
|
||||
|
||||
m_emit->blr(GetHostReg64(RARG1));
|
||||
|
||||
RestoreStackAfterCall(stack_adjust);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->ret();
|
||||
|
||||
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_DevPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return reinterpret_cast<CodeCache::SingleBlockDispatcherFunction>(ptr);
|
||||
}
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -29,8 +29,8 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
|
|||
m_load_delay_dirty = true;
|
||||
}
|
||||
|
||||
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const SpeculativeValue& address_spec, RegSize size)
|
||||
Value CodeGenerator::EmitLoadGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, const SpeculativeValue& address_spec, RegSize size)
|
||||
{
|
||||
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
|
||||
{
|
||||
|
@ -44,7 +44,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
{
|
||||
Value result = m_register_cache.AllocateScratch(size);
|
||||
|
||||
if (g_settings.IsUsingFastmem() && Bus::IsRAMAddress(static_cast<u32>(address.constant_value)))
|
||||
// TODO: mask off...
|
||||
if (CodeCache::IsUsingFastmem() && Bus::IsRAMAddress(static_cast<u32>(address.constant_value)))
|
||||
{
|
||||
// have to mask away the high bits for mirrors, since we don't map them in fastmem
|
||||
EmitLoadGuestRAMFastmem(Value::FromConstantU32(static_cast<u32>(address.constant_value) & Bus::g_ram_mask),
|
||||
|
@ -68,25 +69,25 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
{
|
||||
if (!use_fastmem)
|
||||
{
|
||||
Log_ProfilePrintf("Non-constant load at 0x%08X, speculative address 0x%08X, using fastmem = %s", cbi.pc,
|
||||
Log_ProfilePrintf("Non-constant load at 0x%08X, speculative address 0x%08X, using fastmem = %s", info.pc,
|
||||
*address_spec, use_fastmem ? "yes" : "no");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Log_ProfilePrintf("Non-constant load at 0x%08X, speculative address UNKNOWN, using fastmem = %s", cbi.pc,
|
||||
Log_ProfilePrintf("Non-constant load at 0x%08X, speculative address UNKNOWN, using fastmem = %s", info.pc,
|
||||
use_fastmem ? "yes" : "no");
|
||||
}
|
||||
|
||||
if (g_settings.IsUsingFastmem() && use_fastmem)
|
||||
if (CodeCache::IsUsingFastmem() && use_fastmem)
|
||||
{
|
||||
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
|
||||
EmitLoadGuestMemoryFastmem(instruction, info, address, size, result);
|
||||
}
|
||||
else
|
||||
{
|
||||
AddPendingCycles(true);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
|
||||
EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, false);
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
|
@ -115,8 +116,9 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
return result;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const SpeculativeValue& address_spec, RegSize size, const Value& value)
|
||||
void CodeGenerator::EmitStoreGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, const SpeculativeValue& address_spec, RegSize size,
|
||||
const Value& value)
|
||||
{
|
||||
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
|
||||
{
|
||||
|
@ -141,25 +143,25 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
{
|
||||
if (!use_fastmem)
|
||||
{
|
||||
Log_ProfilePrintf("Non-constant store at 0x%08X, speculative address 0x%08X, using fastmem = %s", cbi.pc,
|
||||
Log_ProfilePrintf("Non-constant store at 0x%08X, speculative address 0x%08X, using fastmem = %s", info.pc,
|
||||
*address_spec, use_fastmem ? "yes" : "no");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Log_ProfilePrintf("Non-constant store at 0x%08X, speculative address UNKNOWN, using fastmem = %s", cbi.pc,
|
||||
Log_ProfilePrintf("Non-constant store at 0x%08X, speculative address UNKNOWN, using fastmem = %s", info.pc,
|
||||
use_fastmem ? "yes" : "no");
|
||||
}
|
||||
|
||||
if (g_settings.IsUsingFastmem() && use_fastmem)
|
||||
if (CodeCache::IsUsingFastmem() && use_fastmem)
|
||||
{
|
||||
EmitStoreGuestMemoryFastmem(cbi, address, size, value);
|
||||
EmitStoreGuestMemoryFastmem(instruction, info, address, size, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
AddPendingCycles(true);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value, false);
|
||||
EmitStoreGuestMemorySlowmem(instruction, info, address, size, value, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,38 +1,315 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_recompiler_code_generator.h"
|
||||
#include "cpu_recompiler_thunks.h"
|
||||
#include "settings.h"
|
||||
#include "timing_event.h"
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
|
||||
Log_SetChannel(Recompiler::CodeGenerator);
|
||||
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
#include "Zycore/Format.h"
|
||||
#include "Zycore/Status.h"
|
||||
#include "Zydis/Zydis.h"
|
||||
#endif
|
||||
|
||||
bool CPU::Recompiler::IsCallerSavedRegister(u32 id)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile.
|
||||
return (id <= 2 || (id >= 8 && id <= 11));
|
||||
#else
|
||||
// rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
|
||||
return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11));
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
{
|
||||
using namespace Xbyak;
|
||||
|
||||
#define PTR(x) (cg->rbp + (((u8*)(x)) - ((u8*)&g_state)))
|
||||
|
||||
#ifdef _WIN32
|
||||
// Shadow space for Win32
|
||||
constexpr u32 stack_size = 32 + 8;
|
||||
#else
|
||||
// Stack still needs to be aligned
|
||||
constexpr u32 stack_size = 8;
|
||||
#endif
|
||||
|
||||
DebugAssert(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
|
||||
|
||||
CodeGenerator acg(code_size, static_cast<u8*>(code));
|
||||
CodeGenerator* cg = &acg;
|
||||
|
||||
Label dispatch;
|
||||
Label exit_recompiler;
|
||||
|
||||
g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(const_cast<u8*>(cg->getCurr()));
|
||||
{
|
||||
// Don't need to save registers, because we fastjmp out when execution is interrupted.
|
||||
cg->sub(cg->rsp, stack_size);
|
||||
|
||||
// CPU state pointer
|
||||
cg->lea(cg->rbp, cg->qword[cg->rip + &g_state]);
|
||||
|
||||
// newrec preloads fastmem base
|
||||
if (g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler && CodeCache::IsUsingFastmem())
|
||||
cg->mov(cg->rbx, cg->qword[PTR(&g_state.fastmem_base)]);
|
||||
|
||||
// Fall through to event dispatcher
|
||||
}
|
||||
|
||||
// check events then for frame done
|
||||
g_check_events_and_dispatch = cg->getCurr();
|
||||
{
|
||||
Label skip_event_check;
|
||||
cg->mov(RWARG1, cg->dword[PTR(&g_state.pending_ticks)]);
|
||||
cg->cmp(RWARG1, cg->dword[PTR(&g_state.downcount)]);
|
||||
cg->jl(skip_event_check);
|
||||
|
||||
g_run_events_and_dispatch = cg->getCurr();
|
||||
cg->call(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
|
||||
|
||||
cg->L(skip_event_check);
|
||||
}
|
||||
|
||||
// TODO: align?
|
||||
g_dispatcher = cg->getCurr();
|
||||
{
|
||||
cg->L(dispatch);
|
||||
|
||||
// rcx <- s_fast_map[pc >> 16]
|
||||
cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
|
||||
cg->lea(RXARG2, cg->dword[PTR(g_code_lut.data())]);
|
||||
cg->mov(RWARG3, RWARG1);
|
||||
cg->shr(RWARG3, 16);
|
||||
cg->mov(RXARG2, cg->qword[RXARG2 + RXARG3 * 8]);
|
||||
|
||||
// call(rcx[pc * 2]) (fast_map[pc >> 2])
|
||||
cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
|
||||
}
|
||||
|
||||
g_compile_or_revalidate_block = cg->getCurr();
|
||||
{
|
||||
cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
|
||||
cg->call(&CompileOrRevalidateBlock);
|
||||
cg->jmp(dispatch);
|
||||
}
|
||||
|
||||
g_discard_and_recompile_block = cg->getCurr();
|
||||
{
|
||||
cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
|
||||
cg->call(&DiscardAndRecompileBlock);
|
||||
cg->jmp(dispatch);
|
||||
}
|
||||
|
||||
g_interpret_block = cg->getCurr();
|
||||
{
|
||||
cg->call(CodeCache::GetInterpretUncachedBlockFunction());
|
||||
cg->jmp(dispatch);
|
||||
}
|
||||
|
||||
#undef PTR
|
||||
|
||||
return static_cast<u32>(cg->getSize());
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
||||
{
|
||||
u8* ptr = static_cast<u8*>(code);
|
||||
*(ptr++) = 0xE9; // jmp
|
||||
|
||||
const ptrdiff_t disp = (reinterpret_cast<uintptr_t>(dst) - reinterpret_cast<uintptr_t>(code)) - 5;
|
||||
DebugAssert(disp >= static_cast<ptrdiff_t>(std::numeric_limits<s32>::min()) &&
|
||||
disp <= static_cast<ptrdiff_t>(std::numeric_limits<s32>::max()));
|
||||
|
||||
const s32 disp32 = static_cast<s32>(disp);
|
||||
std::memcpy(ptr, &disp32, sizeof(disp32));
|
||||
return 5;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
|
||||
static ZydisFormatterFunc s_old_print_address;
|
||||
|
||||
static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter, ZydisFormatterBuffer* buffer,
|
||||
ZydisFormatterContext* context)
|
||||
{
|
||||
using namespace CPU;
|
||||
|
||||
ZyanU64 address;
|
||||
ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand, context->runtime_address, &address));
|
||||
|
||||
char buf[128];
|
||||
u32 len = 0;
|
||||
|
||||
#define A(x) static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(x))
|
||||
|
||||
if (address >= A(Bus::g_ram) && address < A(Bus::g_ram + Bus::g_ram_size))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_ram+0x%08X", static_cast<u32>(address - A(Bus::g_ram)));
|
||||
}
|
||||
else if (address >= A(&g_state.regs) &&
|
||||
address < A(reinterpret_cast<const u8*>(&g_state.regs) + sizeof(CPU::Registers)))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.regs.%s",
|
||||
GetRegName(static_cast<CPU::Reg>(((address - A(&g_state.regs.r[0])) / 4u))));
|
||||
}
|
||||
else if (address >= A(&g_state.cop0_regs) &&
|
||||
address < A(reinterpret_cast<const u8*>(&g_state.cop0_regs) + sizeof(CPU::Cop0Registers)))
|
||||
{
|
||||
for (const DebuggerRegisterListEntry& rle : g_debugger_register_list)
|
||||
{
|
||||
if (address == static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(rle.value_ptr)))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.cop0_regs.%s", rle.name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (address >= A(&g_state.gte_regs) &&
|
||||
address < A(reinterpret_cast<const u8*>(&g_state.gte_regs) + sizeof(GTE::Regs)))
|
||||
{
|
||||
for (const DebuggerRegisterListEntry& rle : g_debugger_register_list)
|
||||
{
|
||||
if (address == static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(rle.value_ptr)))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.gte_regs.%s", rle.name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (address == A(&g_state.load_delay_reg))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.load_delay_reg");
|
||||
}
|
||||
else if (address == A(&g_state.next_load_delay_reg))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_reg");
|
||||
}
|
||||
else if (address == A(&g_state.load_delay_value))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.load_delay_value");
|
||||
}
|
||||
else if (address == A(&g_state.next_load_delay_value))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_value");
|
||||
}
|
||||
else if (address == A(&g_state.pending_ticks))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.pending_ticks");
|
||||
}
|
||||
else if (address == A(&g_state.downcount))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_state.downcount");
|
||||
}
|
||||
|
||||
#undef A
|
||||
|
||||
if (len > 0)
|
||||
{
|
||||
ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
|
||||
ZyanString* string;
|
||||
ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
|
||||
return ZyanStringAppendFormat(string, "&%s", buf);
|
||||
}
|
||||
|
||||
return s_old_print_address(formatter, buffer, context);
|
||||
}
|
||||
|
||||
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
|
||||
{
|
||||
ZydisDecoder disas_decoder;
|
||||
ZydisFormatter disas_formatter;
|
||||
ZydisDecodedInstruction disas_instruction;
|
||||
ZydisDecodedOperand disas_operands[ZYDIS_MAX_OPERAND_COUNT];
|
||||
ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
|
||||
ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
|
||||
s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
|
||||
ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
|
||||
|
||||
const u8* ptr = static_cast<const u8*>(start);
|
||||
TinyString hex;
|
||||
ZyanUSize remaining = size;
|
||||
while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(&disas_decoder, ptr, remaining, &disas_instruction, disas_operands)))
|
||||
{
|
||||
char buffer[256];
|
||||
if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, disas_operands,
|
||||
ZYDIS_MAX_OPERAND_COUNT, buffer, sizeof(buffer),
|
||||
static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(ptr)), nullptr)))
|
||||
{
|
||||
hex.clear();
|
||||
for (u32 i = 0; i < 10; i++)
|
||||
{
|
||||
if (i < disas_instruction.length)
|
||||
hex.append_fmt(" {:02X}", ptr[i]);
|
||||
else
|
||||
hex.append(" ");
|
||||
}
|
||||
Log::WriteFmt("HostCode", "", LOGLEVEL_DEBUG, " {:016X} {} {}",
|
||||
static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), hex, buffer);
|
||||
}
|
||||
|
||||
ptr += disas_instruction.length;
|
||||
remaining -= disas_instruction.length;
|
||||
}
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
|
||||
{
|
||||
ZydisDecoder disas_decoder;
|
||||
ZydisDecodedInstruction disas_instruction;
|
||||
ZydisDecoderContext disas_context;
|
||||
ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
|
||||
|
||||
const u8* ptr = static_cast<const u8*>(start);
|
||||
ZyanUSize remaining = size;
|
||||
u32 inst_count = 0;
|
||||
while (
|
||||
ZYAN_SUCCESS(ZydisDecoderDecodeInstruction(&disas_decoder, &disas_context, ptr, remaining, &disas_instruction)))
|
||||
{
|
||||
ptr += disas_instruction.length;
|
||||
remaining -= disas_instruction.length;
|
||||
inst_count++;
|
||||
}
|
||||
|
||||
return inst_count;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
|
||||
{
|
||||
Log_ErrorPrint("Not compiled with ENABLE_HOST_DISASSEMBLY.");
|
||||
}
|
||||
|
||||
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
|
||||
{
|
||||
Log_ErrorPrint("Not compiled with ENABLE_HOST_DISASSEMBLY.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // ENABLE_HOST_DISASSEMBLY
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
#if defined(ABI_WIN64)
|
||||
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
|
||||
constexpr HostReg RARG1 = Xbyak::Operand::RCX;
|
||||
constexpr HostReg RARG2 = Xbyak::Operand::RDX;
|
||||
constexpr HostReg RARG3 = Xbyak::Operand::R8;
|
||||
constexpr HostReg RARG4 = Xbyak::Operand::R9;
|
||||
constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
|
||||
#elif defined(ABI_SYSV)
|
||||
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
|
||||
constexpr HostReg RARG1 = Xbyak::Operand::RDI;
|
||||
constexpr HostReg RARG2 = Xbyak::Operand::RSI;
|
||||
constexpr HostReg RARG3 = Xbyak::Operand::RDX;
|
||||
constexpr HostReg RARG4 = Xbyak::Operand::RCX;
|
||||
constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 0;
|
||||
#endif
|
||||
static constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
static constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
static constexpr HostReg RRETURN = RXRET.getIdx();
|
||||
static constexpr HostReg RARG1 = RXARG1.getIdx();
|
||||
static constexpr HostReg RARG2 = RXARG2.getIdx();
|
||||
static constexpr HostReg RARG3 = RXARG3.getIdx();
|
||||
static constexpr HostReg RARG4 = RXARG4.getIdx();
|
||||
|
||||
static const Xbyak::Reg8 GetHostReg8(HostReg reg)
|
||||
{
|
||||
|
@ -80,7 +357,7 @@ static const Xbyak::Reg64 GetHostReg64(const Value& value)
|
|||
|
||||
static const Xbyak::Reg64 GetCPUPtrReg()
|
||||
{
|
||||
return GetHostReg64(RCPUPTR);
|
||||
return Xbyak::Reg64(RCPUPTR);
|
||||
}
|
||||
|
||||
static const Xbyak::Reg64 GetFastmemBasePtrReg()
|
||||
|
@ -177,6 +454,11 @@ void CodeGenerator::SwitchToNearCode()
|
|||
m_emit = &m_near_emitter;
|
||||
}
|
||||
|
||||
void* CodeGenerator::GetStartNearCodePointer() const
|
||||
{
|
||||
return m_near_emitter.getCode<u8*>();
|
||||
}
|
||||
|
||||
void* CodeGenerator::GetCurrentNearCodePointer() const
|
||||
{
|
||||
return m_near_emitter.getCurr<void*>();
|
||||
|
@ -217,10 +499,9 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
|||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
DebugAssert(fastmem_reg_allocated);
|
||||
|
@ -230,19 +511,19 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to)
|
||||
{
|
||||
if (free_registers)
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
}
|
||||
|
||||
if (emit_return)
|
||||
m_emit->ret();
|
||||
if (jump_to)
|
||||
m_emit->jmp(jump_to);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExit()
|
||||
|
@ -257,7 +538,7 @@ void CodeGenerator::EmitExceptionExit()
|
|||
m_register_cache.FlushLoadDelay(false);
|
||||
|
||||
m_register_cache.PopCalleeSavedRegisters(false);
|
||||
m_emit->ret();
|
||||
m_emit->jmp(CodeCache::g_check_events_and_dispatch);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
|
||||
|
@ -276,20 +557,23 @@ void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
|
|||
m_register_cache.PopState();
|
||||
}
|
||||
|
||||
void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
|
||||
const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
|
||||
{
|
||||
m_near_emitter.ready();
|
||||
m_far_emitter.ready();
|
||||
|
||||
const u32 near_size = static_cast<u32>(m_near_emitter.getSize());
|
||||
const u32 far_size = static_cast<u32>(m_far_emitter.getSize());
|
||||
*out_host_code = m_near_emitter.getCode<CodeBlock::HostCodePointer>();
|
||||
const void* code = m_near_emitter.getCode<const void*>();
|
||||
*out_host_code_size = near_size;
|
||||
*out_host_far_code_size = far_size;
|
||||
m_code_buffer->CommitCode(near_size);
|
||||
m_code_buffer->CommitFarCode(far_size);
|
||||
|
||||
m_near_emitter.reset();
|
||||
m_far_emitter.reset();
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
|
||||
|
@ -1461,8 +1745,9 @@ u32 CodeGenerator::PrepareStackForCall()
|
|||
// we assume that the stack is unaligned at this point
|
||||
const u32 num_callee_saved = m_register_cache.GetActiveCalleeSavedRegisterCount();
|
||||
const u32 num_caller_saved = m_register_cache.PushCallerSavedRegisters();
|
||||
const u32 current_offset = 8 + (num_callee_saved + num_caller_saved) * 8;
|
||||
const u32 aligned_offset = Common::AlignUp(current_offset + FUNCTION_CALL_SHADOW_SPACE, 16);
|
||||
const u32 current_offset = (num_callee_saved + num_caller_saved) * 8;
|
||||
const u32 aligned_offset =
|
||||
(current_offset == 0) ? 0 : Common::AlignUp(current_offset + FUNCTION_CALL_SHADOW_SPACE, 16);
|
||||
const u32 adjust_size = aligned_offset - current_offset;
|
||||
if (adjust_size > 0)
|
||||
m_emit->sub(m_emit->rsp, adjust_size);
|
||||
|
@ -1902,16 +2187,11 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
bpi.fault_count = 0;
|
||||
void* host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
|
||||
{
|
||||
|
@ -1921,7 +2201,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
{
|
||||
actual_address = &result;
|
||||
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
|
||||
m_register_cache.InhibitAllocation();
|
||||
|
@ -1988,7 +2268,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->shr(GetHostReg32(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
|
||||
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
|
||||
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -2011,18 +2291,17 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
const u32 fastmem_size = static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc));
|
||||
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
|
||||
for (u32 i = 0; i < nops; i++)
|
||||
m_emit->nop();
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
const u32 host_code_size =
|
||||
static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
m_far_emitter.align(16);
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
void* thunk_host_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||
|
@ -2030,7 +2309,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
@ -2041,11 +2320,11 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
SwitchToNearCode();
|
||||
m_register_cache.UninhibitAllocation();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, thunk_host_pc);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
|
@ -2082,8 +2361,8 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->neg(GetHostReg32(result.host_reg));
|
||||
m_emit->shl(GetHostReg32(result.host_reg), 2);
|
||||
m_emit->or_(GetHostReg32(result.host_reg),
|
||||
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), cbi.is_branch_delay_slot, false,
|
||||
cbi.instruction.cop.cop_n));
|
||||
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), info.is_branch_delay_slot, false,
|
||||
instruction.cop.cop_n));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
|
@ -2116,16 +2395,11 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value)
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
bpi.fault_count = 0;
|
||||
void* host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
|
||||
{
|
||||
|
@ -2137,7 +2411,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
|
||||
actual_address = &temp_address;
|
||||
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
|
||||
m_register_cache.InhibitAllocation();
|
||||
|
@ -2252,7 +2526,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
m_emit->and_(GetHostReg32(RARG2), Bus::FASTMEM_LUT_PAGE_MASK);
|
||||
m_emit->mov(GetHostReg64(RARG1),
|
||||
m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8 + (Bus::FASTMEM_LUT_NUM_PAGES * 8)]);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
host_pc = GetCurrentNearCodePointer();
|
||||
|
||||
switch (size)
|
||||
{
|
||||
|
@ -2290,24 +2564,23 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
const u32 fastmem_size = static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc));
|
||||
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
|
||||
for (u32 i = 0; i < nops; i++)
|
||||
m_emit->nop();
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
const u32 host_code_size =
|
||||
static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
m_far_emitter.align();
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
const void* host_thunk_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value, true);
|
||||
EmitStoreGuestMemorySlowmem(instruction, info, address, size, value, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
@ -2318,11 +2591,12 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
SwitchToNearCode();
|
||||
m_register_cache.UninhibitAllocation();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_thunk_pc);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
const Value& value, bool in_far_code)
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value,
|
||||
bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
|
@ -2360,8 +2634,8 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi,
|
|||
// cause_bits = (result << 2) | BD | cop_n
|
||||
m_emit->shl(GetHostReg32(result), 2);
|
||||
m_emit->or_(GetHostReg32(result),
|
||||
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), cbi.is_branch_delay_slot, false,
|
||||
cbi.instruction.cop.cop_n));
|
||||
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), info.is_branch_delay_slot, false,
|
||||
instruction.cop.cop_n));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
|
@ -2398,55 +2672,21 @@ void CodeGenerator::EmitUpdateFastmemBase()
|
|||
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
|
||||
Log_ProfileFmt("Backpatching {} (guest PC 0x{:08X}) to slowmem", host_pc, lbi.guest_pc);
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
|
||||
cg.jmp(lbi.host_slowmem_pc);
|
||||
Xbyak::CodeGenerator cg(lbi.code_size, host_pc);
|
||||
cg.jmp(lbi.thunk_address);
|
||||
|
||||
const s32 nops = static_cast<s32>(lbi.host_code_size) -
|
||||
static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(lbi.host_pc)));
|
||||
const s32 nops = static_cast<s32>(lbi.code_size) -
|
||||
static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(host_pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to return", pc);
|
||||
|
||||
Xbyak::CodeGenerator cg(pc_size, pc);
|
||||
cg.ret();
|
||||
|
||||
const s32 nops =
|
||||
static_cast<s32>(pc_size) - static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
|
||||
|
||||
Xbyak::CodeGenerator cg(pc_size, pc);
|
||||
cg.jmp(target);
|
||||
|
||||
// shouldn't have any nops
|
||||
const s32 nops =
|
||||
static_cast<s32>(pc_size) - static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
@ -2737,6 +2977,62 @@ void CodeGenerator::EmitICacheCheckAndUpdate()
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
|
||||
{
|
||||
const auto ram_ptr_reg = GetHostReg64(RARG1);
|
||||
const auto shadow_ptr_reg = GetHostReg64(RARG2);
|
||||
const auto temp_reg = GetHostReg64(RARG3);
|
||||
const auto temp_reg32 = GetHostReg32(RARG3);
|
||||
|
||||
// store it first to reduce code size, because we can offset
|
||||
m_emit->mov(ram_ptr_reg, static_cast<size_t>(reinterpret_cast<uintptr_t>(ram_ptr)));
|
||||
m_emit->mov(shadow_ptr_reg, static_cast<size_t>(reinterpret_cast<uintptr_t>(shadow_ptr)));
|
||||
|
||||
bool first = true;
|
||||
u32 offset = 0;
|
||||
while (size >= 16)
|
||||
{
|
||||
const Xbyak::Xmm& dst = first ? m_emit->xmm0 : m_emit->xmm1;
|
||||
m_emit->movups(dst, m_emit->xword[ram_ptr_reg + offset]);
|
||||
m_emit->pcmpeqd(dst, m_emit->xword[shadow_ptr_reg + offset]);
|
||||
if (!first)
|
||||
m_emit->pand(m_emit->xmm0, dst);
|
||||
else
|
||||
first = false;
|
||||
|
||||
offset += 16;
|
||||
size -= 16;
|
||||
}
|
||||
|
||||
// TODO: better codegen for 16 byte aligned blocks
|
||||
if (!first)
|
||||
{
|
||||
m_emit->movmskps(temp_reg32, m_emit->xmm0);
|
||||
m_emit->cmp(temp_reg32, 0xf);
|
||||
m_emit->jne(CodeCache::g_discard_and_recompile_block);
|
||||
}
|
||||
|
||||
while (size >= 8)
|
||||
{
|
||||
m_emit->mov(temp_reg, m_emit->qword[ram_ptr_reg + offset]);
|
||||
m_emit->cmp(temp_reg, m_emit->qword[shadow_ptr_reg + offset]);
|
||||
m_emit->jne(CodeCache::g_discard_and_recompile_block);
|
||||
offset += 8;
|
||||
size -= 8;
|
||||
}
|
||||
|
||||
while (size >= 4)
|
||||
{
|
||||
m_emit->mov(temp_reg32, m_emit->dword[ram_ptr_reg + offset]);
|
||||
m_emit->cmp(temp_reg32, m_emit->dword[shadow_ptr_reg + offset]);
|
||||
m_emit->jne(CodeCache::g_discard_and_recompile_block);
|
||||
offset += 4;
|
||||
size -= 4;
|
||||
}
|
||||
|
||||
DebugAssert(size == 0);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||
{
|
||||
m_emit->mov(GetHostReg32(RRETURN), m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)]);
|
||||
|
@ -2759,7 +3055,7 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
|||
static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
|
||||
if (Xbyak::inner::IsInInt32(static_cast<u64>(jump_distance)))
|
||||
{
|
||||
m_emit->jmp(address);
|
||||
m_emit->jmp(address, Xbyak::CodeGenerator::T_NEAR);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -3068,77 +3364,4 @@ void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
|
|||
else
|
||||
m_emit->mov(GetHostReg64(host_reg), reinterpret_cast<size_t>(ptr));
|
||||
}
|
||||
|
||||
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
|
||||
{
|
||||
m_register_cache.ReserveCalleeSavedRegisters();
|
||||
const u32 stack_adjust = PrepareStackForCall();
|
||||
|
||||
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
|
||||
|
||||
Xbyak::Label event_test;
|
||||
m_emit->jmp(event_test);
|
||||
|
||||
// main dispatch loop
|
||||
Xbyak::Label main_loop;
|
||||
m_emit->align(16);
|
||||
m_emit->L(main_loop);
|
||||
|
||||
// time to lookup the block
|
||||
// eax <- pc
|
||||
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pc)]);
|
||||
|
||||
// rcx <- s_fast_map[pc >> 16]
|
||||
EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer());
|
||||
m_emit->mov(m_emit->ecx, m_emit->eax);
|
||||
m_emit->shr(m_emit->ecx, 16);
|
||||
m_emit->mov(m_emit->rcx, m_emit->qword[m_emit->rbx + m_emit->rcx * 8]);
|
||||
|
||||
// call(rcx[pc * 2]) (fast_map[pc >> 2])
|
||||
m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]);
|
||||
|
||||
// eax <- pending_ticks
|
||||
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
|
||||
|
||||
// while eax < downcount
|
||||
Xbyak::Label downcount_hit;
|
||||
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
|
||||
m_emit->jl(main_loop);
|
||||
|
||||
m_emit->L(event_test);
|
||||
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
|
||||
m_emit->jmp(main_loop);
|
||||
|
||||
// all done
|
||||
RestoreStackAfterCall(stack_adjust);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->ret();
|
||||
|
||||
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_DevPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
CodeCache::SingleBlockDispatcherFunction CodeGenerator::CompileSingleBlockDispatcher()
|
||||
{
|
||||
m_register_cache.ReserveCalleeSavedRegisters();
|
||||
const u32 stack_adjust = PrepareStackForCall();
|
||||
|
||||
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
|
||||
|
||||
m_emit->call(GetHostReg64(RARG1));
|
||||
|
||||
RestoreStackAfterCall(stack_adjust);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->ret();
|
||||
|
||||
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_DevPrintf("Single block dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return reinterpret_cast<CodeCache::SingleBlockDispatcherFunction>(ptr);
|
||||
}
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
@ -6,6 +6,12 @@
|
|||
#include "cpu_recompiler_types.h"
|
||||
#include "cpu_types.h"
|
||||
|
||||
#if defined(CPU_ARCH_ARM32)
|
||||
#include "vixl/aarch32/macro-assembler-aarch32.h"
|
||||
#elif defined(CPU_ARCH_ARM64)
|
||||
#include "vixl/aarch64/macro-assembler-aarch64.h"
|
||||
#endif
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <stack>
|
||||
|
@ -13,6 +19,59 @@
|
|||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
enum RegSize : u8
|
||||
{
|
||||
RegSize_8,
|
||||
RegSize_16,
|
||||
RegSize_32,
|
||||
RegSize_64,
|
||||
};
|
||||
|
||||
#if defined(CPU_ARCH_X64)
|
||||
|
||||
using HostReg = unsigned;
|
||||
using CodeEmitter = Xbyak::CodeGenerator;
|
||||
using LabelType = Xbyak::Label;
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = 16
|
||||
};
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_64;
|
||||
|
||||
#elif defined(CPU_ARCH_ARM32)
|
||||
|
||||
using HostReg = unsigned;
|
||||
using CodeEmitter = vixl::aarch32::MacroAssembler;
|
||||
using LabelType = vixl::aarch32::Label;
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = vixl::aarch32::kNumberOfRegisters
|
||||
};
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_32;
|
||||
|
||||
#elif defined(CPU_ARCH_ARM64)
|
||||
|
||||
using HostReg = unsigned;
|
||||
using CodeEmitter = vixl::aarch64::MacroAssembler;
|
||||
using LabelType = vixl::aarch64::Label;
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = vixl::aarch64::kNumberOfRegisters
|
||||
};
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_64;
|
||||
|
||||
#else
|
||||
|
||||
#error Unknown architecture.
|
||||
|
||||
#endif
|
||||
|
||||
class CodeGenerator;
|
||||
class RegisterCache;
|
||||
|
||||
enum class HostRegState : u8
|
||||
{
|
||||
None = 0,
|
||||
|
|
|
@ -1,15 +1,11 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
#include "cpu_code_cache.h"
|
||||
#include "cpu_types.h"
|
||||
|
||||
namespace CPU {
|
||||
struct CodeBlock;
|
||||
struct CodeBlockInstruction;
|
||||
|
||||
namespace Recompiler::Thunks {
|
||||
namespace CPU::Recompiler::Thunks {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Trampolines for calling back from the JIT
|
||||
|
@ -18,7 +14,6 @@ namespace Recompiler::Thunks {
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
bool InterpretInstruction();
|
||||
bool InterpretInstructionPGXP();
|
||||
void CheckAndUpdateICache(u32 pc, u32 line_count);
|
||||
|
||||
// Memory access functions for the JIT - MSB is set on exception.
|
||||
u64 ReadMemoryByte(u32 address);
|
||||
|
@ -36,9 +31,6 @@ void UncheckedWriteMemoryByte(u32 address, u32 value);
|
|||
void UncheckedWriteMemoryHalfWord(u32 address, u32 value);
|
||||
void UncheckedWriteMemoryWord(u32 address, u32 value);
|
||||
|
||||
void ResolveBranch(CodeBlock* block, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
|
||||
void LogPC(u32 pc);
|
||||
|
||||
} // namespace Recompiler::Thunks
|
||||
|
||||
} // namespace CPU
|
||||
} // namespace CPU::Recompiler::Thunks
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
// Shared code between recompiler backends.
|
||||
|
||||
#pragma once
|
||||
#include "cpu_types.h"
|
||||
|
||||
|
@ -14,158 +16,118 @@
|
|||
#define XBYAK_NO_OP_NAMES 1
|
||||
#include "xbyak.h"
|
||||
|
||||
#elif defined(CPU_ARCH_ARM32)
|
||||
|
||||
#include "vixl/aarch32/constants-aarch32.h"
|
||||
#include "vixl/aarch32/instructions-aarch32.h"
|
||||
#include "vixl/aarch32/macro-assembler-aarch32.h"
|
||||
|
||||
#elif defined(CPU_ARCH_ARM64)
|
||||
|
||||
#include "vixl/aarch64/constants-aarch64.h"
|
||||
#include "vixl/aarch64/macro-assembler-aarch64.h"
|
||||
|
||||
#endif
|
||||
|
||||
namespace CPU {
|
||||
|
||||
namespace Recompiler {
|
||||
|
||||
class CodeGenerator;
|
||||
class RegisterCache;
|
||||
|
||||
enum RegSize : u8
|
||||
{
|
||||
RegSize_8,
|
||||
RegSize_16,
|
||||
RegSize_32,
|
||||
RegSize_64,
|
||||
};
|
||||
|
||||
enum class Condition : u8
|
||||
{
|
||||
Always,
|
||||
NotEqual,
|
||||
Equal,
|
||||
Overflow,
|
||||
Greater,
|
||||
GreaterEqual,
|
||||
LessEqual,
|
||||
Less,
|
||||
Negative,
|
||||
PositiveOrZero,
|
||||
Above, // unsigned variant of Greater
|
||||
AboveEqual, // unsigned variant of GreaterEqual
|
||||
Below, // unsigned variant of Less
|
||||
BelowEqual, // unsigned variant of LessEqual
|
||||
|
||||
NotZero,
|
||||
Zero
|
||||
};
|
||||
|
||||
#if defined(CPU_ARCH_X64)
|
||||
|
||||
using HostReg = unsigned;
|
||||
using CodeEmitter = Xbyak::CodeGenerator;
|
||||
using LabelType = Xbyak::Label;
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = 16
|
||||
};
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_64;
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
// A reasonable "maximum" number of bytes per instruction.
|
||||
constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
|
||||
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
|
||||
|
||||
// Alignment of code stoarge.
|
||||
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
|
||||
|
||||
// ABI selection
|
||||
#if defined(_WIN32)
|
||||
#define ABI_WIN64 1
|
||||
#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__FreeBSD__)
|
||||
|
||||
#define RWRET Xbyak::Reg32(Xbyak::Operand::EAX)
|
||||
#define RWARG1 Xbyak::Reg32(Xbyak::Operand::RCX)
|
||||
#define RWARG2 Xbyak::Reg32(Xbyak::Operand::RDX)
|
||||
#define RWARG3 Xbyak::Reg32(Xbyak::Operand::R8D)
|
||||
#define RWARG4 Xbyak::Reg32(Xbyak::Operand::R9D)
|
||||
#define RXRET Xbyak::Reg64(Xbyak::Operand::RAX)
|
||||
#define RXARG1 Xbyak::Reg64(Xbyak::Operand::RCX)
|
||||
#define RXARG2 Xbyak::Reg64(Xbyak::Operand::RDX)
|
||||
#define RXARG3 Xbyak::Reg64(Xbyak::Operand::R8)
|
||||
#define RXARG4 Xbyak::Reg64(Xbyak::Operand::R9)
|
||||
|
||||
static constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
|
||||
|
||||
#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__FreeBSD__)
|
||||
#define ABI_SYSV 1
|
||||
|
||||
#define RWRET Xbyak::Reg32(Xbyak::Operand::EAX)
|
||||
#define RWARG1 Xbyak::Reg32(Xbyak::Operand::EDI)
|
||||
#define RWARG2 Xbyak::Reg32(Xbyak::Operand::ESI)
|
||||
#define RWARG3 Xbyak::Reg32(Xbyak::Operand::EDX)
|
||||
#define RWARG4 Xbyak::Reg32(Xbyak::Operand::ECX)
|
||||
#define RXRET Xbyak::Reg64(Xbyak::Operand::RAX)
|
||||
#define RXARG1 Xbyak::Reg64(Xbyak::Operand::RDI)
|
||||
#define RXARG2 Xbyak::Reg64(Xbyak::Operand::RSI)
|
||||
#define RXARG3 Xbyak::Reg64(Xbyak::Operand::RDX)
|
||||
#define RXARG4 Xbyak::Reg64(Xbyak::Operand::RCX)
|
||||
|
||||
static constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 0;
|
||||
|
||||
#else
|
||||
#error Unknown ABI.
|
||||
#endif
|
||||
|
||||
bool IsCallerSavedRegister(u32 id);
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
||||
#elif defined(CPU_ARCH_ARM32)
|
||||
|
||||
using HostReg = unsigned;
|
||||
using CodeEmitter = vixl::aarch32::MacroAssembler;
|
||||
using LabelType = vixl::aarch32::Label;
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = vixl::aarch32::kNumberOfRegisters
|
||||
};
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_32;
|
||||
#include "vixl/aarch32/assembler-aarch32.h"
|
||||
#include "vixl/aarch32/constants-aarch32.h"
|
||||
#include "vixl/aarch32/instructions-aarch32.h"
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
// A reasonable "maximum" number of bytes per instruction.
|
||||
constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
|
||||
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
|
||||
|
||||
// Alignment of code stoarge.
|
||||
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
|
||||
#define RRET vixl::aarch32::r0
|
||||
#define RARG1 vixl::aarch32::r0
|
||||
#define RARG2 vixl::aarch32::r1
|
||||
#define RARG3 vixl::aarch32::r2
|
||||
#define RARG4 vixl::aarch32::r3
|
||||
#define RSCRATCH vixl::aarch32::r12
|
||||
#define RSTATE vixl::aarch32::r4
|
||||
#define RMEMBASE vixl::aarch32::r5
|
||||
|
||||
s32 armGetPCDisplacement(const void* current, const void* target);
|
||||
bool armIsPCDisplacementInImmediateRange(s32 displacement);
|
||||
void armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
|
||||
void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm);
|
||||
void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
||||
#elif defined(CPU_ARCH_ARM64)
|
||||
|
||||
using HostReg = unsigned;
|
||||
using CodeEmitter = vixl::aarch64::MacroAssembler;
|
||||
using LabelType = vixl::aarch64::Label;
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = vixl::aarch64::kNumberOfRegisters
|
||||
};
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_64;
|
||||
#include "vixl/aarch64/assembler-aarch64.h"
|
||||
#include "vixl/aarch64/constants-aarch64.h"
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
// A reasonable "maximum" number of bytes per instruction.
|
||||
constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
|
||||
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
|
||||
|
||||
// Alignment of code stoarge.
|
||||
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
|
||||
#define RWRET vixl::aarch64::w0
|
||||
#define RXRET vixl::aarch64::x0
|
||||
#define RWARG1 vixl::aarch64::w0
|
||||
#define RXARG1 vixl::aarch64::x0
|
||||
#define RWARG2 vixl::aarch64::w1
|
||||
#define RXARG2 vixl::aarch64::x1
|
||||
#define RWARG3 vixl::aarch64::w2
|
||||
#define RXARG3 vixl::aarch64::x2
|
||||
#define RWARG4 vixl::aarch64::w3
|
||||
#define RXARG4 vixl::aarch64::x3
|
||||
#define RWSCRATCH vixl::aarch64::w16
|
||||
#define RXSCRATCH vixl::aarch64::x16
|
||||
#define RSTATE vixl::aarch64::x19
|
||||
#define RMEMBASE vixl::aarch64::x20
|
||||
|
||||
#elif defined(CPU_ARCH_RISCV64)
|
||||
bool armIsCallerSavedRegister(u32 id);
|
||||
s64 armGetPCDisplacement(const void* current, const void* target);
|
||||
void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::XRegister& reg, const void* addr);
|
||||
void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
|
||||
void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
|
||||
u8* armGetJumpTrampoline(const void* target);
|
||||
|
||||
using HostReg = unsigned;
|
||||
|
||||
// Alignment of code stoarge.
|
||||
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
|
||||
|
||||
#else
|
||||
|
||||
using HostReg = int;
|
||||
|
||||
class CodeEmitter
|
||||
{
|
||||
};
|
||||
|
||||
enum : u32
|
||||
{
|
||||
HostReg_Count = 1
|
||||
};
|
||||
|
||||
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
|
||||
constexpr RegSize HostPointerSize = RegSize_64;
|
||||
constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = false;
|
||||
} // namespace CPU::Recompiler
|
||||
|
||||
#endif
|
||||
|
||||
struct LoadStoreBackpatchInfo
|
||||
{
|
||||
void* host_pc; // pointer to instruction which will fault
|
||||
void* host_slowmem_pc; // pointer to slowmem callback code
|
||||
u32 host_code_size; // size of the fastmem load as well as the add for cycles
|
||||
HostReg address_host_reg; // register containing the guest address to load/store
|
||||
HostReg value_host_reg; // register containing the source/destination
|
||||
PhysicalMemoryAddress guest_pc;
|
||||
u32 fault_count;
|
||||
};
|
||||
|
||||
} // namespace Recompiler
|
||||
|
||||
} // namespace CPU
|
||||
|
|
|
@ -315,8 +315,7 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOO
|
|||
PGXP::Shutdown();
|
||||
|
||||
// we need to recompile all blocks if pgxp is toggled on/off
|
||||
if (g_settings.IsUsingCodeCache())
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::CodeCache::Reset();
|
||||
|
||||
// need to swap interpreters
|
||||
System::InterruptExecution();
|
||||
|
@ -407,8 +406,7 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_
|
|||
PGXP::Initialize();
|
||||
|
||||
// we need to recompile all blocks if pgxp is toggled on/off
|
||||
if (g_settings.IsUsingCodeCache())
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::CodeCache::Reset();
|
||||
}
|
||||
})
|
||||
|
||||
|
|
|
@ -349,8 +349,9 @@ void ImGuiManager::DrawPerformanceOverlay()
|
|||
System::GetMaximumFrameTime());
|
||||
DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255));
|
||||
|
||||
if (g_settings.cpu_overclock_active || (!g_settings.IsUsingRecompiler() || g_settings.cpu_recompiler_icache ||
|
||||
g_settings.cpu_recompiler_memory_exceptions))
|
||||
if (g_settings.cpu_overclock_active ||
|
||||
(g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler || g_settings.cpu_recompiler_icache ||
|
||||
g_settings.cpu_recompiler_memory_exceptions))
|
||||
{
|
||||
first = true;
|
||||
text.assign("CPU[");
|
||||
|
|
|
@ -254,8 +254,6 @@ struct Settings
|
|||
bool log_to_window = false;
|
||||
bool log_to_file = false;
|
||||
|
||||
ALWAYS_INLINE bool IsUsingCodeCache() const { return (cpu_execution_mode != CPUExecutionMode::Interpreter); }
|
||||
ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); }
|
||||
ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); }
|
||||
ALWAYS_INLINE bool IsRunaheadEnabled() const { return (runahead_frames > 0); }
|
||||
|
||||
|
@ -275,12 +273,6 @@ struct Settings
|
|||
gpu_pgxp_depth_clear_threshold = value / GPU_PGXP_DEPTH_THRESHOLD_SCALE;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE bool IsUsingFastmem() const
|
||||
{
|
||||
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler &&
|
||||
!cpu_recompiler_memory_exceptions);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE s32 GetAudioOutputVolume(bool fast_forwarding) const
|
||||
{
|
||||
return audio_output_muted ? 0 : (fast_forwarding ? audio_fast_forward_volume : audio_output_volume);
|
||||
|
|
|
@ -244,6 +244,8 @@ void System::Internal::ProcessStartup()
|
|||
if (!Bus::AllocateMemory())
|
||||
Panic("Failed to allocate memory for emulated bus.");
|
||||
|
||||
CPU::CodeCache::ProcessStartup();
|
||||
|
||||
// This will call back to Host::LoadSettings() -> ReloadSources().
|
||||
LoadSettings(false);
|
||||
|
||||
|
@ -265,6 +267,7 @@ void System::Internal::ProcessShutdown()
|
|||
|
||||
InputManager::CloseSources();
|
||||
|
||||
CPU::CodeCache::ProcessShutdown();
|
||||
Bus::ReleaseMemory();
|
||||
}
|
||||
|
||||
|
@ -1508,6 +1511,8 @@ bool System::Initialize(bool force_software_renderer)
|
|||
return false;
|
||||
}
|
||||
|
||||
CPU::CodeCache::Initialize();
|
||||
|
||||
if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer, false))
|
||||
{
|
||||
Bus::Shutdown();
|
||||
|
@ -1536,9 +1541,6 @@ bool System::Initialize(bool force_software_renderer)
|
|||
return false;
|
||||
}
|
||||
|
||||
// CPU code cache must happen after GPU, because it might steal our address space.
|
||||
CPU::CodeCache::Initialize();
|
||||
|
||||
DMA::Initialize();
|
||||
InterruptController::Initialize();
|
||||
|
||||
|
@ -1704,6 +1706,7 @@ void System::Execute()
|
|||
|
||||
// TODO: Purge reset/restore
|
||||
g_gpu->RestoreDeviceContext();
|
||||
TimingEvents::UpdateCPUDowncount();
|
||||
|
||||
if (s_rewind_load_counter >= 0)
|
||||
DoRewind();
|
||||
|
@ -2037,9 +2040,9 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
|
|||
if (sw.IsReading())
|
||||
{
|
||||
if (is_memory_state)
|
||||
CPU::CodeCache::InvalidateAll();
|
||||
CPU::CodeCache::InvalidateAllRAMBlocks();
|
||||
else
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::CodeCache::Reset();
|
||||
}
|
||||
|
||||
// only reset pgxp if we're not runahead-rollbacking. the value checks will save us from broken rendering, and it
|
||||
|
@ -2158,7 +2161,7 @@ void System::InternalReset()
|
|||
return;
|
||||
|
||||
CPU::Reset();
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::CodeCache::Reset();
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
PGXP::Initialize();
|
||||
|
||||
|
@ -3522,7 +3525,10 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
|
|||
g_settings.cpu_execution_mode))),
|
||||
5.0f);
|
||||
CPU::ExecutionModeChanged();
|
||||
CPU::CodeCache::Reinitialize();
|
||||
if (old_settings.cpu_execution_mode != CPUExecutionMode::Interpreter)
|
||||
CPU::CodeCache::Shutdown();
|
||||
if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter)
|
||||
CPU::CodeCache::Initialize();
|
||||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
|
@ -3534,12 +3540,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
|
|||
{
|
||||
Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Recompiler options changed, flushing all blocks."), 5.0f);
|
||||
CPU::ExecutionModeChanged();
|
||||
|
||||
// changing memory exceptions can re-enable fastmem
|
||||
if (g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions)
|
||||
CPU::CodeCache::Reinitialize();
|
||||
else
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::CodeCache::Reset();
|
||||
|
||||
if (g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
|
||||
CPU::ClearICache();
|
||||
|
@ -3597,20 +3598,13 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
|
|||
g_settings.gpu_pgxp_vertex_cache != old_settings.gpu_pgxp_vertex_cache ||
|
||||
g_settings.gpu_pgxp_cpu != old_settings.gpu_pgxp_cpu)))
|
||||
{
|
||||
if (g_settings.IsUsingCodeCache())
|
||||
{
|
||||
Host::AddOSDMessage(g_settings.gpu_pgxp_enable ?
|
||||
TRANSLATE_STR("OSDMessage", "PGXP enabled, recompiling all blocks.") :
|
||||
TRANSLATE_STR("OSDMessage", "PGXP disabled, recompiling all blocks."),
|
||||
5.0f);
|
||||
CPU::CodeCache::Flush();
|
||||
}
|
||||
|
||||
if (old_settings.gpu_pgxp_enable)
|
||||
PGXP::Shutdown();
|
||||
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
PGXP::Initialize();
|
||||
|
||||
CPU::CodeCache::Reset();
|
||||
}
|
||||
|
||||
if (g_settings.cdrom_readahead_sectors != old_settings.cdrom_readahead_sectors)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "page_fault_handler.h"
|
||||
|
@ -28,24 +28,12 @@ struct RegisteredHandler
|
|||
{
|
||||
Callback callback;
|
||||
const void* owner;
|
||||
void* start_pc;
|
||||
u32 code_size;
|
||||
};
|
||||
static std::vector<RegisteredHandler> m_handlers;
|
||||
static std::mutex m_handler_lock;
|
||||
static thread_local bool s_in_handler;
|
||||
|
||||
#if defined(CPU_ARCH_ARM32)
|
||||
static bool IsStoreInstruction(const void* ptr)
|
||||
{
|
||||
u32 bits;
|
||||
std::memcpy(&bits, ptr, sizeof(bits));
|
||||
|
||||
// TODO
|
||||
return false;
|
||||
}
|
||||
|
||||
#elif defined(CPU_ARCH_ARM64)
|
||||
#if defined(CPU_ARCH_ARM64)
|
||||
static bool IsStoreInstruction(const void* ptr)
|
||||
{
|
||||
u32 bits;
|
||||
|
@ -146,7 +134,7 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
|
|||
const bool is_write = (static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_ERR] & 2) != 0;
|
||||
#elif defined(CPU_ARCH_ARM32)
|
||||
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.arm_pc);
|
||||
const bool is_write = IsStoreInstruction(exception_pc);
|
||||
const bool is_write = (static_cast<ucontext_t*>(ctx)->uc_mcontext.error_code & (1 << 11)) != 0; // DFSR.WnR
|
||||
#elif defined(CPU_ARCH_ARM64)
|
||||
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc);
|
||||
const bool is_write = IsStoreInstruction(exception_pc);
|
||||
|
@ -221,7 +209,7 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
|
|||
|
||||
#endif
|
||||
|
||||
bool InstallHandler(const void* owner, void* start_pc, u32 code_size, Callback callback)
|
||||
bool InstallHandler(const void* owner, Callback callback)
|
||||
{
|
||||
bool was_empty;
|
||||
{
|
||||
|
@ -267,7 +255,7 @@ bool InstallHandler(const void* owner, void* start_pc, u32 code_size, Callback c
|
|||
#endif
|
||||
}
|
||||
|
||||
m_handlers.push_back(RegisteredHandler{callback, owner, start_pc, code_size});
|
||||
m_handlers.push_back(RegisteredHandler{callback, owner});
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ enum class HandlerResult
|
|||
using Callback = HandlerResult (*)(void* exception_pc, void* fault_address, bool is_write);
|
||||
using Handle = void*;
|
||||
|
||||
bool InstallHandler(const void* owner, void* start_pc, u32 code_size, Callback callback);
|
||||
bool InstallHandler(const void* owner, Callback callback);
|
||||
bool RemoveHandler(const void* owner);
|
||||
|
||||
} // namespace Common::PageFaultHandler
|
||||
|
|
Loading…
Reference in a new issue