Merge pull request #1099 from stenzek/lut-fastmem-pr

CPU/Recompiler: Implement LUT-based fastmem
This commit is contained in:
Connor McLaughlin 2020-11-24 14:50:55 +10:00 committed by GitHub
commit 764b1ee49d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 1177 additions and 454 deletions

View file

@ -21,6 +21,16 @@
<item>CachedInterpreter</item>
<item>Recompiler</item>
</string-array>
<string-array name="settings_cpu_fastmem_mode_entries">
<item>Disabled (Slowest)</item>
<item>MMap (Hardware, Fastest, 64-Bit Only)</item>
<item>LUT (Faster)</item>
</string-array>
<string-array name="settings_cpu_fastmem_mode_values">
<item>Disabled</item>
<item>MMap</item>
<item>LUT</item>
</string-array>
<string-array name="gpu_renderer_entries">
<item>Hardware (OpenGL)</item>
<item>Hardware (Vulkan)</item>

View file

@ -47,10 +47,13 @@
app:defaultValue="false"
app:summary="Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small cost to performance. If games are running too fast, try enabling this option."
app:iconSpaceReserved="false" />
<SwitchPreferenceCompat
app:key="CPU/Fastmem"
<ListPreference
app:key="CPU/FastmemMode"
app:title="CPU Recompiler Fast Memory Access"
app:defaultValue="true"
app:entries="@array/settings_cpu_fastmem_mode_entries"
app:entryValues="@array/settings_cpu_fastmem_mode_values"
app:useSimpleSummaryProvider="true"
app:defaultValue="MMap"
app:summary="Makes guest memory access more efficient by using page faults and backpatching. Disable if it is unstable on your device."
app:iconSpaceReserved="false" />
<ListPreference

View file

@ -31,7 +31,17 @@ static std::vector<RegisteredHandler> m_handlers;
static std::mutex m_handler_lock;
static thread_local bool s_in_handler;
#ifdef __aarch64__
#if defined(CPU_AARCH32)
static bool IsStoreInstruction(const void* ptr)
{
u32 bits;
std::memcpy(&bits, ptr, sizeof(bits));
// TODO
return false;
}
#elif defined(CPU_AARCH64)
static bool IsStoreInstruction(const void* ptr)
{
u32 bits;
@ -118,10 +128,13 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
#ifndef __APPLE__
void* const exception_address = reinterpret_cast<void*>(info->si_addr);
#if defined(__x86_64__)
#if defined(CPU_X64)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
const bool is_write = (static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_ERR] & 2) != 0;
#elif defined(__aarch64__)
#elif defined(CPU_AARCH32)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.arm_pc);
const bool is_write = IsStoreInstruction(exception_pc);
#elif defined(CPU_AARCH64)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc);
const bool is_write = IsStoreInstruction(exception_pc);
#else
@ -129,12 +142,12 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
const bool is_write = false;
#endif
#else // __APPLE__
#if defined(__x86_64__)
#if defined(CPU_X64)
void* const exception_address =
reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__es.__faultvaddr);
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__ss.__rip);
const bool is_write = (static_cast<ucontext_t*>(ctx)->uc_mcontext->__es.__err & 2) != 0;
#elif defined(__aarch64__)
#elif defined(CPU_AARCH64)
void* const exception_address = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__es.__far);
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__ss.__pc);
const bool is_write = IsStoreInstruction(exception_pc);

View file

@ -119,25 +119,25 @@ if(WIN32)
endif()
if(${CPU_ARCH} STREQUAL "x64")
target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak")
target_compile_definitions(core PRIVATE "WITH_RECOMPILER=1" "WITH_FASTMEM=1")
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak")
target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1" "WITH_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS}
cpu_recompiler_code_generator_x64.cpp
)
message("Building x64 recompiler")
elseif(${CPU_ARCH} STREQUAL "aarch32")
target_compile_definitions(core PRIVATE "WITH_RECOMPILER=1")
target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS}
cpu_recompiler_code_generator_aarch32.cpp
)
target_link_libraries(core PRIVATE vixl)
target_link_libraries(core PUBLIC vixl)
message("Building AArch32 recompiler")
elseif(${CPU_ARCH} STREQUAL "aarch64")
target_compile_definitions(core PRIVATE "WITH_RECOMPILER=1" "WITH_FASTMEM=1")
target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1" "WITH_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS}
cpu_recompiler_code_generator_aarch64.cpp
)
target_link_libraries(core PRIVATE vixl)
target_link_libraries(core PUBLIC vixl)
message("Building AArch64 recompiler")
else()
message("Not building recompiler")

View file

@ -3,6 +3,7 @@
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/make_array.h"
#include "common/state_wrapper.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
@ -69,7 +70,7 @@ union MEMCTRL
};
};
std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits{};
std::bitset<RAM_CODE_PAGE_COUNT> m_ram_code_bits{};
u8* g_ram = nullptr; // 2MB RAM
u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
@ -88,20 +89,24 @@ static std::string m_tty_line_buffer;
static Common::MemoryArena m_memory_arena;
#ifdef WITH_FASTMEM
static CPUFastmemMode m_fastmem_mode = CPUFastmemMode::Disabled;
#ifdef WITH_MMAP_FASTMEM
static u8* m_fastmem_base = nullptr;
static std::vector<Common::MemoryArena::View> m_fastmem_ram_views;
#endif
static u8** m_fastmem_lut = nullptr;
static constexpr auto m_fastmem_ram_mirrors =
make_array(0x00000000u, 0x00200000u, 0x00400000u, 0x00600000u, 0x80000000u, 0x80200000u, 0x80400000u, 0x80600000u,
0xA0000000u, 0xA0200000u, 0xA0400000u, 0xA0600000u);
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
static void RecalculateMemoryTimings();
static bool AllocateMemory();
#ifdef WITH_FASTMEM
static void SetCodePageFastmemProtection(u32 page_index, bool writable);
static void UnmapFastmemViews();
#endif
#define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3))
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8u))
@ -132,17 +137,22 @@ bool Initialize()
void Shutdown()
{
#ifdef WITH_FASTMEM
UnmapFastmemViews();
std::free(m_fastmem_lut);
m_fastmem_lut = nullptr;
#ifdef WITH_MMAP_FASTMEM
m_fastmem_base = nullptr;
m_fastmem_ram_views.clear();
#endif
CPU::g_state.fastmem_base = nullptr;
m_fastmem_mode = CPUFastmemMode::Disabled;
if (g_ram)
{
m_memory_arena.ReleaseViewPtr(g_ram, RAM_SIZE);
g_ram = nullptr;
}
CPU::g_state.fastmem_base = nullptr;
}
void Reset()
@ -268,23 +278,49 @@ bool AllocateMemory()
return true;
}
#ifdef WITH_FASTMEM
void UnmapFastmemViews()
static ALWAYS_INLINE u32 FastmemAddressToLUTPageIndex(u32 address)
{
m_fastmem_ram_views.clear();
return address >> 12;
}
void UpdateFastmemViews(bool enabled, bool isolate_cache)
static ALWAYS_INLINE_RELEASE void SetLUTFastmemPage(u32 address, u8* ptr, bool writable)
{
UnmapFastmemViews();
if (!enabled)
m_fastmem_lut[FastmemAddressToLUTPageIndex(address)] = ptr;
m_fastmem_lut[FASTMEM_LUT_NUM_PAGES + FastmemAddressToLUTPageIndex(address)] = writable ? ptr : nullptr;
}
CPUFastmemMode GetFastmemMode()
{
return m_fastmem_mode;
}
void UpdateFastmemViews(CPUFastmemMode mode, bool isolate_cache)
{
#ifndef WITH_MMAP_FASTMEM
Assert(mode != CPUFastmemMode::MMap);
#else
m_fastmem_ram_views.clear();
#endif
m_fastmem_mode = mode;
if (mode == CPUFastmemMode::Disabled)
{
#ifdef WITH_MMAP_FASTMEM
m_fastmem_base = nullptr;
#endif
std::free(m_fastmem_lut);
m_fastmem_lut = nullptr;
return;
}
Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true " : "false");
Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true" : "false");
#ifdef WITH_MMAP_FASTMEM
if (mode == CPUFastmemMode::MMap)
{
std::free(m_fastmem_lut);
m_fastmem_lut = nullptr;
if (!m_fastmem_base)
{
m_fastmem_base = static_cast<u8*>(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE));
@ -308,12 +344,12 @@ void UpdateFastmemViews(bool enabled, bool isolate_cache)
}
// mark all pages with code as non-writable
for (u32 i = 0; i < CPU_CODE_CACHE_PAGE_COUNT; i++)
for (u32 i = 0; i < RAM_CODE_PAGE_COUNT; i++)
{
if (m_ram_code_bits[i])
{
u8* page_address = map_address + (i * CPU_CODE_CACHE_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, false, false))
u8* page_address = map_address + (i * HOST_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, HOST_PAGE_SIZE, true, false, false))
{
Log_ErrorPrintf("Failed to write-protect code page at %p");
return;
@ -327,16 +363,16 @@ void UpdateFastmemViews(bool enabled, bool isolate_cache)
if (!isolate_cache)
{
// KUSEG - cached
MapRAM(0x00000000, !isolate_cache);
// MapRAM(0x00200000, !isolate_cache);
// MapRAM(0x00400000, !isolate_cache);
// MapRAM(0x00600000, !isolate_cache);
MapRAM(0x00000000, true);
// MapRAM(0x00200000, true);
// MapRAM(0x00400000, true);
// MapRAM(0x00600000, true);
// KSEG0 - cached
MapRAM(0x80000000, !isolate_cache);
// MapRAM(0x80200000, !isolate_cache);
// MapRAM(0x80400000, !isolate_cache);
// MapRAM(0x80600000, !isolate_cache);
MapRAM(0x80000000, true);
// MapRAM(0x80200000, true);
// MapRAM(0x80400000, true);
// MapRAM(0x80600000, true);
}
// KSEG1 - uncached
@ -344,19 +380,83 @@ void UpdateFastmemViews(bool enabled, bool isolate_cache)
// MapRAM(0xA0200000, true);
// MapRAM(0xA0400000, true);
// MapRAM(0xA0600000, true);
return;
}
#endif
#ifdef WITH_MMAP_FASTMEM
m_fastmem_base = nullptr;
#endif
if (!m_fastmem_lut)
{
m_fastmem_lut = static_cast<u8**>(std::calloc(FASTMEM_LUT_NUM_SLOTS, sizeof(u8*)));
Assert(m_fastmem_lut);
Log_InfoPrintf("Fastmem base (software): %p", m_fastmem_lut);
CPU::g_state.fastmem_base = reinterpret_cast<u8*>(m_fastmem_lut);
}
auto MapRAM = [](u32 base_address, bool readable, bool writable) {
if (readable)
{
for (u32 address = 0; address < RAM_SIZE; address += HOST_PAGE_SIZE)
{
SetLUTFastmemPage(base_address + address, &g_ram[address],
!m_ram_code_bits[FastmemAddressToLUTPageIndex(address)]);
}
}
else
{
for (u32 address = 0; address < RAM_SIZE; address += HOST_PAGE_SIZE)
SetLUTFastmemPage(base_address + address, nullptr, false);
}
};
// KUSEG - cached
MapRAM(0x00000000, !isolate_cache, !isolate_cache);
MapRAM(0x00200000, !isolate_cache, !isolate_cache);
MapRAM(0x00400000, !isolate_cache, !isolate_cache);
MapRAM(0x00600000, !isolate_cache, !isolate_cache);
// KSEG0 - cached
MapRAM(0x80000000, !isolate_cache, !isolate_cache);
MapRAM(0x80200000, !isolate_cache, !isolate_cache);
MapRAM(0x80400000, !isolate_cache, !isolate_cache);
MapRAM(0x80600000, !isolate_cache, !isolate_cache);
// KSEG1 - uncached
MapRAM(0xA0000000, true, true);
MapRAM(0xA0200000, true, true);
MapRAM(0xA0400000, true, true);
MapRAM(0xA0600000, true, true);
}
bool CanUseFastmemForAddress(VirtualMemoryAddress address)
{
const PhysicalMemoryAddress paddr = address & CPU::PHYSICAL_MEMORY_ADDRESS_MASK;
switch (m_fastmem_mode)
{
#ifdef WITH_MMAP_FASTMEM
case CPUFastmemMode::MMap:
{
// Currently since we don't map the mirrors, don't use fastmem for them.
// This is because the swapping of page code bits for SMC is too expensive.
return (paddr < RAM_SIZE);
}
return (paddr < RAM_MIRROR_END);
}
#endif
case CPUFastmemMode::LUT:
return (paddr < RAM_SIZE);
case CPUFastmemMode::Disabled:
default:
return false;
}
}
bool IsRAMCodePage(u32 index)
{
return m_ram_code_bits[index];
@ -369,10 +469,7 @@ void SetRAMCodePage(u32 index)
// protect fastmem pages
m_ram_code_bits[index] = true;
#ifdef WITH_FASTMEM
SetCodePageFastmemProtection(index, false);
#endif
}
void ClearRAMCodePage(u32 index)
@ -382,35 +479,45 @@ void ClearRAMCodePage(u32 index)
// unprotect fastmem pages
m_ram_code_bits[index] = false;
#ifdef WITH_FASTMEM
SetCodePageFastmemProtection(index, true);
#endif
}
#ifdef WITH_FASTMEM
void SetCodePageFastmemProtection(u32 page_index, bool writable)
{
#ifdef WITH_MMAP_FASTMEM
if (m_fastmem_mode == CPUFastmemMode::MMap)
{
// unprotect fastmem pages
for (const auto& view : m_fastmem_ram_views)
{
u8* page_address = static_cast<u8*>(view.GetBasePointer()) + (page_index * CPU_CODE_CACHE_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, writable, false))
u8* page_address = static_cast<u8*>(view.GetBasePointer()) + (page_index * HOST_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, HOST_PAGE_SIZE, true, writable, false))
{
Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? "unprotect" : "protect", page_index,
page_index * CPU_CODE_CACHE_PAGE_SIZE, page_address);
page_index * HOST_PAGE_SIZE, page_address);
}
}
return;
}
#endif
if (m_fastmem_mode == CPUFastmemMode::LUT)
{
// mirrors...
const u32 ram_address = page_index * HOST_PAGE_SIZE;
for (u32 mirror_start : m_fastmem_ram_mirrors)
SetLUTFastmemPage(mirror_start + ram_address, &g_ram[ram_address], writable);
}
}
#endif
void ClearRAMCodePageFlags()
{
m_ram_code_bits.reset();
#ifdef WITH_FASTMEM
#ifdef WITH_MMAP_FASTMEM
if (m_fastmem_mode == CPUFastmemMode::MMap)
{
// unprotect fastmem pages
for (const auto& view : m_fastmem_ram_views)
{
@ -419,12 +526,23 @@ void ClearRAMCodePageFlags()
Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", view.GetBasePointer());
}
}
}
#endif
if (m_fastmem_mode == CPUFastmemMode::LUT)
{
for (u32 i = 0; i < RAM_CODE_PAGE_COUNT; i++)
{
const u32 addr = (i * HOST_PAGE_SIZE);
for (u32 mirror_start : m_fastmem_ram_mirrors)
SetLUTFastmemPage(mirror_start + addr, &g_ram[addr], true);
}
}
}
bool IsCodePageAddress(PhysicalMemoryAddress address)
{
return IsRAMAddress(address) ? m_ram_code_bits[(address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE] : false;
return IsRAMAddress(address) ? m_ram_code_bits[(address & RAM_MASK) / HOST_PAGE_SIZE] : false;
}
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
@ -437,11 +555,11 @@ bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
const u32 end_address = start_address + size;
while (start_address < end_address)
{
const u32 code_page_index = start_address / CPU_CODE_CACHE_PAGE_SIZE;
const u32 code_page_index = start_address / HOST_PAGE_SIZE;
if (m_ram_code_bits[code_page_index])
return true;
start_address += CPU_CODE_CACHE_PAGE_SIZE;
start_address += HOST_PAGE_SIZE;
}
return false;
@ -499,7 +617,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value)
}
else
{
const u32 page_index = offset / CPU_CODE_CACHE_PAGE_SIZE;
const u32 page_index = offset / HOST_PAGE_SIZE;
if (m_ram_code_bits[page_index])
CPU::CodeCache::InvalidateBlocksWithPageIndex(page_index);

View file

@ -82,10 +82,15 @@ enum : size_t
// Offsets within the memory arena.
MEMORY_ARENA_RAM_OFFSET = 0,
#ifdef WITH_FASTMEM
#ifdef WITH_MMAP_FASTMEM
// Fastmem region size is 4GB to cover the entire 32-bit address space.
FASTMEM_REGION_SIZE = UINT64_C(0x100000000)
FASTMEM_REGION_SIZE = UINT64_C(0x100000000),
#endif
RAM_CODE_PAGE_COUNT = (RAM_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE,
FASTMEM_LUT_NUM_PAGES = 0x100000, // 0x100000000 >> 12
FASTMEM_LUT_NUM_SLOTS = FASTMEM_LUT_NUM_PAGES * 2,
};
bool Initialize();
@ -93,15 +98,14 @@ void Shutdown();
void Reset();
bool DoState(StateWrapper& sw);
#ifdef WITH_FASTMEM
void UpdateFastmemViews(bool enabled, bool isolate_cache);
CPUFastmemMode GetFastmemMode();
void UpdateFastmemViews(CPUFastmemMode mode, bool isolate_cache);
bool CanUseFastmemForAddress(VirtualMemoryAddress address);
#endif
void SetExpansionROM(std::vector<u8> data);
void SetBIOS(const std::vector<u8>& image);
extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
extern std::bitset<RAM_CODE_PAGE_COUNT> m_ram_code_bits;
extern u8* g_ram; // 2MB RAM
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
@ -114,7 +118,7 @@ ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address)
/// Returns the code page index for a RAM address.
ALWAYS_INLINE static u32 GetRAMCodePageIndex(PhysicalMemoryAddress address)
{
return (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE;
return (address & RAM_MASK) / HOST_PAGE_SIZE;
}
/// Returns true if the specified page contains code.

View file

@ -483,7 +483,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@ -564,7 +564,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@ -678,7 +678,7 @@
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>false</WholeProgramOptimization>
@ -732,7 +732,7 @@
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>true</WholeProgramOptimization>

View file

@ -98,7 +98,7 @@ static void UnlinkBlock(CodeBlock* block);
static void ClearState();
static BlockMap s_blocks;
static std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
static std::array<std::vector<CodeBlock*>, Bus::RAM_CODE_PAGE_COUNT> m_ram_block_map;
#ifdef WITH_RECOMPILER
static HostCodeMap s_host_code_map;
@ -106,11 +106,14 @@ static HostCodeMap s_host_code_map;
static void AddBlockToHostCodeMap(CodeBlock* block);
static void RemoveBlockFromHostCodeMap(CodeBlock* block);
#ifdef WITH_FASTMEM
static bool InitializeFastmem();
static void ShutdownFastmem();
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
#endif // WITH_FASTMEM
static Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc, void* fault_address,
bool is_write);
#ifdef WITH_MMAP_FASTMEM
static Common::PageFaultHandler::HandlerResult MMapPageFaultHandler(void* exception_pc, void* fault_address,
bool is_write);
#endif
#endif // WITH_RECOMPILER
void Initialize()
@ -130,10 +133,8 @@ void Initialize()
Panic("Failed to initialize code space");
}
#ifdef WITH_FASTMEM
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
#endif
ResetFastMap();
CompileDispatcher();
@ -161,10 +162,8 @@ void ClearState()
void Shutdown()
{
ClearState();
#ifdef WITH_FASTMEM
ShutdownFastmem();
#endif
#ifdef WITH_RECOMPILER
ShutdownFastmem();
s_code_buffer.Destroy();
#endif
}
@ -339,10 +338,7 @@ void Reinitialize()
#ifdef WITH_RECOMPILER
#ifdef WITH_FASTMEM
ShutdownFastmem();
#endif
s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler())
@ -358,10 +354,8 @@ void Reinitialize()
Panic("Failed to initialize code space");
}
#ifdef WITH_FASTMEM
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
#endif
ResetFastMap();
CompileDispatcher();
@ -620,7 +614,7 @@ void FastCompileBlockFunction()
void InvalidateBlocksWithPageIndex(u32 page_index)
{
DebugAssert(page_index < CPU_CODE_CACHE_PAGE_COUNT);
DebugAssert(page_index < Bus::RAM_CODE_PAGE_COUNT);
auto& blocks = m_ram_block_map[page_index];
for (CodeBlock* block : blocks)
{
@ -737,27 +731,37 @@ void RemoveBlockFromHostCodeMap(CodeBlock* block)
s_host_code_map.erase(hc_iter);
}
#ifdef WITH_FASTMEM
bool InitializeFastmem()
{
if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, PageFaultHandler))
const CPUFastmemMode mode = g_settings.cpu_fastmem_mode;
Assert(mode != CPUFastmemMode::Disabled);
#ifdef WITH_MMAP_FASTMEM
const auto handler = (mode == CPUFastmemMode::MMap) ? MMapPageFaultHandler : LUTPageFaultHandler;
#else
const auto handler = LUTPageFaultHandler;
Assert(mode != CPUFastmemMode::MMap);
#endif
if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, handler))
{
Log_ErrorPrintf("Failed to install page fault handler");
return false;
}
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
Bus::UpdateFastmemViews(mode, g_state.cop0_regs.sr.Isc);
return true;
}
void ShutdownFastmem()
{
Common::PageFaultHandler::RemoveHandler(&s_host_code_map);
Bus::UpdateFastmemViews(false, false);
Bus::UpdateFastmemViews(CPUFastmemMode::Disabled, false);
}
Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
#ifdef WITH_MMAP_FASTMEM
Common::PageFaultHandler::HandlerResult MMapPageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
{
if (static_cast<u8*>(fault_address) < g_state.fastmem_base ||
(static_cast<u8*>(fault_address) - g_state.fastmem_base) >= Bus::FASTMEM_REGION_SIZE)
@ -827,7 +831,46 @@ Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, voi
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
#endif // WITH_FASTMEM
#endif
Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
{
// use upper_bound to find the next block after the pc
HostCodeMap::iterator upper_iter =
s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
if (upper_iter == s_host_code_map.begin())
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
// then decrement it by one to (hopefully) get the block we want
upper_iter--;
// find the loadstore info in the code block
CodeBlock* block = upper_iter->second;
for (auto bpi_iter = block->loadstore_backpatch_info.begin(); bpi_iter != block->loadstore_backpatch_info.end();
++bpi_iter)
{
Recompiler::LoadStoreBackpatchInfo& lbi = *bpi_iter;
if (lbi.host_pc == exception_pc)
{
// found it, do fixup
if (Recompiler::CodeGenerator::BackpatchLoadStore(lbi))
{
// remove the backpatch entry since we won't be coming back to this one
block->loadstore_backpatch_info.erase(bpi_iter);
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
else
{
Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
}
}
// we didn't find the pc in our list..
Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
#endif // WITH_RECOMPILER

View file

@ -89,10 +89,10 @@ struct CodeBlock
const u32 GetPC() const { return key.GetPC(); }
const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }
const u32 GetStartPageIndex() const { return (key.GetPCPhysicalAddress() / CPU_CODE_CACHE_PAGE_SIZE); }
const u32 GetStartPageIndex() const { return (key.GetPCPhysicalAddress() / HOST_PAGE_SIZE); }
const u32 GetEndPageIndex() const
{
return ((key.GetPCPhysicalAddress() + GetSizeInBytes()) / CPU_CODE_CACHE_PAGE_SIZE);
return ((key.GetPCPhysicalAddress() + GetSizeInBytes()) / HOST_PAGE_SIZE);
}
bool IsInRAM() const
{
@ -131,8 +131,8 @@ void InterpretUncachedBlock();
/// Invalidates any code pages which overlap the specified range.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;
const u32 end_page = (address + word_count * sizeof(u32) - sizeof(u32)) / CPU_CODE_CACHE_PAGE_SIZE;
const u32 start_page = address / HOST_PAGE_SIZE;
const u32 end_page = (address + word_count * sizeof(u32) - sizeof(u32)) / HOST_PAGE_SIZE;
for (u32 page = start_page; page <= end_page; page++)
{
if (Bus::m_ram_code_bits[page])

View file

@ -1600,15 +1600,11 @@ bool InterpretInstructionPGXP()
return g_state.exception_raised;
}
#ifdef WITH_FASTMEM
void UpdateFastmemMapping()
{
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
Bus::UpdateFastmemViews(Bus::GetFastmemMode(), g_state.cop0_regs.sr.Isc);
}
#endif
} // namespace Recompiler::Thunks
} // namespace CPU

View file

@ -2260,9 +2260,8 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
value = AndValues(value, Value::FromConstantU32(write_mask));
}
#ifdef WITH_FASTMEM
// changing SR[Isc] needs to update fastmem views
if (reg == Cop0Reg::SR && g_settings.cpu_fastmem)
if (reg == Cop0Reg::SR && g_settings.IsUsingFastmem())
{
LabelType skip_fastmem_update;
Value old_value = m_register_cache.AllocateScratch(RegSize_32);
@ -2279,9 +2278,6 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
{
EmitStoreCPUStructField(offset, value);
}
#else
EmitStoreCPUStructField(offset, value);
#endif
}
}

View file

@ -52,9 +52,12 @@ public:
bool signed_divide);
void EmitInc(HostReg to_reg, RegSize size);
void EmitDec(HostReg to_reg, RegSize size);
void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, bool assume_amount_masked = true);
void EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, bool assume_amount_masked = true);
void EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, bool assume_amount_masked = true);
void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
bool assume_amount_masked = true);
void EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
bool assume_amount_masked = true);
void EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
bool assume_amount_masked = true);
void EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value);
void EmitOr(HostReg to_reg, HostReg from_reg, const Value& value);
void EmitXor(HostReg to_reg, HostReg from_reg, const Value& value);
@ -77,19 +80,17 @@ public:
void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr);
// Automatically generates an exception handler.
Value GetFastmemLoadBase();
Value GetFastmemStoreBase();
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec,
RegSize size);
#ifdef WITH_FASTMEM
void EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result);
void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result);
#endif
void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result,
bool in_far_code);
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec,
const Value& value);
#ifdef WITH_FASTMEM
void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
#endif
void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value,
bool in_far_code);
@ -250,6 +251,9 @@ private:
bool m_load_delay_dirty = false;
bool m_next_load_delay_dirty = false;
bool m_fastmem_load_base_in_register = false;
bool m_fastmem_store_base_in_register = false;
//////////////////////////////////////////////////////////////////////////
// Speculative Constants
//////////////////////////////////////////////////////////////////////////

View file

@ -839,6 +839,8 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond
u32 CodeGenerator::PrepareStackForCall()
{
m_fastmem_load_base_in_register = false;
m_fastmem_store_base_in_register = false;
m_register_cache.PushCallerSavedRegisters();
return 0;
}
@ -1124,6 +1126,146 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
}
}
Value CodeGenerator::GetFastmemLoadBase()
{
Value val = Value::FromHostReg(&m_register_cache, RARG4, RegSize_32);
if (!m_fastmem_load_base_in_register)
{
m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base)));
m_fastmem_load_base_in_register = true;
}
return val;
}
Value CodeGenerator::GetFastmemStoreBase()
{
Value val = Value::FromHostReg(&m_register_cache, RARG3, RegSize_32);
if (!m_fastmem_store_base_in_register)
{
m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base)));
m_emit->add(GetHostReg32(val), GetHostReg32(val), sizeof(u32*) * Bus::FASTMEM_LUT_NUM_PAGES);
m_fastmem_store_base_in_register = true;
}
return val;
}
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{
Value fastmem_base = GetFastmemLoadBase();
HostReg address_reg;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value));
address_reg = RSCRATCH;
}
else
{
address_reg = address.host_reg;
}
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
m_emit->ldr(GetHostReg32(RARG1),
a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
default:
UnreachableCode();
break;
}
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
Value fastmem_base = GetFastmemLoadBase();
HostReg address_reg;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value));
address_reg = RSCRATCH;
}
else
{
address_reg = address.host_reg;
}
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
m_emit->ldr(GetHostReg32(RARG1),
a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
m_register_cache.InhibitAllocation();
bpi.host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
default:
UnreachableCode();
break;
}
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
const bool old_store_fastmem_base = m_fastmem_store_base_in_register;
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
// restore fastmem base state for the next instruction
if (old_store_fastmem_base)
fastmem_base = GetFastmemStoreBase();
fastmem_base = GetFastmemLoadBase();
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
SwitchToNearCode();
m_register_cache.UninhibitAllocation();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result, bool in_far_code)
{
@ -1199,6 +1341,81 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi,
}
}
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value)
{
LoadStoreBackpatchInfo bpi;
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
Value fastmem_base = GetFastmemStoreBase();
Value actual_value = GetValueInHostRegister(value);
HostReg address_reg;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value));
address_reg = RSCRATCH;
}
else
{
address_reg = address.host_reg;
}
// TODO: if this gets backpatched, these instructions are wasted
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
m_emit->ldr(GetHostReg32(RARG1),
a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
m_register_cache.InhibitAllocation();
bpi.host_pc = GetCurrentNearCodePointer();
switch (value.size)
{
case RegSize_8:
m_emit->strb(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->strh(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32:
m_emit->str(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2)));
break;
default:
UnreachableCode();
break;
}
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
const bool old_load_fastmem_base = m_fastmem_load_base_in_register;
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitStoreGuestMemorySlowmem(cbi, address, actual_value, true);
// restore fastmem base state for the next instruction
if (old_load_fastmem_base)
fastmem_base = GetFastmemLoadBase();
fastmem_base = GetFastmemStoreBase();
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
SwitchToNearCode();
m_register_cache.UninhibitAllocation();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value, bool in_far_code)
{
@ -1278,6 +1495,35 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi,
}
}
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
{
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
// turn it into a jump to the slowmem handler
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size, a32::A32);
// check jump distance
const s32 displacement = GetPCDisplacement(lbi.host_pc, lbi.host_slowmem_pc);
if (!IsPCDisplacementInImmediateRange(displacement))
{
emit.Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(lbi.host_slowmem_pc));
emit.bx(GetHostReg32(RSCRATCH));
}
else
{
a32::Label label(displacement + emit.GetCursorOffset());
emit.b(&label);
}
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
emit.nop();
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
return true;
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
EmitLoadGlobalAddress(RSCRATCH, ptr);

View file

@ -1298,35 +1298,64 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{
HostReg address_reg;
a64::MemOperand actual_address;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg));
address_reg = result.host_reg;
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
address_reg = address.host_reg;
}
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
switch (size)
{
case RegSize_8:
m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address);
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address);
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->Ldr(GetHostReg32(result.host_reg), actual_address);
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
{
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
default:
UnreachableCode();
break;
}
}
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
@ -1334,43 +1363,73 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
a64::MemOperand actual_address;
HostReg address_reg;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg));
bpi.host_pc = GetCurrentNearCodePointer();
address_reg = result.host_reg;
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
address_reg = address.host_reg;
}
m_register_cache.InhibitAllocation();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
bpi.host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address);
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address);
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->Ldr(GetHostReg32(result.host_reg), actual_address);
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
{
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
bpi.host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32:
m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
default:
UnreachableCode();
break;
}
}
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
@ -1472,43 +1531,73 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
a64::MemOperand actual_address;
HostReg address_reg;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RSCRATCH));
bpi.host_pc = GetCurrentNearCodePointer();
address_reg = RSCRATCH;
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
address_reg = address.host_reg;
}
m_register_cache.InhibitAllocation();
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
bpi.host_pc = GetCurrentNearCodePointer();
switch (value.size)
{
case RegSize_8:
m_emit->Strb(GetHostReg8(value_in_hr), actual_address);
m_emit->strb(GetHostReg8(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_16:
m_emit->Strh(GetHostReg16(value_in_hr), actual_address);
m_emit->strh(GetHostReg16(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
case RegSize_32:
m_emit->Str(GetHostReg32(value_in_hr), actual_address);
m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg)));
break;
default:
UnreachableCode();
break;
}
}
else
{
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
m_emit->add(GetHostReg64(RARG3), GetFastmemBasePtrReg(), Bus::FASTMEM_LUT_NUM_PAGES * sizeof(u32*));
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetHostReg64(RARG3), GetHostReg32(RARG1), a64::LSL, 3));
bpi.host_pc = GetCurrentNearCodePointer();
switch (value.size)
{
case RegSize_8:
m_emit->strb(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_16:
m_emit->strh(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
case RegSize_32:
m_emit->str(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2)));
break;
default:
UnreachableCode();
break;
}
}
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));

View file

@ -41,8 +41,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
{
Value result = m_register_cache.AllocateScratch(size);
#ifdef WITH_FASTMEM
if (g_settings.IsUsingFastmem() && Bus::IsRAMAddress(static_cast<u32>(address.constant_value)))
{
// have to mask away the high bits for mirrors, since we don't map them in fastmem
@ -54,12 +52,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
}
#else
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
#endif
m_delayed_cycles_add += read_ticks;
return result;
}
@ -67,7 +59,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles(true);
#ifdef WITH_FASTMEM
Value result = m_register_cache.AllocateScratch(HostPointerSize);
const bool use_fastmem = address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true;
if (address_spec)
@ -82,7 +74,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
use_fastmem ? "yes" : "no");
}
Value result = m_register_cache.AllocateScratch(RegSize_64);
if (g_settings.IsUsingFastmem() && use_fastmem)
{
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
@ -93,14 +84,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
}
#else
Value result = m_register_cache.AllocateScratch(HostPointerSize);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
#endif
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
if (result.size != size)
{
@ -145,8 +128,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles(true);
#ifdef WITH_FASTMEM
const bool use_fastmem = address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true;
if (address_spec)
{
@ -169,13 +150,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
}
#else
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
#endif
}
#ifndef CPU_X64

View file

@ -1760,6 +1760,8 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
@ -1816,6 +1818,31 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
}
break;
}
}
else
{
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), 12);
m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
switch (size)
{
case RegSize_8:
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_16:
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_32:
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
}
}
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
@ -1828,6 +1855,8 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
@ -1886,6 +1915,34 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
}
break;
}
}
else
{
m_register_cache.InhibitAllocation();
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), 12);
m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
bpi.host_pc = GetCurrentNearCodePointer();
switch (size)
{
case RegSize_8:
m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_16:
m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
case RegSize_32:
m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
break;
}
}
// TODO: BIOS reads...
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
@ -1997,6 +2054,8 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
// can't store displacements > 0x80000000 in-line
const Value* actual_address = &address;
Value temp_address;
@ -2102,6 +2161,50 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
}
break;
}
}
else
{
m_register_cache.InhibitAllocation();
// TODO: We could mask the LSBs here for unaligned protection.
EmitCopyValue(RARG1, address);
m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1));
m_emit->shr(GetHostReg32(RARG1), 12);
m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
m_emit->mov(GetHostReg64(RARG1),
m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8 + (Bus::FASTMEM_LUT_NUM_PAGES * 8)]);
bpi.host_pc = GetCurrentNearCodePointer();
switch (value.size)
{
case RegSize_8:
{
if (value.IsConstant())
m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value);
else
m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg8(value));
}
break;
case RegSize_16:
{
if (value.IsConstant())
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value);
else
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg16(value));
}
break;
case RegSize_32:
{
if (value.IsConstant())
m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value);
else
m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg32(value));
}
break;
}
}
// insert nops, we need at least 5 bytes for a relative jump
const u32 fastmem_size =

View file

@ -32,12 +32,8 @@ void UncheckedWriteMemoryByte(u32 address, u8 value);
void UncheckedWriteMemoryHalfWord(u32 address, u16 value);
void UncheckedWriteMemoryWord(u32 address, u32 value);
#ifdef WITH_FASTMEM
void UpdateFastmemMapping();
#endif
} // namespace Recompiler::Thunks
} // namespace CPU

View file

@ -425,7 +425,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
si.SetBoolValue("CPU", "ICache", false);
si.SetBoolValue("CPU", "Fastmem", true);
si.SetBoolValue("CPU", "FastmemMode", Settings::GetCPUFastmemModeName(Settings::DEFAULT_CPU_FASTMEM_MODE));
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
si.SetIntValue("GPU", "ResolutionScale", 1);
@ -548,6 +548,15 @@ void HostInterface::FixIncompatibleSettings(bool display_osd_messages)
g_settings.cpu_execution_mode = CPUExecutionMode::CachedInterpreter;
}
}
#ifndef WITH_MMAP_FASTMEM
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
{
AddOSDMessage(
TranslateStdString("OSDMessage", "mmap fastmem is not available on this platform, using LUT instead."), 20.0f);
g_settings.cpu_fastmem_mode = CPUFastmemMode::LUT;
}
#endif
}
void HostInterface::SaveSettings(SettingsInterface& si)
@ -594,7 +603,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
System::UpdateThrottlePeriod();
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode ||
g_settings.cpu_fastmem != old_settings.cpu_fastmem)
g_settings.cpu_fastmem_mode != old_settings.cpu_fastmem_mode)
{
AddFormattedOSDMessage(
5.0f, TranslateString("OSDMessage", "Switching to %s CPU execution mode."),

View file

@ -130,7 +130,9 @@ void Settings::Load(SettingsInterface& si)
UpdateOverclockActive();
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
cpu_fastmem = si.GetBoolValue("CPU", "Fastmem", true);
cpu_fastmem_mode = ParseCPUFastmemMode(
si.GetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(DEFAULT_CPU_FASTMEM_MODE)).c_str())
.value_or(DEFAULT_CPU_FASTMEM_MODE);
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
.value_or(DEFAULT_GPU_RENDERER);
@ -266,7 +268,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetIntValue("CPU", "OverclockDenominator", cpu_overclock_denominator);
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
si.SetBoolValue("CPU", "Fastmem", cpu_fastmem);
si.SetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(cpu_fastmem_mode));
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());
@ -484,6 +486,37 @@ const char* Settings::GetCPUExecutionModeDisplayName(CPUExecutionMode mode)
return s_cpu_execution_mode_display_names[static_cast<u8>(mode)];
}
static std::array<const char*, static_cast<u32>(CPUFastmemMode::Count)> s_cpu_fastmem_mode_names = {
{"Disabled", "MMap", "LUT"}};
static std::array<const char*, static_cast<u32>(CPUFastmemMode::Count)> s_cpu_fastmem_mode_display_names = {
{TRANSLATABLE("CPUFastmemMode", "Disabled (Slowest)"),
TRANSLATABLE("CPUFastmemMode", "MMap (Hardware, Fastest, 64-Bit Only)"),
TRANSLATABLE("CPUFastmemMode", "LUT (Faster)")}};
std::optional<CPUFastmemMode> Settings::ParseCPUFastmemMode(const char* str)
{
u8 index = 0;
for (const char* name : s_cpu_fastmem_mode_names)
{
if (StringUtil::Strcasecmp(name, str) == 0)
return static_cast<CPUFastmemMode>(index);
index++;
}
return std::nullopt;
}
const char* Settings::GetCPUFastmemModeName(CPUFastmemMode mode)
{
return s_cpu_fastmem_mode_names[static_cast<u8>(mode)];
}
const char* Settings::GetCPUFastmemModeDisplayName(CPUFastmemMode mode)
{
return s_cpu_fastmem_mode_display_names[static_cast<u8>(mode)];
}
static constexpr auto s_gpu_renderer_names = make_array(
#ifdef WIN32
"D3D11",

View file

@ -76,7 +76,7 @@ struct Settings
bool cpu_overclock_active = false;
bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_icache = false;
bool cpu_fastmem = true;
CPUFastmemMode cpu_fastmem_mode = CPUFastmemMode::Disabled;
float emulation_speed = 1.0f;
float fast_forward_speed = 0.0f;
@ -188,7 +188,8 @@ struct Settings
ALWAYS_INLINE bool IsUsingFastmem() const
{
return (cpu_fastmem && cpu_execution_mode == CPUExecutionMode::Recompiler && !cpu_recompiler_memory_exceptions);
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler &&
!cpu_recompiler_memory_exceptions);
}
bool HasAnyPerGameMemoryCards() const;
@ -227,6 +228,10 @@ struct Settings
static const char* GetCPUExecutionModeName(CPUExecutionMode mode);
static const char* GetCPUExecutionModeDisplayName(CPUExecutionMode mode);
static std::optional<CPUFastmemMode> ParseCPUFastmemMode(const char* str);
static const char* GetCPUFastmemModeName(CPUFastmemMode mode);
static const char* GetCPUFastmemModeDisplayName(CPUFastmemMode mode);
static std::optional<GPURenderer> ParseRendererName(const char* str);
static const char* GetRendererName(GPURenderer renderer);
static const char* GetRendererDisplayName(GPURenderer renderer);
@ -264,7 +269,17 @@ struct Settings
static constexpr GPUTextureFilter DEFAULT_GPU_TEXTURE_FILTER = GPUTextureFilter::Nearest;
static constexpr ConsoleRegion DEFAULT_CONSOLE_REGION = ConsoleRegion::Auto;
#ifdef WITH_RECOMPILER
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;
#ifdef WITH_MMAP_FASTMEM
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap;
#else
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT;
#endif
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled;
#endif
#ifndef ANDROID
static constexpr AudioBackend DEFAULT_AUDIO_BACKEND = AudioBackend::Cubeb;

View file

@ -135,8 +135,16 @@ enum : u32
NUM_CONTROLLER_AND_CARD_PORTS = 2
};
enum : u32
enum class CPUFastmemMode
{
CPU_CODE_CACHE_PAGE_SIZE = 4096,
CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
Disabled,
MMap,
LUT,
Count
};
enum : size_t
{
HOST_PAGE_SIZE = 4096,
HOST_PAGE_OFFSET_MASK = HOST_PAGE_SIZE - 1,
};

View file

@ -788,12 +788,17 @@ static std::array<retro_core_option_definition, 45> s_option_definitions = {{
"to performance. If games are running too fast, try enabling this option.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"false"},
{"duckstation_CPU.Fastmem",
{"duckstation_CPU.FastmemMode",
"CPU Recompiler Fast Memory Access",
"Uses page faults to determine hardware memory accesses at runtime. Can provide a significant performance "
"improvement in some games, but make the core more difficult to debug.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"true"},
{{"Disabled", "Disabled (Slowest)"}, {"MMap", "MMap (Hardware, Fastest, 64-Bit Only)"}, {"LUT", "LUT (Faster)"}},
#if defined(CPU_X64) || defined(CPU_AARCH64)
"MMap"
#else
"LUT"
#endif
},
{},
}};

View file

@ -57,6 +57,40 @@ static void setIntRangeTweakOption(QTableWidget* table, int row, int value)
cb->setValue(value);
}
template<typename T>
static void addChoiceTweakOption(QtHostInterface* host_interface, QTableWidget* table, QString name,
std::string section, std::string key, std::optional<T> (*parse_callback)(const char*),
const char* (*get_value_callback)(T), const char* (*get_display_callback)(T),
const char* tr_context, u32 num_values, T default_value)
{
const int row = table->rowCount();
const std::string current_value =
host_interface->GetStringSettingValue(section.c_str(), key.c_str(), get_value_callback(default_value));
table->insertRow(row);
QTableWidgetItem* name_item = new QTableWidgetItem(name);
name_item->setFlags(name_item->flags() & ~(Qt::ItemIsEditable | Qt::ItemIsSelectable));
table->setItem(row, 0, name_item);
QComboBox* cb = new QComboBox(table);
for (u32 i = 0; i < num_values; i++)
cb->addItem(qApp->translate(tr_context, get_display_callback(static_cast<T>(i))));
SettingWidgetBinder::BindWidgetToEnumSetting(host_interface, cb, std::move(section), std::move(key), parse_callback,
get_value_callback, default_value);
table->setCellWidget(row, 1, cb);
}
template<typename T>
static void setChoiceTweakOption(QTableWidget* table, int row, T value)
{
QWidget* widget = table->cellWidget(row, 1);
QComboBox* cb = qobject_cast<QComboBox*>(widget);
Assert(cb);
cb->setCurrentIndex(static_cast<int>(value));
}
AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, QWidget* parent, SettingsDialog* dialog)
: QWidget(parent), m_host_interface(host_interface)
{
@ -90,8 +124,10 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Memory Exceptions"), "CPU",
"RecompilerMemoryExceptions", false);
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU",
"Fastmem", true);
addChoiceTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU",
"FastmemMode", Settings::ParseCPUFastmemMode, Settings::GetCPUFastmemModeName,
Settings::GetCPUFastmemModeDisplayName, "CPUFastmemMode",
static_cast<u32>(CPUFastmemMode::Count), Settings::DEFAULT_CPU_FASTMEM_MODE);
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler ICache"), "CPU",
"RecompilerICache", false);
@ -119,7 +155,7 @@ void AdvancedSettingsWidget::onResetToDefaultClicked()
setBooleanTweakOption(m_ui.tweakOptionTable, 1, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 2, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 3, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 4, true);
setChoiceTweakOption(m_ui.tweakOptionTable, 4, Settings::DEFAULT_CPU_FASTMEM_MODE);
setBooleanTweakOption(m_ui.tweakOptionTable, 5, false);
setIntRangeTweakOption(m_ui.tweakOptionTable, 6, static_cast<int>(Settings::DEFAULT_DMA_MAX_SLICE_TICKS));
setIntRangeTweakOption(m_ui.tweakOptionTable, 7, static_cast<int>(Settings::DEFAULT_DMA_HALT_TICKS));

View file

@ -516,7 +516,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@ -538,7 +538,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@ -584,7 +584,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@ -608,7 +608,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@ -682,7 +682,7 @@
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>false</WholeProgramOptimization>
@ -706,7 +706,7 @@
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>false</WholeProgramOptimization>
@ -730,7 +730,7 @@
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>true</WholeProgramOptimization>
@ -755,7 +755,7 @@
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>true</WholeProgramOptimization>

View file

@ -61,6 +61,10 @@
<ClCompile Include="$(IntDir)moc_postprocessingshaderconfigwidget.cpp" />
<ClCompile Include="$(IntDir)moc_postprocessingsettingswidget.cpp" />
<ClCompile Include="inputbindingmonitor.cpp" />
<ClCompile Include="cheatmanagerdialog.cpp" />
<ClCompile Include="cheatcodeeditordialog.cpp" />
<ClCompile Include="$(IntDir)moc_cheatmanagerdialog.cpp" />
<ClCompile Include="$(IntDir)moc_cheatcodeeditordialog.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="qtutils.h" />
@ -105,6 +109,8 @@
<QtMoc Include="postprocessingchainconfigwidget.h" />
<QtMoc Include="postprocessingshaderconfigwidget.h" />
<QtMoc Include="postprocessingsettingswidget.h" />
<QtMoc Include="cheatmanagerdialog.h" />
<QtMoc Include="cheatcodeeditordialog.h" />
</ItemGroup>
<ItemGroup>
<QtUi Include="consolesettingswidget.ui" />
@ -124,6 +130,8 @@
<QtUi Include="postprocessingchainconfigwidget.ui" />
<QtUi Include="postprocessingsettingswidget.ui" />
<QtUi Include="memorycardeditordialog.ui" />
<QtUi Include="cheatmanagerdialog.ui" />
<QtUi Include="cheatcodeeditordialog.ui" />
</ItemGroup>
<ItemGroup>
<Natvis Include="qt5.natvis" />

View file

@ -913,7 +913,21 @@ void SDLHostInterface::DrawQuickSettingsMenu()
settings_changed |=
ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &m_settings_copy.cpu_recompiler_memory_exceptions);
settings_changed |= ImGui::MenuItem("Recompiler Fastmem", nullptr, &m_settings_copy.cpu_fastmem);
if (ImGui::BeginMenu("Recompiler Fastmem"))
{
for (u32 i = 0; i < static_cast<u32>(CPUFastmemMode::Count); i++)
{
if (ImGui::MenuItem(Settings::GetCPUFastmemModeDisplayName(static_cast<CPUFastmemMode>(i)), nullptr,
m_settings_copy.cpu_fastmem_mode == static_cast<CPUFastmemMode>(i)))
{
m_settings_copy.cpu_fastmem_mode = static_cast<CPUFastmemMode>(i);
settings_changed = true;
}
}
ImGui::EndMenu();
}
settings_changed |= ImGui::MenuItem("Recompiler ICache", nullptr, &m_settings_copy.cpu_recompiler_icache);
ImGui::Separator();