mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-22 13:55:38 +00:00
CPU/CodeCache: Always dynamically allocate code buffer
Reduces .bss size.
This commit is contained in:
parent
0d3e674500
commit
be8fbafd71
|
@ -11,24 +11,36 @@
|
|||
|
||||
#include "fmt/format.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "windows_headers.h"
|
||||
#include <Psapi.h>
|
||||
#elif defined(__APPLE__)
|
||||
#ifdef __aarch64__
|
||||
#include <pthread.h> // pthread_jit_write_protect_np()
|
||||
#endif
|
||||
#include <mach-o/dyld.h>
|
||||
#include <mach-o/getsect.h>
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/mach_port.h>
|
||||
#include <mach/mach_vm.h>
|
||||
#include <mach/vm_map.h>
|
||||
#include <sys/mman.h>
|
||||
#elif !defined(__ANDROID__)
|
||||
#include <cerrno>
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
Log_SetChannel(MemoryArena);
|
||||
Log_SetChannel(MemMap);
|
||||
|
||||
namespace MemMap {
|
||||
/// Allocates RWX memory at the specified address.
|
||||
static void* AllocateJITMemoryAt(const void* addr, size_t size);
|
||||
} // namespace MemMap
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
|
@ -90,6 +102,44 @@ void MemMap::UnmapSharedMemory(void* baseaddr, size_t size)
|
|||
Panic("Failed to unmap shared memory");
|
||||
}
|
||||
|
||||
const void* MemMap::GetBaseAddress()
|
||||
{
|
||||
const HMODULE mod = GetModuleHandleW(nullptr);
|
||||
if (!mod)
|
||||
return nullptr;
|
||||
|
||||
MODULEINFO mi;
|
||||
if (!GetModuleInformation(GetCurrentProcess(), mod, &mi, sizeof(mi)))
|
||||
return mod;
|
||||
|
||||
return mi.lpBaseOfDll;
|
||||
}
|
||||
|
||||
void* MemMap::AllocateJITMemoryAt(const void* addr, size_t size)
|
||||
{
|
||||
void* ptr = static_cast<u8*>(VirtualAlloc(const_cast<void*>(addr), size,
|
||||
addr ? (MEM_RESERVE | MEM_COMMIT) : MEM_COMMIT, PAGE_EXECUTE_READWRITE));
|
||||
if (!ptr && !addr) [[unlikely]]
|
||||
ERROR_LOG("VirtualAlloc(RWX, {}) for internal buffer failed: {}", size, GetLastError());
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void MemMap::ReleaseJITMemory(void* ptr, size_t size)
|
||||
{
|
||||
if (!VirtualFree(ptr, 0, MEM_RELEASE))
|
||||
ERROR_LOG("Failed to free code pointer {}", static_cast<void*>(ptr));
|
||||
}
|
||||
|
||||
#if defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_RISCV64)
|
||||
|
||||
void MemMap::FlushInstructionCache(void* address, size_t size)
|
||||
{
|
||||
::FlushInstructionCache(GetCurrentProcess(), address, size);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
SharedMemoryMappingArea::SharedMemoryMappingArea() = default;
|
||||
|
||||
SharedMemoryMappingArea::~SharedMemoryMappingArea()
|
||||
|
@ -346,6 +396,93 @@ void MemMap::UnmapSharedMemory(void* baseaddr, size_t size)
|
|||
Panic("Failed to unmap shared memory");
|
||||
}
|
||||
|
||||
const void* MemMap::GetBaseAddress()
|
||||
{
|
||||
u32 name_buffer_size = 0;
|
||||
_NSGetExecutablePath(nullptr, &name_buffer_size);
|
||||
if (name_buffer_size > 0) [[likely]]
|
||||
{
|
||||
std::unique_ptr<char[]> name_buffer = std::make_unique_for_overwrite<char[]>(name_buffer_size + 1);
|
||||
if (_NSGetExecutablePath(name_buffer.get(), &name_buffer_size) == 0) [[likely]]
|
||||
{
|
||||
name_buffer[name_buffer_size] = 0;
|
||||
|
||||
const struct segment_command_64* command = getsegbyname("__TEXT");
|
||||
if (command) [[likely]]
|
||||
{
|
||||
const u8* base = reinterpret_cast<const u8*>(command->vmaddr);
|
||||
const u32 image_count = _dyld_image_count();
|
||||
for (u32 i = 0; i < image_count; i++)
|
||||
{
|
||||
if (std::strcmp(_dyld_get_image_name(i), name_buffer.get()) == 0)
|
||||
return base + _dyld_get_image_vmaddr_slide(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return reinterpret_cast<const void*>(&GetBaseAddress);
|
||||
}
|
||||
|
||||
void* MemMap::AllocateJITMemoryAt(const void* addr, size_t size)
|
||||
{
|
||||
#if !defined(__aarch64__)
|
||||
kern_return_t ret = mach_vm_allocate(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&addr), size,
|
||||
addr ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE);
|
||||
if (ret != KERN_SUCCESS)
|
||||
{
|
||||
ERROR_LOG("mach_vm_allocate() returned {}", ret);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ret = mach_vm_protect(mach_task_self(), reinterpret_cast<mach_vm_address_t>(addr), size, false,
|
||||
VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
|
||||
if (ret != KERN_SUCCESS)
|
||||
{
|
||||
ERROR_LOG("mach_vm_protect() returned {}", ret);
|
||||
mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(addr), size);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return const_cast<void*>(addr);
|
||||
#else
|
||||
// On ARM64, we need to use MAP_JIT, which means we can't use MAP_FIXED.
|
||||
if (addr)
|
||||
return nullptr;
|
||||
|
||||
constexpr int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT;
|
||||
void* ptr = mmap(const_cast<void*>(addr), size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
|
||||
if (ptr == MAP_FAILED)
|
||||
{
|
||||
ERROR_LOG("mmap(RWX, {}) for internal buffer failed: {}", size, errno);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
void MemMap::ReleaseJITMemory(void* ptr, size_t size)
|
||||
{
|
||||
#if !defined(__aarch64__)
|
||||
const kern_return_t res = mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(ptr), size);
|
||||
if (res != KERN_SUCCESS)
|
||||
ERROR_LOG("mach_vm_deallocate() failed: {}", res);
|
||||
#else
|
||||
if (munmap(ptr, size) != 0)
|
||||
ERROR_LOG("Failed to free code pointer {}", static_cast<void*>(ptr));
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_RISCV64)
|
||||
|
||||
/// Flushes the host instruction cache for the size bytes starting at address, using the compiler builtin.
void MemMap::FlushInstructionCache(void* address, size_t size)
{
  char* const range_begin = reinterpret_cast<char*>(address);
  char* const range_end = range_begin + size;
  __builtin___clear_cache(range_begin, range_end);
}
|
||||
|
||||
#endif
|
||||
|
||||
SharedMemoryMappingArea::SharedMemoryMappingArea() = default;
|
||||
|
||||
SharedMemoryMappingArea::~SharedMemoryMappingArea()
|
||||
|
@ -531,6 +668,72 @@ void MemMap::UnmapSharedMemory(void* baseaddr, size_t size)
|
|||
Panic("Failed to unmap shared memory");
|
||||
}
|
||||
|
||||
const void* MemMap::GetBaseAddress()
|
||||
{
|
||||
#ifndef __APPLE__
|
||||
Dl_info info;
|
||||
if (dladdr(reinterpret_cast<const void*>(&GetBaseAddress), &info) == 0)
|
||||
{
|
||||
ERROR_LOG("dladdr() failed");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return info.dli_fbase;
|
||||
#else
|
||||
#error Fixme
|
||||
#endif
|
||||
}
|
||||
|
||||
void* MemMap::AllocateJITMemoryAt(const void* addr, size_t size)
|
||||
{
|
||||
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
#if defined(__linux__)
|
||||
// Linux does the right thing, allows us to not disturb an existing mapping.
|
||||
if (addr)
|
||||
flags |= MAP_FIXED_NOREPLACE;
|
||||
#elif defined(__FreeBSD__)
|
||||
// FreeBSD achieves the same with MAP_FIXED and MAP_EXCL.
|
||||
if (addr)
|
||||
flags |= MAP_FIXED | MAP_EXCL;
|
||||
#else
|
||||
// Targeted mapping not available?
|
||||
if (addr)
|
||||
return nullptr;
|
||||
#endif
|
||||
|
||||
void* ptr = mmap(const_cast<void*>(addr), size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
|
||||
if (ptr == MAP_FAILED)
|
||||
{
|
||||
if (!addr)
|
||||
ERROR_LOG("mmap(RWX, {}) for internal buffer failed: {}", size, errno);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
else if (addr && ptr != addr) [[unlikely]]
|
||||
{
|
||||
if (munmap(ptr, size) != 0)
|
||||
ERROR_LOG("Failed to munmap() incorrectly hinted allocation: {}", errno);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void MemMap::ReleaseJITMemory(void* ptr, size_t size)
|
||||
{
|
||||
if (munmap(ptr, size) != 0)
|
||||
ERROR_LOG("Failed to free code pointer {}", static_cast<void*>(ptr));
|
||||
}
|
||||
|
||||
#if defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_RISCV64)
|
||||
|
||||
/// Flushes the host instruction cache for the size bytes starting at address, using the compiler builtin.
void MemMap::FlushInstructionCache(void* address, size_t size)
{
  char* const range_begin = reinterpret_cast<char*>(address);
  char* const range_end = range_begin + size;
  __builtin___clear_cache(range_begin, range_end);
}
|
||||
|
||||
#endif
|
||||
|
||||
SharedMemoryMappingArea::SharedMemoryMappingArea() = default;
|
||||
|
||||
SharedMemoryMappingArea::~SharedMemoryMappingArea()
|
||||
|
@ -591,3 +794,95 @@ bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
void* MemMap::AllocateJITMemory(size_t size)
|
||||
{
|
||||
const u8* base =
|
||||
reinterpret_cast<const u8*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(GetBaseAddress()), HOST_PAGE_SIZE));
|
||||
u8* ptr = nullptr;
|
||||
#if !defined(CPU_ARCH_ARM64) || !defined(__APPLE__)
|
||||
|
||||
#if defined(CPU_ARCH_X64)
|
||||
static constexpr size_t assume_binary_size = 64 * 1024 * 1024;
|
||||
static constexpr size_t step = 64 * 1024 * 1024;
|
||||
static constexpr size_t max_displacement = 0x80000000u;
|
||||
#elif defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_RISCV64)
|
||||
static constexpr size_t assume_binary_size = 16 * 1024 * 1024;
|
||||
static constexpr size_t step = 8 * 1024 * 1024;
|
||||
static constexpr size_t max_displacement =
|
||||
1024 * 1024 * 1024; // technically 4GB, but we don't want to spend that much time trying
|
||||
#elif defined(CPU_ARCH_ARM32)
|
||||
static constexpr size_t assume_binary_size = 8 * 1024 * 1024; // Wishful thinking...
|
||||
static constexpr size_t step = 2 * 1024 * 1024;
|
||||
static constexpr size_t max_displacement = 32 * 1024 * 1024;
|
||||
#else
|
||||
#error Unhandled architecture.
|
||||
#endif
|
||||
|
||||
const size_t max_displacement_from_start = max_displacement - size;
|
||||
Assert(size <= max_displacement);
|
||||
|
||||
// Try to find a region in the max displacement range of the process base address.
|
||||
// Assume that the DuckStation binary will at max be some size, release is currently around 12MB on Windows.
|
||||
// Therefore the max offset is +/- 12MB + code_size. Try allocating in steps by incrementing the pointer, then if no
|
||||
// address range is found, go backwards from the base address (which will probably fail).
|
||||
const u8* min_address =
|
||||
base - std::min(reinterpret_cast<ptrdiff_t>(base), static_cast<ptrdiff_t>(max_displacement_from_start));
|
||||
const u8* max_address = base + max_displacement_from_start;
|
||||
VERBOSE_LOG("Base address: {}", static_cast<const void*>(base));
|
||||
VERBOSE_LOG("Acceptable address range: {} - {}", static_cast<const void*>(min_address),
|
||||
static_cast<const void*>(max_address));
|
||||
|
||||
// Start offset by the expected binary size.
|
||||
for (const u8* current_address = base + assume_binary_size;; current_address += step)
|
||||
{
|
||||
VERBOSE_LOG("Trying {} (displacement 0x{:X})", static_cast<const void*>(current_address),
|
||||
static_cast<ptrdiff_t>(current_address - base));
|
||||
if ((ptr = static_cast<u8*>(AllocateJITMemoryAt(current_address, size))))
|
||||
break;
|
||||
|
||||
if ((reinterpret_cast<uintptr_t>(current_address) + step) > reinterpret_cast<uintptr_t>(max_address) ||
|
||||
(reinterpret_cast<uintptr_t>(current_address) + step) < reinterpret_cast<uintptr_t>(current_address))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Try before (will likely fail).
|
||||
if (!ptr && reinterpret_cast<uintptr_t>(base) >= step)
|
||||
{
|
||||
for (const u8* current_address = base - step;; current_address -= step)
|
||||
{
|
||||
VERBOSE_LOG("Trying {} (displacement 0x{:X})", static_cast<const void*>(current_address),
|
||||
static_cast<ptrdiff_t>(base - current_address));
|
||||
if ((ptr = static_cast<u8*>(AllocateJITMemoryAt(current_address, size))))
|
||||
break;
|
||||
|
||||
if ((reinterpret_cast<uintptr_t>(current_address) - step) < reinterpret_cast<uintptr_t>(min_address) ||
|
||||
(reinterpret_cast<uintptr_t>(current_address) - step) > reinterpret_cast<uintptr_t>(current_address))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!ptr)
|
||||
{
|
||||
#ifdef CPU_ARCH_X64
|
||||
ERROR_LOG("Failed to allocate JIT buffer in range, expect crashes.");
|
||||
#endif
|
||||
if (!(ptr = static_cast<u8*>(AllocateJITMemoryAt(nullptr, size))))
|
||||
return ptr;
|
||||
}
|
||||
#else
|
||||
// We cannot control where the buffer gets allocated on Apple Silicon. Hope for the best.
|
||||
if (!(ptr = static_cast<u8*>(AllocateJITMemoryAt(nullptr, size))))
|
||||
return ptr;
|
||||
#endif
|
||||
|
||||
INFO_LOG("Allocated JIT buffer of size {} at {} (0x{:X} bytes / {} MB away)", size, static_cast<void*>(ptr),
|
||||
std::abs(static_cast<ptrdiff_t>(ptr - base)),
|
||||
(std::abs(static_cast<ptrdiff_t>(ptr - base)) + (1024 * 1024 - 1)) / (1024 * 1024));
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
|
|
@ -58,6 +58,25 @@ void* MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size,
|
|||
void UnmapSharedMemory(void* baseaddr, size_t size);
|
||||
bool MemProtect(void* baseaddr, size_t size, PageProtect mode);
|
||||
|
||||
/// Returns the base address for the current process.
|
||||
const void* GetBaseAddress();
|
||||
|
||||
/// Allocates RWX memory in branch range from the base address.
|
||||
void* AllocateJITMemory(size_t size);
|
||||
|
||||
/// Releases RWX memory.
|
||||
void ReleaseJITMemory(void* ptr, size_t size);
|
||||
|
||||
/// Flushes the instruction cache on the host for the specified range.
|
||||
/// Only needed outside of X86, X86 has coherent D/I cache.
|
||||
#if !defined(CPU_ARCH_ARM32) && !defined(CPU_ARCH_ARM64) && !defined(CPU_ARCH_RISCV64)
|
||||
// clang-format off
|
||||
ALWAYS_INLINE static void FlushInstructionCache(void* address, size_t size) { }
|
||||
// clang-format on
|
||||
#else
|
||||
void FlushInstructionCache(void* address, size_t size);
|
||||
#endif
|
||||
|
||||
/// JIT write protect for Apple Silicon. Needs to be called prior to writing to any RWX pages.
|
||||
#if !defined(__APPLE__) || !defined(__aarch64__)
|
||||
// clang-format off
|
||||
|
|
|
@ -123,22 +123,27 @@ PerfScope MIPSPerfScope("MIPS");
|
|||
|
||||
#endif
|
||||
|
||||
// Currently remapping the code buffer doesn't work in macOS. TODO: Make dynamic instead...
|
||||
#ifndef __APPLE__
|
||||
#define USE_STATIC_CODE_BUFFER 1
|
||||
#endif
|
||||
|
||||
#if defined(CPU_ARCH_ARM32)
|
||||
// Use a smaller code buffer size on AArch32 to have a better chance of being in range.
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 16 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 8 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 20 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 4 * 1024 * 1024;
|
||||
#else
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 48 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
|
||||
#endif
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
alignas(HOST_PAGE_SIZE) static u8 s_code_storage[RECOMPILER_CODE_CACHE_SIZE + RECOMPILER_FAR_CODE_CACHE_SIZE];
|
||||
// On Linux ARM32/ARM64, we use a dedicated section in the ELF for storing code.
|
||||
// This is because without ASLR, or on certain ASLR offsets, the sbrk() heap ends up immediately following the text/data
|
||||
// sections, which means there isn't a large enough gap to fit within range on ARM32.
|
||||
#if defined(__linux__) && (defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64))
|
||||
#define USE_CODE_BUFFER_SECTION 1
|
||||
#ifdef __clang__
|
||||
#pragma clang section bss = ".jitstorage"
|
||||
__attribute__((aligned(HOST_PAGE_SIZE))) static u8 s_code_buffer_ptr[RECOMPILER_CODE_CACHE_SIZE];
|
||||
#pragma clang section bss = ""
|
||||
#endif
|
||||
#else
|
||||
static u8* s_code_buffer_ptr = nullptr;
|
||||
#endif
|
||||
|
||||
static JitCodeBuffer s_code_buffer;
|
||||
|
@ -162,20 +167,26 @@ bool CPU::CodeCache::IsUsingFastmem()
|
|||
|
||||
bool CPU::CodeCache::ProcessStartup(Error* error)
|
||||
{
|
||||
AllocateLUTs();
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
const bool has_buffer =
|
||||
s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, HOST_PAGE_SIZE);
|
||||
#ifdef USE_CODE_BUFFER_SECTION
|
||||
const u8* module_base = static_cast<const u8*>(MemMap::GetBaseAddress());
|
||||
INFO_LOG("Using JIT buffer section of size {} at {} (0x{:X} bytes / {} MB away)", sizeof(s_code_buffer_ptr),
|
||||
static_cast<void*>(s_code_buffer_ptr), std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)),
|
||||
(std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)) + (1024 * 1024 - 1)) / (1024 * 1024));
|
||||
const bool code_buffer_allocated =
|
||||
MemMap::MemProtect(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, PageProtect::ReadWriteExecute);
|
||||
#else
|
||||
const bool has_buffer = false;
|
||||
s_code_buffer_ptr = static_cast<u8*>(MemMap::AllocateJITMemory(RECOMPILER_CODE_CACHE_SIZE));
|
||||
const bool code_buffer_allocated = (s_code_buffer_ptr != nullptr);
|
||||
#endif
|
||||
if (!has_buffer && !s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
|
||||
if (!code_buffer_allocated) [[unlikely]]
|
||||
{
|
||||
Error::SetStringView(error, "Failed to initialize code space");
|
||||
Error::SetStringView(error, "Failed to allocate code storage. The log may contain more information, you will need "
|
||||
"to run DuckStation with -earlyconsole in the command line.");
|
||||
return false;
|
||||
}
|
||||
|
||||
AllocateLUTs();
|
||||
|
||||
if (!PageFaultHandler::Install(error))
|
||||
return false;
|
||||
|
||||
|
@ -184,17 +195,21 @@ bool CPU::CodeCache::ProcessStartup(Error* error)
|
|||
|
||||
void CPU::CodeCache::ProcessShutdown()
|
||||
{
|
||||
s_code_buffer.Destroy();
|
||||
DeallocateLUTs();
|
||||
|
||||
#ifndef USE_CODE_BUFFER_SECTION
|
||||
MemMap::ReleaseJITMemory(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE);
|
||||
#endif
|
||||
}
|
||||
|
||||
void CPU::CodeCache::Initialize()
|
||||
{
|
||||
Assert(s_blocks.empty());
|
||||
|
||||
// TODO: Reduce far code size when not using memory exceptions.
|
||||
if (IsUsingAnyRecompiler())
|
||||
{
|
||||
s_code_buffer.Reset();
|
||||
s_code_buffer.Reset(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
|
||||
CompileASMFunctions();
|
||||
ResetCodeLUT();
|
||||
}
|
||||
|
@ -219,7 +234,7 @@ void CPU::CodeCache::Reset()
|
|||
if (IsUsingAnyRecompiler())
|
||||
{
|
||||
ClearASMFunctions();
|
||||
s_code_buffer.Reset();
|
||||
s_code_buffer.Reset(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
|
||||
CompileASMFunctions();
|
||||
ResetCodeLUT();
|
||||
}
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
|
@ -171,7 +172,7 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
|||
}
|
||||
|
||||
if (flush_icache)
|
||||
JitCodeBuffer::FlushInstructionCache(code, kA32InstructionSizeInBytes);
|
||||
MemMap::FlushInstructionCache(code, kA32InstructionSizeInBytes);
|
||||
|
||||
return kA32InstructionSizeInBytes;
|
||||
}
|
||||
|
@ -202,7 +203,7 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
|
|||
s_trampoline_targets.emplace(target, offset);
|
||||
s_trampoline_used = offset + static_cast<u32>(size);
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(start, size);
|
||||
MemMap::FlushInstructionCache(start, size);
|
||||
return start;
|
||||
}
|
||||
|
||||
|
@ -1790,7 +1791,7 @@ void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::Loadstore
|
|||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
MemMap::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
|
@ -274,7 +276,7 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
|
|||
s_trampoline_targets.emplace(target, offset);
|
||||
s_trampoline_used = offset + static_cast<u32>(size);
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(start, size);
|
||||
MemMap::FlushInstructionCache(start, size);
|
||||
return start;
|
||||
}
|
||||
|
||||
|
@ -316,7 +318,7 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
|||
const u32 new_code = B | Assembler::ImmUncondBranch(disp);
|
||||
std::memcpy(code, &new_code, sizeof(new_code));
|
||||
if (flush_icache)
|
||||
JitCodeBuffer::FlushInstructionCache(code, kInstructionSize);
|
||||
MemMap::FlushInstructionCache(code, kInstructionSize);
|
||||
|
||||
return kInstructionSize;
|
||||
}
|
||||
|
@ -2100,7 +2102,7 @@ void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::Loadstore
|
|||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
MemMap::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#ifdef CPU_ARCH_X64
|
||||
|
||||
|
@ -1768,16 +1769,9 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
|
|||
|
||||
void CodeGenerator::EmitCall(const void* ptr)
|
||||
{
|
||||
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
|
||||
{
|
||||
DebugAssert(Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())));
|
||||
m_emit->call(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
|
||||
m_emit->call(GetHostReg64(RRETURN));
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
|
||||
{
|
||||
|
@ -2530,7 +2524,7 @@ void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::Loadstore
|
|||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
MemMap::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
|
|
@ -318,6 +318,34 @@ void System::CheckCacheLineSize()
|
|||
}
|
||||
}
|
||||
|
||||
/// Called on process startup, as early as possible.
/// Sets up the CPU code cache and then bus memory, and returns false if either step fails.
/// If bus memory fails, the code cache is shut down again first.
bool System::Internal::ProcessStartup(Error* error)
{
  Common::Timer alloc_timer;

  // Allocate JIT memory as soon as possible.
  const bool code_cache_ready = CPU::CodeCache::ProcessStartup(error);
  if (!code_cache_ready)
    return false;

  // Fastmem alloc *must* come after JIT alloc, otherwise it tends to eat the 4GB region after the executable on MacOS.
  const bool bus_memory_ready = Bus::AllocateMemory(error);
  if (!bus_memory_ready)
  {
    CPU::CodeCache::ProcessShutdown();
    return false;
  }

  VERBOSE_LOG("Memory allocation took {} ms.", alloc_timer.GetTimeMilliseconds());

  CheckCacheLineSize();

  return true;
}
|
||||
|
||||
/// Called on process shutdown. Releases bus memory, then shuts down the CPU code cache.
void System::Internal::ProcessShutdown()
|
||||
{
|
||||
// Bus memory is released before the code cache is shut down.
Bus::ReleaseMemory();
|
||||
CPU::CodeCache::ProcessShutdown();
|
||||
}
|
||||
|
||||
bool System::Internal::CPUThreadInitialize(Error* error)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
@ -332,17 +360,9 @@ bool System::Internal::CPUThreadInitialize(Error* error)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (!CPU::CodeCache::ProcessStartup(error) || !Bus::AllocateMemory(error))
|
||||
{
|
||||
CPUThreadShutdown();
|
||||
return false;
|
||||
}
|
||||
|
||||
// This will call back to Host::LoadSettings() -> ReloadSources().
|
||||
LoadSettings(false);
|
||||
|
||||
CheckCacheLineSize();
|
||||
|
||||
#ifdef ENABLE_RAINTEGRATION
|
||||
if (Host::GetBaseBoolSettingValue("Cheevos", "UseRAIntegration", false))
|
||||
Achievements::SwitchToRAIntegration();
|
||||
|
@ -377,9 +397,6 @@ void System::Internal::CPUThreadShutdown()
|
|||
|
||||
InputManager::CloseSources();
|
||||
|
||||
CPU::CodeCache::ProcessShutdown();
|
||||
Bus::ReleaseMemory();
|
||||
|
||||
#ifdef _WIN32
|
||||
CoUninitialize();
|
||||
#endif
|
||||
|
|
|
@ -504,10 +504,16 @@ namespace Internal {
|
|||
/// Performs mandatory hardware checks.
|
||||
bool PerformEarlyHardwareChecks(Error* error);
|
||||
|
||||
/// Called on process startup.
|
||||
bool CPUThreadInitialize(Error* error);
|
||||
/// Called on process startup, as early as possible.
|
||||
bool ProcessStartup(Error* error);
|
||||
|
||||
/// Called on process shutdown.
|
||||
void ProcessShutdown();
|
||||
|
||||
/// Called on CPU thread initialization.
|
||||
bool CPUThreadInitialize(Error* error);
|
||||
|
||||
/// Called on CPU thread shutdown.
|
||||
void CPUThreadShutdown();
|
||||
|
||||
/// Polls input, updates subsystems which are present while paused/inactive.
|
||||
|
|
|
@ -90,6 +90,7 @@ static constexpr u32 GDB_SERVER_POLLING_INTERVAL = 1;
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
namespace QtHost {
|
||||
static bool PerformEarlyHardwareChecks();
|
||||
static bool EarlyProcessStartup();
|
||||
static void RegisterTypes();
|
||||
static bool InitializeConfig(std::string settings_filename);
|
||||
static bool ShouldUsePortableMode();
|
||||
|
@ -128,11 +129,26 @@ EmuThread::EmuThread(QThread* ui_thread) : QThread(), m_ui_thread(ui_thread)
|
|||
|
||||
EmuThread::~EmuThread() = default;
|
||||
|
||||
void QtHost::RegisterTypes()
|
||||
{
|
||||
// Register any standard types we need elsewhere
|
||||
qRegisterMetaType<std::optional<WindowInfo>>("std::optional<WindowInfo>()");
|
||||
qRegisterMetaType<std::optional<bool>>();
|
||||
qRegisterMetaType<std::function<void()>>("std::function<void()>");
|
||||
qRegisterMetaType<std::shared_ptr<SystemBootParameters>>();
|
||||
qRegisterMetaType<const GameList::Entry*>();
|
||||
qRegisterMetaType<GPURenderer>("GPURenderer");
|
||||
qRegisterMetaType<InputBindingKey>("InputBindingKey");
|
||||
qRegisterMetaType<std::string>("std::string");
|
||||
qRegisterMetaType<std::vector<std::pair<std::string, std::string>>>(
|
||||
"std::vector<std::pair<std::string, std::string>>");
|
||||
}
|
||||
|
||||
bool QtHost::PerformEarlyHardwareChecks()
|
||||
{
|
||||
Error error;
|
||||
const bool okay = System::Internal::PerformEarlyHardwareChecks(&error);
|
||||
if (okay && !error.IsValid())
|
||||
if (okay && !error.IsValid()) [[likely]]
|
||||
return true;
|
||||
|
||||
if (okay)
|
||||
|
@ -149,19 +165,15 @@ bool QtHost::PerformEarlyHardwareChecks()
|
|||
return okay;
|
||||
}
|
||||
|
||||
void QtHost::RegisterTypes()
|
||||
bool QtHost::EarlyProcessStartup()
|
||||
{
|
||||
// Register any standard types we need elsewhere
|
||||
qRegisterMetaType<std::optional<WindowInfo>>("std::optional<WindowInfo>()");
|
||||
qRegisterMetaType<std::optional<bool>>();
|
||||
qRegisterMetaType<std::function<void()>>("std::function<void()>");
|
||||
qRegisterMetaType<std::shared_ptr<SystemBootParameters>>();
|
||||
qRegisterMetaType<const GameList::Entry*>();
|
||||
qRegisterMetaType<GPURenderer>("GPURenderer");
|
||||
qRegisterMetaType<InputBindingKey>("InputBindingKey");
|
||||
qRegisterMetaType<std::string>("std::string");
|
||||
qRegisterMetaType<std::vector<std::pair<std::string, std::string>>>(
|
||||
"std::vector<std::pair<std::string, std::string>>");
|
||||
Error error;
|
||||
if (System::Internal::ProcessStartup(&error)) [[likely]]
|
||||
return true;
|
||||
|
||||
QMessageBox::critical(nullptr, QStringLiteral("Process Startup Failed"),
|
||||
QString::fromStdString(error.GetDescription()));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool QtHost::InBatchMode()
|
||||
|
@ -452,7 +464,7 @@ bool QtHost::InitializeConfig(std::string settings_filename)
|
|||
EmuFolders::EnsureFoldersExist();
|
||||
MigrateSettings();
|
||||
|
||||
// We need to create the console window early, otherwise it appears behind the main window.
|
||||
// We need to create the console window early, otherwise it appears in front of the main window.
|
||||
if (!Log::IsConsoleOutputEnabled() &&
|
||||
s_base_settings_interface->GetBoolValue("Logging", "LogToConsole", Settings::DEFAULT_LOG_TO_CONSOLE))
|
||||
{
|
||||
|
@ -2508,6 +2520,9 @@ int main(int argc, char* argv[])
|
|||
if (!QtHost::ParseCommandLineParametersAndInitializeConfig(app, autoboot))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
if (!QtHost::EarlyProcessStartup())
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// Remove any previous-version remnants.
|
||||
if (s_cleanup_after_update)
|
||||
AutoUpdaterDialog::cleanupAfterUpdate();
|
||||
|
@ -2581,5 +2596,7 @@ shutdown_and_exit:
|
|||
// Ensure log is flushed.
|
||||
Log::SetFileOutputParams(false, nullptr);
|
||||
|
||||
System::Internal::ProcessShutdown();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -1,301 +1,40 @@
|
|||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "jit_code_buffer.h"
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
Log_SetChannel(JitCodeBuffer);
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "common/windows_headers.h"
|
||||
#else
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/mach_vm.h>
|
||||
#endif
|
||||
#endif
|
||||
#include <cstring>
|
||||
|
||||
JitCodeBuffer::JitCodeBuffer() = default;
|
||||
|
||||
JitCodeBuffer::JitCodeBuffer(u32 size, u32 far_code_size)
|
||||
JitCodeBuffer::~JitCodeBuffer() = default;
|
||||
|
||||
void JitCodeBuffer::Reset(void* ptr, u32 size, u32 far_code_size /* = 0 */)
|
||||
{
|
||||
if (!Allocate(size, far_code_size))
|
||||
Panic("Failed to allocate code space");
|
||||
}
|
||||
|
||||
JitCodeBuffer::JitCodeBuffer(void* buffer, u32 size, u32 far_code_size, u32 guard_pages)
|
||||
{
|
||||
if (!Initialize(buffer, size, far_code_size))
|
||||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
JitCodeBuffer::~JitCodeBuffer()
|
||||
{
|
||||
Destroy();
|
||||
}
|
||||
|
||||
/// Replaces any current buffer with a newly allocated one of (size + far_code_size) bytes.
/// size: bytes for the main code region. far_code_size: bytes for the far code region placed after it.
/// Returns false when no memory could be obtained at all.
bool JitCodeBuffer::Allocate(u32 size /* = 64 * 1024 * 1024 */, u32 far_code_size /* = 0 */)
{
  // Let go of whatever we were holding before taking a new buffer.
  Destroy();

  m_total_size = size + far_code_size;

#ifdef CPU_ARCH_X64
  // Look for a spot within 32-bit range of ourselves. The DuckStation binary is assumed to be
  // at most 256MB, so the largest offset is +/- 256MB + round_up_pow2(size), which works out
  // to 512MB for the JITs.
  static const u8 base_ptr = 0;
  const auto page_aligned_self = Common::AlignDownPow2(reinterpret_cast<uintptr_t>(&base_ptr), HOST_PAGE_SIZE);
  const u8* base = reinterpret_cast<const u8*>(page_aligned_self);
  const u32 max_displacement = 0x80000000u - Common::NextPow2(256 * 1024 * 1024 + m_total_size);

  // Clamp the search window if adding/subtracting the displacement wrapped around.
  const u8* max_address = base + max_displacement;
  if (max_address < base)
    max_address = reinterpret_cast<const u8*>(std::numeric_limits<uintptr_t>::max());

  const u8* min_address = base - max_displacement;
  if (min_address > base)
    min_address = nullptr;

  // Walk downwards from the top of the window in 64MB strides until one attempt sticks.
  const u32 step = 64 * 1024 * 1024;
  const u32 steps = static_cast<u32>(max_address - min_address) / step;
  for (u32 attempt = 0; attempt < steps; attempt++)
  {
    const u8* addr = max_address - (attempt * step);
    VERBOSE_LOG("Trying {} (base {}, offset {}, displacement 0x{:X})", static_cast<const void*>(addr),
                static_cast<const void*>(base), attempt, static_cast<ptrdiff_t>(addr - base));
    if (TryAllocateAt(addr))
      break;
  }

  if (!m_code_ptr)
  {
    // Nothing in range was available; fall back to wherever the OS puts us.
    ERROR_LOG("Failed to allocate JIT buffer in range, expect crashes.");
    if (!TryAllocateAt(nullptr))
      return false;
  }
  else
  {
    INFO_LOG("Allocated JIT buffer of size {} at {} (0x{:X} bytes away)", m_total_size, static_cast<void*>(m_code_ptr),
             static_cast<ptrdiff_t>(m_code_ptr - base));
  }
#else
  if (!TryAllocateAt(nullptr))
    return false;
#endif

  // Main code region starts at the beginning of the allocation.
  m_free_code_ptr = m_code_ptr;
  m_code_size = size;
  m_code_used = 0;

  // Far code region follows immediately after the main region.
  m_far_code_ptr = static_cast<u8*>(m_code_ptr) + size;
  m_free_far_code_ptr = m_far_code_ptr;
  m_far_code_size = far_code_size;
  m_far_code_used = 0;

  m_old_protection = 0;
  m_owns_buffer = true;
  return true;
}
|
||||
|
||||
bool JitCodeBuffer::TryAllocateAt(const void* addr)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
m_code_ptr = static_cast<u8*>(VirtualAlloc(const_cast<void*>(addr), m_total_size,
|
||||
addr ? (MEM_RESERVE | MEM_COMMIT) : MEM_COMMIT, PAGE_EXECUTE_READWRITE));
|
||||
if (!m_code_ptr)
|
||||
{
|
||||
if (!addr)
|
||||
ERROR_LOG("VirtualAlloc(RWX, {}) for internal buffer failed: {}", m_total_size, GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
#elif defined(__APPLE__) && !defined(__aarch64__)
|
||||
kern_return_t ret = mach_vm_allocate(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&addr), m_total_size,
|
||||
addr ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE);
|
||||
if (ret != KERN_SUCCESS)
|
||||
{
|
||||
ERROR_LOG("mach_vm_allocate() returned {}", ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = mach_vm_protect(mach_task_self(), reinterpret_cast<mach_vm_address_t>(addr), m_total_size, false,
|
||||
VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
|
||||
if (ret != KERN_SUCCESS)
|
||||
{
|
||||
ERROR_LOG("mach_vm_protect() returned {}", ret);
|
||||
mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(addr), m_total_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
m_code_ptr = static_cast<u8*>(const_cast<void*>(addr));
|
||||
return true;
|
||||
#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__FreeBSD__)
|
||||
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
#if defined(__linux__)
|
||||
// Linux does the right thing, allows us to not disturb an existing mapping.
|
||||
if (addr)
|
||||
flags |= MAP_FIXED_NOREPLACE;
|
||||
#elif defined(__FreeBSD__)
|
||||
// FreeBSD achieves the same with MAP_FIXED and MAP_EXCL.
|
||||
if (addr)
|
||||
flags |= MAP_FIXED | MAP_EXCL;
|
||||
#elif defined(__APPLE__)
|
||||
// On ARM64, we need to use MAP_JIT, which means we can't use MAP_FIXED.
|
||||
if (addr)
|
||||
return false;
|
||||
flags |= MAP_JIT;
|
||||
#endif
|
||||
|
||||
m_code_ptr =
|
||||
static_cast<u8*>(mmap(const_cast<void*>(addr), m_total_size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0));
|
||||
if (!m_code_ptr)
|
||||
{
|
||||
if (!addr)
|
||||
ERROR_LOG("mmap(RWX, {}) for internal buffer failed: {}", m_total_size, errno);
|
||||
|
||||
return false;
|
||||
}
|
||||
else if (addr && m_code_ptr != addr)
|
||||
{
|
||||
if (munmap(m_code_ptr, m_total_size) != 0)
|
||||
ERROR_LOG("Failed to munmap() incorrectly hinted allocation: {}", errno);
|
||||
m_code_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool JitCodeBuffer::Initialize(void* buffer, u32 size, u32 far_code_size /* = 0 */, u32 guard_size /* = 0 */)
|
||||
{
|
||||
Destroy();
|
||||
|
||||
if ((far_code_size > 0 && guard_size >= far_code_size) || (far_code_size + (guard_size * 2)) > size)
|
||||
return false;
|
||||
|
||||
#if defined(_WIN32)
|
||||
DWORD old_protect = 0;
|
||||
if (!VirtualProtect(buffer, size, PAGE_EXECUTE_READWRITE, &old_protect))
|
||||
{
|
||||
ERROR_LOG("VirtualProtect(RWX) for external buffer failed: {}", GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (guard_size > 0)
|
||||
{
|
||||
DWORD old_guard_protect = 0;
|
||||
u8* guard_at_end = (static_cast<u8*>(buffer) + size) - guard_size;
|
||||
if (!VirtualProtect(buffer, guard_size, PAGE_NOACCESS, &old_guard_protect) ||
|
||||
!VirtualProtect(guard_at_end, guard_size, PAGE_NOACCESS, &old_guard_protect))
|
||||
{
|
||||
ERROR_LOG("VirtualProtect(NOACCESS) for guard page failed: {}", GetLastError());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
m_code_ptr = static_cast<u8*>(buffer);
|
||||
m_old_protection = static_cast<u32>(old_protect);
|
||||
#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__FreeBSD__)
|
||||
if (mprotect(buffer, size, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
|
||||
{
|
||||
ERROR_LOG("mprotect(RWX) for external buffer failed: {}", errno);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (guard_size > 0)
|
||||
{
|
||||
u8* guard_at_end = (static_cast<u8*>(buffer) + size) - guard_size;
|
||||
if (mprotect(buffer, guard_size, PROT_NONE) != 0 || mprotect(guard_at_end, guard_size, PROT_NONE) != 0)
|
||||
{
|
||||
ERROR_LOG("mprotect(NONE) for guard page failed: {}", errno);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// reasonable default?
|
||||
m_code_ptr = static_cast<u8*>(buffer);
|
||||
m_old_protection = PROT_READ | PROT_WRITE;
|
||||
#else
|
||||
m_code_ptr = nullptr;
|
||||
#endif
|
||||
|
||||
if (!m_code_ptr)
|
||||
return false;
|
||||
Assert(far_code_size < size);
|
||||
|
||||
m_total_size = size;
|
||||
m_free_code_ptr = m_code_ptr + guard_size;
|
||||
m_code_size = size - far_code_size - (guard_size * 2);
|
||||
m_code_ptr = static_cast<u8*>(ptr);
|
||||
m_free_code_ptr = m_code_ptr;
|
||||
m_code_size = size - far_code_size;
|
||||
m_code_used = 0;
|
||||
|
||||
m_far_code_ptr = static_cast<u8*>(m_code_ptr) + m_code_size;
|
||||
m_far_code_size = far_code_size;
|
||||
m_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(m_code_ptr) + m_code_size) : nullptr;
|
||||
m_free_far_code_ptr = m_far_code_ptr;
|
||||
m_far_code_size = far_code_size - guard_size;
|
||||
m_far_code_used = 0;
|
||||
|
||||
m_guard_size = guard_size;
|
||||
m_owns_buffer = false;
|
||||
return true;
|
||||
}
|
||||
MemMap::BeginCodeWrite();
|
||||
|
||||
void JitCodeBuffer::Destroy()
|
||||
{
|
||||
if (m_owns_buffer)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
if (!VirtualFree(m_code_ptr, 0, MEM_RELEASE))
|
||||
ERROR_LOG("Failed to free code pointer {}", static_cast<void*>(m_code_ptr));
|
||||
#elif defined(__APPLE__) && !defined(__aarch64__)
|
||||
const kern_return_t res =
|
||||
mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(m_code_ptr), m_total_size);
|
||||
if (res != KERN_SUCCESS)
|
||||
ERROR_LOG("mach_vm_deallocate() failed: {}", res);
|
||||
#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__FreeBSD__)
|
||||
if (munmap(m_code_ptr, m_total_size) != 0)
|
||||
ERROR_LOG("Failed to free code pointer {}", static_cast<void*>(m_code_ptr));
|
||||
#endif
|
||||
}
|
||||
else if (m_code_ptr)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
DWORD old_protect = 0;
|
||||
if (!VirtualProtect(m_code_ptr, m_total_size, m_old_protection, &old_protect))
|
||||
ERROR_LOG("Failed to restore protection on {}", static_cast<void*>(m_code_ptr));
|
||||
#else
|
||||
if (mprotect(m_code_ptr, m_total_size, m_old_protection) != 0)
|
||||
ERROR_LOG("Failed to restore protection on {}", static_cast<void*>(m_code_ptr));
|
||||
#endif
|
||||
}
|
||||
std::memset(m_code_ptr, 0, m_total_size);
|
||||
MemMap::FlushInstructionCache(m_code_ptr, m_total_size);
|
||||
|
||||
m_code_ptr = nullptr;
|
||||
m_free_code_ptr = nullptr;
|
||||
m_code_size = 0;
|
||||
m_code_reserve_size = 0;
|
||||
m_code_used = 0;
|
||||
m_far_code_ptr = nullptr;
|
||||
m_free_far_code_ptr = nullptr;
|
||||
m_far_code_size = 0;
|
||||
m_far_code_used = 0;
|
||||
m_total_size = 0;
|
||||
m_guard_size = 0;
|
||||
m_old_protection = 0;
|
||||
m_owns_buffer = false;
|
||||
}
|
||||
|
||||
/// Sets aside `size` bytes at the current start of the code region.
/// Must be called before any code has been committed (asserted), and `size` must be smaller than
/// the remaining code region (asserted).
void JitCodeBuffer::ReserveCode(u32 size)
{
  Assert(m_code_used == 0);
  Assert(size < m_code_size);

  // Move the free pointer past the reservation and shrink the usable region to match.
  m_code_reserve_size += size;
  m_free_code_ptr += size;
  m_code_size -= size;
}
|
||||
|
||||
void JitCodeBuffer::CommitCode(u32 length)
|
||||
|
@ -303,10 +42,7 @@ void JitCodeBuffer::CommitCode(u32 length)
|
|||
if (length == 0)
|
||||
return;
|
||||
|
||||
#if defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_RISCV64)
|
||||
// ARM instruction and data caches are not coherent, we need to flush after every block.
|
||||
FlushInstructionCache(m_free_code_ptr, length);
|
||||
#endif
|
||||
MemMap::FlushInstructionCache(m_free_code_ptr, length);
|
||||
|
||||
Assert(length <= (m_code_size - m_code_used));
|
||||
m_free_code_ptr += length;
|
||||
|
@ -318,36 +54,13 @@ void JitCodeBuffer::CommitFarCode(u32 length)
|
|||
if (length == 0)
|
||||
return;
|
||||
|
||||
#if defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_RISCV64)
|
||||
// ARM instruction and data caches are not coherent, we need to flush after every block.
|
||||
FlushInstructionCache(m_free_far_code_ptr, length);
|
||||
#endif
|
||||
MemMap::FlushInstructionCache(m_free_far_code_ptr, length);
|
||||
|
||||
Assert(length <= (m_far_code_size - m_far_code_used));
|
||||
m_free_far_code_ptr += length;
|
||||
m_far_code_used += length;
|
||||
}
|
||||
|
||||
void JitCodeBuffer::Reset()
|
||||
{
|
||||
MemMap::BeginCodeWrite();
|
||||
|
||||
m_free_code_ptr = m_code_ptr + m_guard_size + m_code_reserve_size;
|
||||
m_code_used = 0;
|
||||
std::memset(m_free_code_ptr, 0, m_code_size);
|
||||
FlushInstructionCache(m_free_code_ptr, m_code_size);
|
||||
|
||||
if (m_far_code_size > 0)
|
||||
{
|
||||
m_free_far_code_ptr = m_far_code_ptr;
|
||||
m_far_code_used = 0;
|
||||
std::memset(m_free_far_code_ptr, 0, m_far_code_size);
|
||||
FlushInstructionCache(m_free_far_code_ptr, m_far_code_size);
|
||||
}
|
||||
|
||||
MemMap::EndCodeWrite();
|
||||
}
|
||||
|
||||
void JitCodeBuffer::Align(u32 alignment, u8 padding_value)
|
||||
{
|
||||
DebugAssert(Common::IsPow2(alignment));
|
||||
|
@ -359,14 +72,3 @@ void JitCodeBuffer::Align(u32 alignment, u8 padding_value)
|
|||
m_free_code_ptr += num_padding_bytes;
|
||||
m_code_used += num_padding_bytes;
|
||||
}
|
||||
|
||||
/// Flushes the host instruction cache for the `size` bytes starting at `address`.
/// Uses the Win32 call on Windows and the compiler builtin on GCC/Clang; any other toolchain is a
/// compile-time error.
void JitCodeBuffer::FlushInstructionCache(void* address, u32 size)
{
#if defined(_WIN32)
  ::FlushInstructionCache(GetCurrentProcess(), address, size);
#elif defined(__GNUC__) || defined(__clang__)
  char* const range_begin = static_cast<char*>(address);
  char* const range_end = range_begin + size;
  __builtin___clear_cache(range_begin, range_end);
#else
#error Unknown platform.
#endif
}
|
||||
|
|
|
@ -8,16 +8,11 @@ class JitCodeBuffer
|
|||
{
|
||||
public:
|
||||
JitCodeBuffer();
|
||||
JitCodeBuffer(u32 size, u32 far_code_size);
|
||||
JitCodeBuffer(void* buffer, u32 size, u32 far_code_size, u32 guard_size);
|
||||
~JitCodeBuffer();
|
||||
|
||||
bool IsValid() const { return (m_code_ptr != nullptr); }
|
||||
|
||||
bool Allocate(u32 size = 64 * 1024 * 1024, u32 far_code_size = 0);
|
||||
bool Initialize(void* buffer, u32 size, u32 far_code_size = 0, u32 guard_size = 0);
|
||||
void Destroy();
|
||||
void Reset();
|
||||
void Reset(void* ptr, u32 size, u32 far_code_size = 0);
|
||||
|
||||
ALWAYS_INLINE u8* GetCodePointer() const { return m_code_ptr; }
|
||||
ALWAYS_INLINE u32 GetTotalSize() const { return m_total_size; }
|
||||
|
@ -33,7 +28,6 @@ public:
|
|||
|
||||
ALWAYS_INLINE u8* GetFreeCodePointer() const { return m_free_code_ptr; }
|
||||
ALWAYS_INLINE u32 GetFreeCodeSpace() const { return static_cast<u32>(m_code_size - m_code_used); }
|
||||
void ReserveCode(u32 size);
|
||||
void CommitCode(u32 length);
|
||||
|
||||
ALWAYS_INLINE u8* GetFreeFarCodePointer() const { return m_free_far_code_ptr; }
|
||||
|
@ -44,12 +38,7 @@ public:
|
|||
/// Assumes alignment is a power-of-two.
|
||||
void Align(u32 alignment, u8 padding_value);
|
||||
|
||||
/// Flushes the instruction cache on the host for the specified range.
|
||||
static void FlushInstructionCache(void* address, u32 size);
|
||||
|
||||
private:
|
||||
bool TryAllocateAt(const void* addr);
|
||||
|
||||
u8* m_code_ptr = nullptr;
|
||||
u8* m_free_code_ptr = nullptr;
|
||||
u32 m_code_size = 0;
|
||||
|
@ -62,7 +51,4 @@ private:
|
|||
u32 m_far_code_used = 0;
|
||||
|
||||
u32 m_total_size = 0;
|
||||
u32 m_guard_size = 0;
|
||||
u32 m_old_protection = 0;
|
||||
bool m_owns_buffer = false;
|
||||
};
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <memory>
|
||||
|
||||
#include "common/windows_headers.h"
|
||||
#include <Psapi.h>
|
||||
#include <WinSock2.h>
|
||||
#include <mmsystem.h>
|
||||
|
||||
|
|
Loading…
Reference in a new issue