// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "threading.h" #include "assert.h" #include #if !defined(_WIN32) && !defined(__APPLE__) #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #endif #if defined(_WIN32) #include "windows_headers.h" #include #else #include #include #if defined(__linux__) #include #include #include // glibc < v2.30 doesn't define gettid... #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 30 #include #define gettid() syscall(SYS_gettid) #endif #elif defined(__APPLE__) #include #include #include #include #include #else #include #endif #endif #ifdef _WIN32 union FileTimeU64Union { FILETIME filetime; u64 u64time; }; #endif #ifdef __APPLE__ // gets the CPU time used by the current thread (both system and user), in // microseconds, returns 0 on failure static u64 getthreadtime(thread_port_t thread) { mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; kern_return_t kr = thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count); if (kr != KERN_SUCCESS) return 0; // add system and user time return (u64)info.user_time.seconds * (u64)1e6 + (u64)info.user_time.microseconds + (u64)info.system_time.seconds * (u64)1e6 + (u64)info.system_time.microseconds; } #endif #if defined(__linux__) || defined(__FreeBSD__) // Helper function to get either either the current cpu usage // in called thread or in id thread static u64 get_thread_time(void* id = 0) { clockid_t cid; if (id) { int err = pthread_getcpuclockid((pthread_t)id, &cid); if (err) return 0; } else { cid = CLOCK_THREAD_CPUTIME_ID; } struct timespec ts; int err = clock_gettime(cid, &ts); if (err) return 0; return (u64)ts.tv_sec * (u64)1e6 + (u64)ts.tv_nsec / (u64)1e3; } #endif void Threading::Timeslice() { #if defined(_WIN32) ::Sleep(0); #elif defined(__APPLE__) sched_yield(); #else sched_yield(); #endif } Threading::ThreadHandle::ThreadHandle() = default; #ifdef _WIN32 Threading::ThreadHandle::ThreadHandle(const ThreadHandle& handle) { if (handle.m_native_handle) { HANDLE new_handle; if (DuplicateHandle(GetCurrentProcess(), (HANDLE)handle.m_native_handle, GetCurrentProcess(), &new_handle, THREAD_QUERY_INFORMATION | THREAD_SET_LIMITED_INFORMATION, FALSE, 0)) { m_native_handle = (void*)new_handle; } } } #else Threading::ThreadHandle::ThreadHandle(const ThreadHandle& handle) : m_native_handle(handle.m_native_handle) #ifdef __linux__ , m_native_id(handle.m_native_id) #endif { } #endif #ifdef _WIN32 Threading::ThreadHandle::ThreadHandle(ThreadHandle&& handle) : m_native_handle(handle.m_native_handle) { handle.m_native_handle = nullptr; } #else Threading::ThreadHandle::ThreadHandle(ThreadHandle&& handle) : m_native_handle(handle.m_native_handle) #ifdef __linux__ , m_native_id(handle.m_native_id) #endif { handle.m_native_handle = nullptr; #ifdef __linux__ handle.m_native_id = 0; #endif } #endif Threading::ThreadHandle::~ThreadHandle() { #ifdef _WIN32 if (m_native_handle) CloseHandle(m_native_handle); #endif } Threading::ThreadHandle Threading::ThreadHandle::GetForCallingThread() { ThreadHandle ret; #ifdef _WIN32 ret.m_native_handle = (void*)OpenThread(THREAD_QUERY_INFORMATION | THREAD_SET_LIMITED_INFORMATION, FALSE, GetCurrentThreadId()); #else ret.m_native_handle = (void*)pthread_self(); #ifdef __linux__ ret.m_native_id = gettid(); #endif #endif return ret; } Threading::ThreadHandle& Threading::ThreadHandle::operator=(ThreadHandle&& handle) { #ifdef _WIN32 if (m_native_handle) CloseHandle((HANDLE)m_native_handle); m_native_handle = handle.m_native_handle; handle.m_native_handle = nullptr; #else m_native_handle = handle.m_native_handle; handle.m_native_handle = nullptr; #ifdef __linux__ m_native_id = handle.m_native_id; handle.m_native_id = 0; #endif #endif return *this; } Threading::ThreadHandle& Threading::ThreadHandle::operator=(const ThreadHandle& handle) { #ifdef _WIN32 if (m_native_handle) { CloseHandle((HANDLE)m_native_handle); m_native_handle = nullptr; } HANDLE new_handle; if (DuplicateHandle(GetCurrentProcess(), (HANDLE)handle.m_native_handle, GetCurrentProcess(), &new_handle, THREAD_QUERY_INFORMATION | THREAD_SET_LIMITED_INFORMATION, FALSE, 0)) { m_native_handle = (void*)new_handle; } #else m_native_handle = handle.m_native_handle; #ifdef __linux__ m_native_id = handle.m_native_id; #endif #endif return *this; } u64 Threading::ThreadHandle::GetCPUTime() const { #if defined(_WIN32) && !defined(_M_ARM64) u64 ret = 0; if (m_native_handle) QueryThreadCycleTime((HANDLE)m_native_handle, &ret); return ret; #elif defined(_WIN32) FileTimeU64Union user = {}, kernel = {}; FILETIME dummy; GetThreadTimes((HANDLE)m_native_handle, &dummy, &dummy, &kernel.filetime, &user.filetime); return user.u64time + kernel.u64time; #elif defined(__APPLE__) return getthreadtime(pthread_mach_thread_np((pthread_t)m_native_handle)); #elif defined(__linux__) || defined(__FreeBSD__) return get_thread_time(m_native_handle); #else return 0; #endif } bool Threading::ThreadHandle::SetAffinity(u64 processor_mask) const { #if defined(_WIN32) if (processor_mask == 0) processor_mask = ~processor_mask; return (SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)processor_mask) != 0 || GetLastError() != ERROR_SUCCESS); #elif defined(__linux__) cpu_set_t set; CPU_ZERO(&set); if (processor_mask != 0) { for (u32 i = 0; i < 64; i++) { if (processor_mask & (static_cast(1) << i)) { CPU_SET(i, &set); } } } else { long num_processors = sysconf(_SC_NPROCESSORS_CONF); for (long i = 0; i < num_processors; i++) { CPU_SET(i, &set); } } return sched_setaffinity((pid_t)m_native_id, sizeof(set), &set) >= 0; #else return false; #endif } Threading::Thread::Thread() = default; Threading::Thread::Thread(Thread&& thread) : ThreadHandle(thread), m_stack_size(thread.m_stack_size) { thread.m_stack_size = 0; } Threading::Thread::Thread(EntryPoint func) : ThreadHandle() { if (!Start(std::move(func))) Panic("Failed to start implicitly started thread."); } Threading::Thread::~Thread() { AssertMsg(!m_native_handle, "Thread should be detached or joined at destruction"); } void Threading::Thread::SetStackSize(u32 size) { AssertMsg(!m_native_handle, "Can't change the stack size on a started thread"); m_stack_size = size; } #if defined(_WIN32) unsigned Threading::Thread::ThreadProc(void* param) { std::unique_ptr entry(static_cast(param)); (*entry.get())(); return 0; } bool Threading::Thread::Start(EntryPoint func) { AssertMsg(!m_native_handle, "Can't start an already-started thread"); std::unique_ptr func_clone(std::make_unique(std::move(func))); unsigned thread_id; m_native_handle = reinterpret_cast(_beginthreadex(nullptr, m_stack_size, ThreadProc, func_clone.get(), 0, &thread_id)); if (!m_native_handle) return false; // thread started, it'll release the memory func_clone.release(); return true; } #elif defined(__linux__) // For Linux, we have to do a bit of trickery here to get the thread's ID back from // the thread itself, because it's not part of pthreads. We use a semaphore to signal // when the thread has started, and filled in thread_id_ptr. struct ThreadProcParameters { Threading::Thread::EntryPoint func; Threading::KernelSemaphore* start_semaphore; unsigned int* thread_id_ptr; }; void* Threading::Thread::ThreadProc(void* param) { std::unique_ptr entry(static_cast(param)); *entry->thread_id_ptr = gettid(); entry->start_semaphore->Post(); entry->func(); return nullptr; } bool Threading::Thread::Start(EntryPoint func) { AssertMsg(!m_native_handle, "Can't start an already-started thread"); KernelSemaphore start_semaphore; std::unique_ptr params(std::make_unique()); params->func = std::move(func); params->start_semaphore = &start_semaphore; params->thread_id_ptr = &m_native_id; pthread_attr_t attrs; bool has_attributes = false; if (m_stack_size != 0) { has_attributes = true; pthread_attr_init(&attrs); } if (m_stack_size != 0) pthread_attr_setstacksize(&attrs, m_stack_size); pthread_t handle; const int res = pthread_create(&handle, has_attributes ? &attrs : nullptr, ThreadProc, params.get()); if (res != 0) return false; // wait until it sets our native id start_semaphore.Wait(); // thread started, it'll release the memory m_native_handle = (void*)handle; params.release(); return true; } #else void* Threading::Thread::ThreadProc(void* param) { std::unique_ptr entry(static_cast(param)); (*entry.get())(); return nullptr; } bool Threading::Thread::Start(EntryPoint func) { AssertMsg(!m_native_handle, "Can't start an already-started thread"); std::unique_ptr func_clone(std::make_unique(std::move(func))); pthread_attr_t attrs; bool has_attributes = false; if (m_stack_size != 0) { has_attributes = true; pthread_attr_init(&attrs); } if (m_stack_size != 0) pthread_attr_setstacksize(&attrs, m_stack_size); pthread_t handle; const int res = pthread_create(&handle, has_attributes ? &attrs : nullptr, ThreadProc, func_clone.get()); if (res != 0) return false; // thread started, it'll release the memory m_native_handle = (void*)handle; func_clone.release(); return true; } #endif void Threading::Thread::Detach() { AssertMsg(m_native_handle, "Can't detach without a thread"); #ifdef _WIN32 CloseHandle((HANDLE)m_native_handle); m_native_handle = nullptr; #else pthread_detach((pthread_t)m_native_handle); m_native_handle = nullptr; #ifdef __linux__ m_native_id = 0; #endif #endif } void Threading::Thread::Join() { AssertMsg(m_native_handle, "Can't join without a thread"); #ifdef _WIN32 const DWORD res = WaitForSingleObject((HANDLE)m_native_handle, INFINITE); if (res != WAIT_OBJECT_0) Panic("WaitForSingleObject() for thread join failed"); CloseHandle((HANDLE)m_native_handle); m_native_handle = nullptr; #else void* retval; const int res = pthread_join((pthread_t)m_native_handle, &retval); if (res != 0) Panic("pthread_join() for thread join failed"); m_native_handle = nullptr; #ifdef __linux__ m_native_id = 0; #endif #endif } Threading::ThreadHandle& Threading::Thread::operator=(Thread&& thread) { ThreadHandle::operator=(thread); m_stack_size = thread.m_stack_size; thread.m_stack_size = 0; return *this; } u64 Threading::GetThreadCpuTime() { #if defined(_WIN32) && !defined(_M_ARM64) u64 ret = 0; QueryThreadCycleTime(GetCurrentThread(), &ret); return ret; #elif defined(_WIN32) FileTimeU64Union user = {}, kernel = {}; FILETIME dummy; GetThreadTimes(GetCurrentThread(), &dummy, &dummy, &kernel.filetime, &user.filetime); return user.u64time + kernel.u64time; #elif defined(__APPLE__) return getthreadtime(pthread_mach_thread_np(pthread_self())); #else return get_thread_time(nullptr); #endif } u64 Threading::GetThreadTicksPerSecond() { #if defined(_WIN32) && !defined(_M_ARM64) // On x86, despite what the MS documentation says, this basically appears to be rdtsc. // So, the frequency is our base clock speed (and stable regardless of power management). static u64 frequency = 0; if (frequency == 0) [[unlikely]] { frequency = 1000000; HKEY hKey; if (RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &hKey) == ERROR_SUCCESS) { DWORD value; DWORD value_size = sizeof(value); if (RegQueryValueExW(hKey, L"~MHz", 0, nullptr, reinterpret_cast(&value), &value_size) == ERROR_SUCCESS) { // value is in mhz, convert to hz frequency *= value; } RegCloseKey(hKey); } } return frequency; #elif defined(_WIN32) return 10000000; #elif defined(__APPLE__) return 1000000; #else return 1000000; #endif } void Threading::SetNameOfCurrentThread(const char* name) { // This feature needs Windows headers and MSVC's SEH support: #if defined(_WIN32) && defined(_MSC_VER) // This code sample was borrowed form some obscure MSDN article. // In a rare bout of sanity, it's an actual Microsoft-published hack // that actually works! static const int MS_VC_EXCEPTION = 0x406D1388; #pragma pack(push, 8) struct THREADNAME_INFO { DWORD dwType; // Must be 0x1000. LPCSTR szName; // Pointer to name (in user addr space). DWORD dwThreadID; // Thread ID (-1=caller thread). DWORD dwFlags; // Reserved for future use, must be zero. }; #pragma pack(pop) THREADNAME_INFO info; info.dwType = 0x1000; info.szName = name; info.dwThreadID = GetCurrentThreadId(); info.dwFlags = 0; __try { RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info); } __except (EXCEPTION_EXECUTE_HANDLER) { } #elif defined(__linux__) // Extract of manpage: "The name can be up to 16 bytes long, and should be // null-terminated if it contains fewer bytes." prctl(PR_SET_NAME, name, 0, 0, 0); #elif defined(__APPLE__) pthread_setname_np(name); #else pthread_set_name_np(pthread_self(), name); #endif } Threading::KernelSemaphore::KernelSemaphore() { #ifdef _WIN32 m_sema = CreateSemaphore(nullptr, 0, LONG_MAX, nullptr); #elif defined(__APPLE__) semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, 0); #else sem_init(&m_sema, false, 0); #endif } Threading::KernelSemaphore::~KernelSemaphore() { #ifdef _WIN32 CloseHandle(m_sema); #elif defined(__APPLE__) semaphore_destroy(mach_task_self(), m_sema); #else sem_destroy(&m_sema); #endif } void Threading::KernelSemaphore::Post() { #ifdef _WIN32 ReleaseSemaphore(m_sema, 1, nullptr); #elif defined(__APPLE__) semaphore_signal(m_sema); #else sem_post(&m_sema); #endif } void Threading::KernelSemaphore::Wait() { #ifdef _WIN32 WaitForSingleObject(m_sema, INFINITE); #elif defined(__APPLE__) semaphore_wait(m_sema); #else sem_wait(&m_sema); #endif } bool Threading::KernelSemaphore::TryWait() { #ifdef _WIN32 return WaitForSingleObject(m_sema, 0) == WAIT_OBJECT_0; #elif defined(__APPLE__) mach_timespec_t time = {}; kern_return_t res = semaphore_timedwait(m_sema, time); return (res != KERN_OPERATION_TIMED_OUT); #else return sem_trywait(&m_sema) == 0; #endif }