// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_device.h" #include "core/host.h" // TODO: Remove, needed for getting fullscreen mode. #include "core/settings.h" // TODO: Remove, needed for dump directory. #include "gpu_framebuffer_manager.h" #include "shadergen.h" #include "common/assert.h" #include "common/dynamic_library.h" #include "common/error.h" #include "common/file_system.h" #include "common/log.h" #include "common/path.h" #include "common/scoped_guard.h" #include "common/string_util.h" #include "common/timer.h" #include "fmt/format.h" #include "imgui.h" #include "shaderc/shaderc.h" #include "spirv_cross/spirv_cross_c.h" #include "xxhash.h" Log_SetChannel(GPUDevice); #ifdef _WIN32 #include "common/windows_headers.h" #include "d3d11_device.h" #include "d3d12_device.h" #include "d3d_common.h" #endif #ifdef ENABLE_OPENGL #include "opengl_device.h" #endif #ifdef ENABLE_VULKAN #include "vulkan_device.h" #endif std::unique_ptr<GPUDevice> g_gpu_device; static std::string s_pipeline_cache_path; size_t GPUDevice::s_total_vram_usage = 0; GPUDevice::Statistics GPUDevice::s_stats = {}; GPUSampler::GPUSampler() = default; GPUSampler::~GPUSampler() = default; GPUSampler::Config GPUSampler::GetNearestConfig() { Config config = {}; config.address_u = GPUSampler::AddressMode::ClampToEdge; config.address_v = GPUSampler::AddressMode::ClampToEdge; config.address_w = GPUSampler::AddressMode::ClampToEdge; config.min_filter = GPUSampler::Filter::Nearest; config.mag_filter = GPUSampler::Filter::Nearest; return config; } GPUSampler::Config GPUSampler::GetLinearConfig() { Config config = {}; config.address_u = GPUSampler::AddressMode::ClampToEdge; config.address_v = GPUSampler::AddressMode::ClampToEdge; config.address_w = GPUSampler::AddressMode::ClampToEdge; config.min_filter = GPUSampler::Filter::Linear; config.mag_filter = GPUSampler::Filter::Linear; return config; } GPUShader::GPUShader(GPUShaderStage stage) : m_stage(stage) { } GPUShader::~GPUShader() = default; const char* GPUShader::GetStageName(GPUShaderStage stage) { static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> names = {"Vertex", "Fragment", "Geometry", "Compute"}; return names[static_cast<u32>(stage)]; } GPUPipeline::GPUPipeline() = default; GPUPipeline::~GPUPipeline() = default; size_t GPUPipeline::InputLayoutHash::operator()(const InputLayout& il) const { std::size_t h = 0; hash_combine(h, il.vertex_attributes.size(), il.vertex_stride); for (const VertexAttribute& va : il.vertex_attributes) hash_combine(h, va.key); return h; } bool GPUPipeline::InputLayout::operator==(const InputLayout& rhs) const { return (vertex_stride == rhs.vertex_stride && vertex_attributes.size() == rhs.vertex_attributes.size() && std::memcmp(vertex_attributes.data(), rhs.vertex_attributes.data(), sizeof(VertexAttribute) * rhs.vertex_attributes.size()) == 0); } bool GPUPipeline::InputLayout::operator!=(const InputLayout& rhs) const { return (vertex_stride != rhs.vertex_stride || vertex_attributes.size() != rhs.vertex_attributes.size() || std::memcmp(vertex_attributes.data(), rhs.vertex_attributes.data(), sizeof(VertexAttribute) * rhs.vertex_attributes.size()) != 0); } GPUPipeline::RasterizationState GPUPipeline::RasterizationState::GetNoCullState() { RasterizationState ret = {}; ret.cull_mode = CullMode::None; return ret; } GPUPipeline::DepthState GPUPipeline::DepthState::GetNoTestsState() { DepthState ret = {}; ret.depth_test = DepthFunc::Always; return ret; } GPUPipeline::DepthState GPUPipeline::DepthState::GetAlwaysWriteState() { DepthState ret = {}; ret.depth_test = DepthFunc::Always; ret.depth_write = true; return ret; } GPUPipeline::BlendState GPUPipeline::BlendState::GetNoBlendingState() { BlendState ret = {}; ret.write_mask = 0xf; return ret; } GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState() { BlendState ret = {}; ret.enable = true; ret.src_blend = BlendFunc::SrcAlpha; ret.dst_blend = BlendFunc::InvSrcAlpha; ret.blend_op = BlendOp::Add; ret.src_alpha_blend = BlendFunc::One; ret.dst_alpha_blend = BlendFunc::Zero; ret.alpha_blend_op = BlendOp::Add; ret.write_mask = 0xf; return ret; } void GPUPipeline::GraphicsConfig::SetTargetFormats(GPUTexture::Format color_format, GPUTexture::Format depth_format_ /* = GPUTexture::Format::Unknown */) { color_formats[0] = color_format; for (size_t i = 1; i < std::size(color_formats); i++) color_formats[i] = GPUTexture::Format::Unknown; depth_format = depth_format_; } u32 GPUPipeline::GraphicsConfig::GetRenderTargetCount() const { u32 num_rts = 0; for (; num_rts < static_cast<u32>(std::size(color_formats)); num_rts++) { if (color_formats[num_rts] == GPUTexture::Format::Unknown) break; } return num_rts; } GPUTextureBuffer::GPUTextureBuffer(Format format, u32 size) : m_format(format), m_size_in_elements(size) { } GPUTextureBuffer::~GPUTextureBuffer() = default; u32 GPUTextureBuffer::GetElementSize(Format format) { static constexpr std::array<u32, static_cast<u32>(Format::MaxCount)> element_size = {{ sizeof(u16), }}; return element_size[static_cast<u32>(format)]; } bool GPUFramebufferManagerBase::Key::operator==(const Key& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) == 0); } bool GPUFramebufferManagerBase::Key::operator!=(const Key& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) != 0); } bool GPUFramebufferManagerBase::Key::ContainsRT(const GPUTexture* tex) const { // num_rts is worse for predictability. for (u32 i = 0; i < GPUDevice::MAX_RENDER_TARGETS; i++) { if (rts[i] == tex) return true; } return false; } size_t GPUFramebufferManagerBase::KeyHash::operator()(const Key& key) const { if constexpr (sizeof(void*) == 8) return XXH3_64bits(&key, sizeof(key)); else return XXH32(&key, sizeof(key), 0x1337); } GPUDevice::GPUDevice() { ResetStatistics(); } GPUDevice::~GPUDevice() = default; RenderAPI GPUDevice::GetPreferredAPI() { static RenderAPI preferred_renderer = RenderAPI::None; if (preferred_renderer == RenderAPI::None) [[unlikely]] { #if defined(_WIN32) && !defined(_M_ARM64) // Perfer DX11 on Windows, except ARM64, where QCom has slow DX11 drivers. preferred_renderer = RenderAPI::D3D11; #elif defined(_WIN32) && defined(_M_ARM64) preferred_renderer = RenderAPI::D3D12; #elif defined(__APPLE__) // Prefer Metal on MacOS. preferred_renderer = RenderAPI::Metal; #elif defined(ENABLE_OPENGL) && defined(ENABLE_VULKAN) // On Linux, if we have both GL and Vulkan, prefer VK if the driver isn't software. preferred_renderer = VulkanDevice::IsSuitableDefaultRenderer() ? RenderAPI::Vulkan : RenderAPI::OpenGL; #elif defined(ENABLE_OPENGL) preferred_renderer = RenderAPI::OpenGL; #elif defined(ENABLE_VULKAN) preferred_renderer = RenderAPI::Vulkan; #else // Uhhh, what? ERROR_LOG("Somehow don't have any renderers available..."); preferred_renderer = RenderAPI::None; #endif } return preferred_renderer; } const char* GPUDevice::RenderAPIToString(RenderAPI api) { switch (api) { // clang-format off #define CASE(x) case RenderAPI::x: return #x CASE(None); CASE(D3D11); CASE(D3D12); CASE(Metal); CASE(Vulkan); CASE(OpenGL); CASE(OpenGLES); #undef CASE // clang-format on default: return "Unknown"; } } const char* GPUDevice::ShaderLanguageToString(GPUShaderLanguage language) { switch (language) { // clang-format off #define CASE(x) case GPUShaderLanguage::x: return #x CASE(HLSL); CASE(GLSL); CASE(GLSLES); CASE(MSL); CASE(SPV); #undef CASE // clang-format on default: return "Unknown"; } } bool GPUDevice::IsSameRenderAPI(RenderAPI lhs, RenderAPI rhs) { return (lhs == rhs || ((lhs == RenderAPI::OpenGL || lhs == RenderAPI::OpenGLES) && (rhs == RenderAPI::OpenGL || rhs == RenderAPI::OpenGLES))); } GPUDevice::AdapterInfoList GPUDevice::GetAdapterListForAPI(RenderAPI api) { AdapterInfoList ret; switch (api) { #ifdef ENABLE_VULKAN case RenderAPI::Vulkan: ret = VulkanDevice::GetAdapterList(); break; #endif #ifdef ENABLE_OPENGL case RenderAPI::OpenGL: case RenderAPI::OpenGLES: // No way of querying. break; #endif #ifdef _WIN32 case RenderAPI::D3D11: case RenderAPI::D3D12: ret = D3DCommon::GetAdapterInfoList(); break; #endif #ifdef __APPLE__ case RenderAPI::Metal: ret = WrapGetMetalAdapterList(); break; #endif default: break; } return ret; } bool GPUDevice::Create(std::string_view adapter, std::string_view shader_cache_path, u32 shader_cache_version, bool debug_device, GPUVSyncMode vsync, bool allow_present_throttle, bool threaded_presentation, std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, Error* error) { m_vsync_mode = vsync; m_allow_present_throttle = allow_present_throttle; m_debug_device = debug_device; if (!AcquireWindow(true)) { Error::SetStringView(error, "Failed to acquire window from host."); return false; } if (!CreateDevice(adapter, threaded_presentation, exclusive_fullscreen_control, disabled_features, error)) { if (error && !error->IsValid()) error->SetStringView("Failed to create device."); return false; } INFO_LOG("Graphics Driver Info:\n{}", GetDriverInfo()); OpenShaderCache(shader_cache_path, shader_cache_version); if (!CreateResources(error)) { Error::AddPrefix(error, "Failed to create base resources."); return false; } return true; } void GPUDevice::Destroy() { PurgeTexturePool(); if (HasSurface()) DestroySurface(); DestroyResources(); CloseShaderCache(); DestroyDevice(); } bool GPUDevice::SupportsExclusiveFullscreen() const { return false; } void GPUDevice::OpenShaderCache(std::string_view base_path, u32 version) { if (m_features.shader_cache && !base_path.empty()) { const std::string basename = GetShaderCacheBaseName("shaders"); const std::string filename = Path::Combine(base_path, basename); if (!m_shader_cache.Open(filename.c_str(), version)) { WARNING_LOG("Failed to open shader cache. Creating new cache."); if (!m_shader_cache.Create()) ERROR_LOG("Failed to create new shader cache."); // Squish the pipeline cache too, it's going to be stale. if (m_features.pipeline_cache) { const std::string pc_filename = Path::Combine(base_path, TinyString::from_format("{}.bin", GetShaderCacheBaseName("pipelines"))); if (FileSystem::FileExists(pc_filename.c_str())) { INFO_LOG("Removing old pipeline cache '{}'", Path::GetFileName(pc_filename)); FileSystem::DeleteFile(pc_filename.c_str()); } } } } else { // Still need to set the version - GL needs it. m_shader_cache.Open(std::string_view(), version); } s_pipeline_cache_path = {}; if (m_features.pipeline_cache && !base_path.empty()) { const std::string basename = GetShaderCacheBaseName("pipelines"); std::string filename = Path::Combine(base_path, TinyString::from_format("{}.bin", basename)); if (ReadPipelineCache(filename)) s_pipeline_cache_path = std::move(filename); else WARNING_LOG("Failed to read pipeline cache."); } } void GPUDevice::CloseShaderCache() { m_shader_cache.Close(); if (!s_pipeline_cache_path.empty()) { DynamicHeapArray<u8> data; if (GetPipelineCacheData(&data)) { // Save disk writes if it hasn't changed, think of the poor SSDs. FILESYSTEM_STAT_DATA sd; if (!FileSystem::StatFile(s_pipeline_cache_path.c_str(), &sd) || sd.Size != static_cast<s64>(data.size())) { INFO_LOG("Writing {} bytes to '{}'", data.size(), Path::GetFileName(s_pipeline_cache_path)); if (!FileSystem::WriteBinaryFile(s_pipeline_cache_path.c_str(), data.data(), data.size())) ERROR_LOG("Failed to write pipeline cache to '{}'", Path::GetFileName(s_pipeline_cache_path)); } else { INFO_LOG("Skipping updating pipeline cache '{}' due to no changes.", Path::GetFileName(s_pipeline_cache_path)); } } s_pipeline_cache_path = {}; } } std::string GPUDevice::GetShaderCacheBaseName(std::string_view type) const { const std::string_view debug_suffix = m_debug_device ? "_debug" : ""; std::string ret; switch (GetRenderAPI()) { #ifdef _WIN32 case RenderAPI::D3D11: ret = fmt::format( "d3d11_{}_{}{}", type, D3DCommon::GetFeatureLevelShaderModelString(D3D11Device::GetInstance().GetD3DDevice()->GetFeatureLevel()), debug_suffix); break; case RenderAPI::D3D12: ret = fmt::format("d3d12_{}{}", type, debug_suffix); break; #endif #ifdef ENABLE_VULKAN case RenderAPI::Vulkan: ret = fmt::format("vulkan_{}{}", type, debug_suffix); break; #endif #ifdef ENABLE_OPENGL case RenderAPI::OpenGL: ret = fmt::format("opengl_{}{}", type, debug_suffix); break; case RenderAPI::OpenGLES: ret = fmt::format("opengles_{}{}", type, debug_suffix); break; #endif #ifdef __APPLE__ case RenderAPI::Metal: ret = fmt::format("metal_{}{}", type, debug_suffix); break; #endif default: UnreachableCode(); break; } return ret; } bool GPUDevice::ReadPipelineCache(const std::string& filename) { return false; } bool GPUDevice::GetPipelineCacheData(DynamicHeapArray<u8>* data) { return false; } bool GPUDevice::AcquireWindow(bool recreate_window) { std::optional<WindowInfo> wi = Host::AcquireRenderWindow(recreate_window); if (!wi.has_value()) return false; INFO_LOG("Render window is {}x{}.", wi->surface_width, wi->surface_height); m_window_info = wi.value(); return true; } bool GPUDevice::CreateResources(Error* error) { if (!(m_nearest_sampler = CreateSampler(GPUSampler::GetNearestConfig())) || !(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig()))) { Error::SetStringView(error, "Failed to create samplers"); return false; } const RenderAPI render_api = GetRenderAPI(); ShaderGen shadergen(render_api, ShaderGen::GetShaderLanguageForAPI(render_api), m_features.dual_source_blend, m_features.framebuffer_fetch); std::unique_ptr<GPUShader> imgui_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateImGuiVertexShader(), error); std::unique_ptr<GPUShader> imgui_fs = CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateImGuiFragmentShader(), error); if (!imgui_vs || !imgui_fs) { Error::AddPrefix(error, "Failed to compile ImGui shaders: "); return false; } GL_OBJECT_NAME(imgui_vs, "ImGui Vertex Shader"); GL_OBJECT_NAME(imgui_fs, "ImGui Fragment Shader"); static constexpr GPUPipeline::VertexAttribute imgui_attributes[] = { GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ImDrawVert, pos)), GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ImDrawVert, uv)), GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Semantic::Color, 0, GPUPipeline::VertexAttribute::Type::UNorm8, 4, OFFSETOF(ImDrawVert, col)), }; GPUPipeline::GraphicsConfig plconfig; plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.input_layout.vertex_attributes = imgui_attributes; plconfig.input_layout.vertex_stride = sizeof(ImDrawVert); plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState(); plconfig.blend.write_mask = 0x7; plconfig.SetTargetFormats(HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8); plconfig.samples = 1; plconfig.per_sample_shading = false; plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; plconfig.vertex_shader = imgui_vs.get(); plconfig.geometry_shader = nullptr; plconfig.fragment_shader = imgui_fs.get(); m_imgui_pipeline = CreatePipeline(plconfig, error); if (!m_imgui_pipeline) { Error::AddPrefix(error, "Failed to compile ImGui pipeline: "); return false; } GL_OBJECT_NAME(m_imgui_pipeline, "ImGui Pipeline"); return true; } void GPUDevice::DestroyResources() { m_imgui_font_texture.reset(); m_imgui_pipeline.reset(); m_imgui_pipeline.reset(); m_linear_sampler.reset(); m_nearest_sampler.reset(); m_shader_cache.Close(); } void GPUDevice::RenderImGui() { GL_SCOPE("RenderImGui"); ImGui::Render(); const ImDrawData* draw_data = ImGui::GetDrawData(); if (draw_data->CmdListsCount == 0) return; SetPipeline(m_imgui_pipeline.get()); SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); const float L = 0.0f; const float R = static_cast<float>(m_window_info.surface_width); const float T = 0.0f; const float B = static_cast<float>(m_window_info.surface_height); const float ortho_projection[4][4] = { {2.0f / (R - L), 0.0f, 0.0f, 0.0f}, {0.0f, 2.0f / (T - B), 0.0f, 0.0f}, {0.0f, 0.0f, 0.5f, 0.0f}, {(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f}, }; PushUniformBuffer(ortho_projection, sizeof(ortho_projection)); // Render command lists const bool flip = UsesLowerLeftOrigin(); for (int n = 0; n < draw_data->CmdListsCount; n++) { const ImDrawList* cmd_list = draw_data->CmdLists[n]; static_assert(sizeof(ImDrawIdx) == sizeof(DrawIndex)); u32 base_vertex, base_index; UploadVertexBuffer(cmd_list->VtxBuffer.Data, sizeof(ImDrawVert), cmd_list->VtxBuffer.Size, &base_vertex); UploadIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size, &base_index); for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) { const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; DebugAssert(!pcmd->UserCallback); if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y) continue; if (flip) { const s32 height = static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y); const s32 flipped_y = static_cast<s32>(m_window_info.surface_height) - static_cast<s32>(pcmd->ClipRect.y) - height; SetScissor(static_cast<s32>(pcmd->ClipRect.x), flipped_y, static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x), height); } else { SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y), static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y)); } SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get()); DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset); } } } void GPUDevice::UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex) { void* map; u32 space; MapVertexBuffer(vertex_size, vertex_count, &map, &space, base_vertex); std::memcpy(map, vertices, vertex_size * vertex_count); UnmapVertexBuffer(vertex_size, vertex_count); } void GPUDevice::UploadIndexBuffer(const u16* indices, u32 index_count, u32* base_index) { u16* map; u32 space; MapIndexBuffer(index_count, &map, &space, base_index); std::memcpy(map, indices, sizeof(u16) * index_count); UnmapIndexBuffer(index_count); } void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size) { void* map = MapUniformBuffer(data_size); std::memcpy(map, data, data_size); UnmapUniformBuffer(data_size); } void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds, GPUPipeline::RenderPassFlag render_pass_flags) { SetRenderTargets(rt ? &rt : nullptr, rt ? 1 : 0, ds, render_pass_flags); } void GPUDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) { SetViewport(GSVector4i(x, y, x + width, y + height)); } void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) { SetScissor(GSVector4i(x, y, x + width, y + height)); } void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) { SetViewportAndScissor(GSVector4i(x, y, x + width, y + height)); } void GPUDevice::SetViewportAndScissor(const GSVector4i rc) { SetViewport(rc); SetScissor(rc); } void GPUDevice::ClearRenderTarget(GPUTexture* t, u32 c) { t->SetClearColor(c); } void GPUDevice::ClearDepth(GPUTexture* t, float d) { t->SetClearDepth(d); } void GPUDevice::InvalidateRenderTarget(GPUTexture* t) { t->SetState(GPUTexture::State::Invalidated); } std::unique_ptr<GPUShader> GPUDevice::CreateShader(GPUShaderStage stage, GPUShaderLanguage language, std::string_view source, Error* error /* = nullptr */, const char* entry_point /* = "main" */) { std::unique_ptr<GPUShader> shader; if (!m_shader_cache.IsOpen()) { shader = CreateShaderFromSource(stage, language, source, entry_point, nullptr, error); return shader; } const GPUShaderCache::CacheIndexKey key = m_shader_cache.GetCacheKey(stage, language, source, entry_point); DynamicHeapArray<u8> binary; if (m_shader_cache.Lookup(key, &binary)) { shader = CreateShaderFromBinary(stage, binary, error); if (shader) return shader; ERROR_LOG("Failed to create shader from binary (driver changed?). Clearing cache."); m_shader_cache.Clear(); } shader = CreateShaderFromSource(stage, language, source, entry_point, &binary, error); if (!shader) return shader; // Don't insert empty shaders into the cache... if (!binary.empty()) { if (!m_shader_cache.Insert(key, binary.data(), static_cast<u32>(binary.size()))) m_shader_cache.Close(); } return shader; } bool GPUDevice::GetRequestedExclusiveFullscreenMode(u32* width, u32* height, float* refresh_rate) { const std::string mode = Host::GetBaseStringSettingValue("GPU", "FullscreenMode", ""); if (!mode.empty()) { const std::string_view mode_view = mode; std::string_view::size_type sep1 = mode.find('x'); if (sep1 != std::string_view::npos) { std::optional<u32> owidth = StringUtil::FromChars<u32>(mode_view.substr(0, sep1)); sep1++; while (sep1 < mode.length() && std::isspace(mode[sep1])) sep1++; if (owidth.has_value() && sep1 < mode.length()) { std::string_view::size_type sep2 = mode.find('@', sep1); if (sep2 != std::string_view::npos) { std::optional<u32> oheight = StringUtil::FromChars<u32>(mode_view.substr(sep1, sep2 - sep1)); sep2++; while (sep2 < mode.length() && std::isspace(mode[sep2])) sep2++; if (oheight.has_value() && sep2 < mode.length()) { std::optional<float> orefresh_rate = StringUtil::FromChars<float>(mode_view.substr(sep2)); if (orefresh_rate.has_value()) { *width = owidth.value(); *height = oheight.value(); *refresh_rate = orefresh_rate.value(); return true; } } } } } } *width = 0; *height = 0; *refresh_rate = 0; return false; } std::string GPUDevice::GetFullscreenModeString(u32 width, u32 height, float refresh_rate) { return fmt::format("{} x {} @ {} hz", width, height, refresh_rate); } std::string GPUDevice::GetShaderDumpPath(std::string_view name) { return Path::Combine(EmuFolders::Dumps, name); } void GPUDevice::DumpBadShader(std::string_view code, std::string_view errors) { static u32 next_bad_shader_id = 0; const std::string filename = GetShaderDumpPath(fmt::format("bad_shader_{}.txt", ++next_bad_shader_id)); auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); if (fp) { if (!code.empty()) std::fwrite(code.data(), code.size(), 1, fp.get()); std::fputs("\n\n**** ERRORS ****\n", fp.get()); if (!errors.empty()) std::fwrite(errors.data(), errors.size(), 1, fp.get()); } } std::array<float, 4> GPUDevice::RGBA8ToFloat(u32 rgba) { return std::array<float, 4>{static_cast<float>(rgba & UINT32_C(0xFF)) * (1.0f / 255.0f), static_cast<float>((rgba >> 8) & UINT32_C(0xFF)) * (1.0f / 255.0f), static_cast<float>((rgba >> 16) & UINT32_C(0xFF)) * (1.0f / 255.0f), static_cast<float>(rgba >> 24) * (1.0f / 255.0f)}; } bool GPUDevice::UpdateImGuiFontTexture() { ImGuiIO& io = ImGui::GetIO(); unsigned char* pixels; int width, height; io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); const u32 pitch = sizeof(u32) * width; if (m_imgui_font_texture && m_imgui_font_texture->GetWidth() == static_cast<u32>(width) && m_imgui_font_texture->GetHeight() == static_cast<u32>(height) && m_imgui_font_texture->Update(0, 0, static_cast<u32>(width), static_cast<u32>(height), pixels, pitch)) { io.Fonts->SetTexID(m_imgui_font_texture.get()); return true; } std::unique_ptr<GPUTexture> new_font = FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, pixels, pitch); if (!new_font) return false; RecycleTexture(std::move(m_imgui_font_texture)); m_imgui_font_texture = std::move(new_font); io.Fonts->SetTexID(m_imgui_font_texture.get()); return true; } bool GPUDevice::UsesLowerLeftOrigin() const { const RenderAPI api = GetRenderAPI(); return (api == RenderAPI::OpenGL || api == RenderAPI::OpenGLES); } GSVector4i GPUDevice::FlipToLowerLeft(GSVector4i rc, s32 target_height) { const s32 height = rc.height(); const s32 flipped_y = target_height - rc.top - height; rc.top = flipped_y; rc.bottom = flipped_y + height; return rc; } bool GPUDevice::IsTexturePoolType(GPUTexture::Type type) { return (type == GPUTexture::Type::Texture || type == GPUTexture::Type::DynamicTexture); } std::unique_ptr<GPUTexture> GPUDevice::FetchTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, const void* data /*= nullptr*/, u32 data_stride /*= 0*/) { std::unique_ptr<GPUTexture> ret; const TexturePoolKey key = {static_cast<u16>(width), static_cast<u16>(height), static_cast<u8>(layers), static_cast<u8>(levels), static_cast<u8>(samples), type, format, 0u}; const bool is_texture = IsTexturePoolType(type); TexturePool& pool = is_texture ? m_texture_pool : m_target_pool; const u32 pool_size = (is_texture ? MAX_TEXTURE_POOL_SIZE : MAX_TARGET_POOL_SIZE); TexturePool::iterator it; if (is_texture && m_features.prefer_unused_textures) { // Try to find a texture that wasn't used this frame first. for (it = m_texture_pool.begin(); it != m_texture_pool.end(); ++it) { if (it->use_counter == m_texture_pool_counter) { // We're into textures recycled this frame, not going to find anything newer. // But prefer reuse over creating a new texture. if (m_texture_pool.size() < pool_size) { it = m_texture_pool.end(); break; } } if (it->key == key) break; } } else { for (it = pool.begin(); it != pool.end(); ++it) { if (it->key == key) break; } } if (it != pool.end()) { if (!data || it->texture->Update(0, 0, width, height, data, data_stride, 0, 0)) { ret = std::move(it->texture); pool.erase(it); return ret; } else { // This shouldn't happen... ERROR_LOG("Failed to upload {}x{} to pooled texture", width, height); } } ret = CreateTexture(width, height, layers, levels, samples, type, format, data, data_stride); return ret; } std::unique_ptr<GPUTexture, GPUDevice::PooledTextureDeleter> GPUDevice::FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, const void* data /*= nullptr*/, u32 data_stride /*= 0*/, bool dynamic /*= false*/) { std::unique_ptr<GPUTexture> ret = FetchTexture(width, height, layers, levels, samples, type, format, data, data_stride); return std::unique_ptr<GPUTexture, PooledTextureDeleter>(ret.release()); } void GPUDevice::RecycleTexture(std::unique_ptr<GPUTexture> texture) { if (!texture) return; const TexturePoolKey key = {static_cast<u16>(texture->GetWidth()), static_cast<u16>(texture->GetHeight()), static_cast<u8>(texture->GetLayers()), static_cast<u8>(texture->GetLevels()), static_cast<u8>(texture->GetSamples()), texture->GetType(), texture->GetFormat(), 0u}; const bool is_texture = IsTexturePoolType(texture->GetType()); TexturePool& pool = is_texture ? m_texture_pool : m_target_pool; pool.push_back({std::move(texture), m_texture_pool_counter, key}); const u32 max_size = is_texture ? MAX_TEXTURE_POOL_SIZE : MAX_TARGET_POOL_SIZE; while (pool.size() > max_size) { DEBUG_LOG("Trim {}x{} texture from pool", pool.front().texture->GetWidth(), pool.front().texture->GetHeight()); pool.pop_front(); } } void GPUDevice::PurgeTexturePool() { m_texture_pool_counter = 0; m_texture_pool.clear(); m_target_pool.clear(); } void GPUDevice::TrimTexturePool() { GL_INS_FMT("Texture Pool Size: {}", m_texture_pool.size()); GL_INS_FMT("Target Pool Size: {}", m_target_pool.size()); GL_INS_FMT("VRAM Usage: {:.2f} MB", s_total_vram_usage / 1048576.0); DEBUG_LOG("Texture Pool Size: {} Target Pool Size: {} VRAM: {:.2f} MB", m_texture_pool.size(), m_target_pool.size(), s_total_vram_usage / 1048756.0); if (m_texture_pool.empty() && m_target_pool.empty()) return; const u32 prev_counter = m_texture_pool_counter++; for (u32 pool_idx = 0; pool_idx < 2; pool_idx++) { TexturePool& pool = pool_idx ? m_target_pool : m_texture_pool; for (auto it = pool.begin(); it != pool.end();) { const u32 delta = (prev_counter - it->use_counter); if (delta < POOL_PURGE_DELAY) break; DEBUG_LOG("Trim {}x{} texture from pool", it->texture->GetWidth(), it->texture->GetHeight()); it = pool.erase(it); } } if (m_texture_pool_counter < prev_counter) [[unlikely]] { // wrapped around, handle it if (m_texture_pool.empty() && m_target_pool.empty()) { m_texture_pool_counter = 0; } else { const u32 texture_min = m_texture_pool.empty() ? std::numeric_limits<u32>::max() : m_texture_pool.front().use_counter; const u32 target_min = m_target_pool.empty() ? std::numeric_limits<u32>::max() : m_target_pool.front().use_counter; const u32 reduce = std::min(texture_min, target_min); m_texture_pool_counter -= reduce; for (u32 pool_idx = 0; pool_idx < 2; pool_idx++) { TexturePool& pool = pool_idx ? m_target_pool : m_texture_pool; for (TexturePoolEntry& entry : pool) entry.use_counter -= reduce; } } } } bool GPUDevice::ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u32 new_height, GPUTexture::Type type, GPUTexture::Format format, bool preserve /* = true */) { GPUTexture* old_tex = tex->get(); DebugAssert(!old_tex || (old_tex->GetLayers() == 1 && old_tex->GetLevels() == 1 && old_tex->GetSamples() == 1)); std::unique_ptr<GPUTexture> new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format); if (!new_tex) [[unlikely]] { ERROR_LOG("Failed to create new {}x{} texture", new_width, new_height); return false; } if (old_tex) { if (old_tex->GetState() == GPUTexture::State::Cleared) { if (type == GPUTexture::Type::RenderTarget) ClearRenderTarget(new_tex.get(), old_tex->GetClearColor()); } else if (old_tex->GetState() == GPUTexture::State::Dirty) { const u32 copy_width = std::min(new_width, old_tex->GetWidth()); const u32 copy_height = std::min(new_height, old_tex->GetHeight()); if (type == GPUTexture::Type::RenderTarget) ClearRenderTarget(new_tex.get(), 0); CopyTextureRegion(new_tex.get(), 0, 0, 0, 0, old_tex, 0, 0, 0, 0, copy_width, copy_height); } } else if (preserve) { // If we're expecting data to be there, make sure to clear it. if (type == GPUTexture::Type::RenderTarget) ClearRenderTarget(new_tex.get(), 0); } RecycleTexture(std::move(*tex)); *tex = std::move(new_tex); return true; } bool GPUDevice::ShouldSkipPresentingFrame() { // Only needed with FIFO. But since we're so fast, we allow it always. if (!m_allow_present_throttle) return false; const float throttle_rate = (m_window_info.surface_refresh_rate > 0.0f) ? m_window_info.surface_refresh_rate : 60.0f; const float throttle_period = 1.0f / throttle_rate; const u64 now = Common::Timer::GetCurrentValue(); const double diff = Common::Timer::ConvertValueToSeconds(now - m_last_frame_displayed_time); if (diff < throttle_period) return true; m_last_frame_displayed_time = now; return false; } void GPUDevice::ThrottlePresentation() { const float throttle_rate = (m_window_info.surface_refresh_rate > 0.0f) ? m_window_info.surface_refresh_rate : 60.0f; const u64 sleep_period = Common::Timer::ConvertNanosecondsToValue(1e+9f / static_cast<double>(throttle_rate)); const u64 current_ts = Common::Timer::GetCurrentValue(); // Allow it to fall behind/run ahead up to 2*period. Sleep isn't that precise, plus we need to // allow time for the actual rendering. const u64 max_variance = sleep_period * 2; if (static_cast<u64>(std::abs(static_cast<s64>(current_ts - m_last_frame_displayed_time))) > max_variance) m_last_frame_displayed_time = current_ts + sleep_period; else m_last_frame_displayed_time += sleep_period; Common::Timer::SleepUntil(m_last_frame_displayed_time, false); } bool GPUDevice::SetGPUTimingEnabled(bool enabled) { return false; } float GPUDevice::GetAndResetAccumulatedGPUTime() { return 0.0f; } void GPUDevice::ResetStatistics() { s_stats = {}; } std::unique_ptr<GPUDevice> GPUDevice::CreateDeviceForAPI(RenderAPI api) { switch (api) { #ifdef ENABLE_VULKAN case RenderAPI::Vulkan: return std::make_unique<VulkanDevice>(); #endif #ifdef ENABLE_OPENGL case RenderAPI::OpenGL: case RenderAPI::OpenGLES: return std::make_unique<OpenGLDevice>(); #endif #ifdef _WIN32 case RenderAPI::D3D12: return std::make_unique<D3D12Device>(); case RenderAPI::D3D11: return std::make_unique<D3D11Device>(); #endif #ifdef __APPLE__ case RenderAPI::Metal: return WrapNewMetalDevice(); #endif default: return {}; } } #define SHADERC_FUNCTIONS(X) \ X(shaderc_compiler_initialize) \ X(shaderc_compiler_release) \ X(shaderc_compile_options_initialize) \ X(shaderc_compile_options_release) \ X(shaderc_compile_options_set_source_language) \ X(shaderc_compile_options_set_generate_debug_info) \ X(shaderc_compile_options_set_optimization_level) \ X(shaderc_compile_options_set_target_env) \ X(shaderc_compilation_status_to_string) \ X(shaderc_compile_into_spv) \ X(shaderc_result_release) \ X(shaderc_result_get_length) \ X(shaderc_result_get_num_warnings) \ X(shaderc_result_get_bytes) \ X(shaderc_result_get_compilation_status) \ X(shaderc_result_get_error_message) #define SPIRV_CROSS_FUNCTIONS(X) \ X(spvc_context_create) \ X(spvc_context_destroy) \ X(spvc_context_set_error_callback) \ X(spvc_context_parse_spirv) \ X(spvc_context_create_compiler) \ X(spvc_compiler_create_compiler_options) \ X(spvc_compiler_create_shader_resources) \ X(spvc_compiler_get_execution_model) \ X(spvc_compiler_options_set_bool) \ X(spvc_compiler_options_set_uint) \ X(spvc_compiler_install_compiler_options) \ X(spvc_compiler_require_extension) \ X(spvc_compiler_compile) \ X(spvc_resources_get_resource_list_for_type) #ifdef _WIN32 #define SPIRV_CROSS_HLSL_FUNCTIONS(X) X(spvc_compiler_hlsl_add_resource_binding) #else #define SPIRV_CROSS_HLSL_FUNCTIONS(X) #endif #ifdef __APPLE__ #define SPIRV_CROSS_MSL_FUNCTIONS(X) X(spvc_compiler_msl_add_resource_binding) #else #define SPIRV_CROSS_MSL_FUNCTIONS(X) #endif // TODO: NOT thread safe, yet. namespace dyn_libs { static bool OpenShaderc(Error* error); static void CloseShaderc(); static bool OpenSpirvCross(Error* error); static void CloseSpirvCross(); static void CloseAll(); static DynamicLibrary s_shaderc_library; static DynamicLibrary s_spirv_cross_library; static shaderc_compiler_t s_shaderc_compiler = nullptr; static bool s_close_registered = false; #define ADD_FUNC(F) static decltype(&::F) F; SHADERC_FUNCTIONS(ADD_FUNC) SPIRV_CROSS_FUNCTIONS(ADD_FUNC) SPIRV_CROSS_HLSL_FUNCTIONS(ADD_FUNC) SPIRV_CROSS_MSL_FUNCTIONS(ADD_FUNC) #undef ADD_FUNC } // namespace dyn_libs bool dyn_libs::OpenShaderc(Error* error) { if (s_shaderc_library.IsOpen()) return true; const std::string libname = DynamicLibrary::GetVersionedFilename("shaderc_shared"); if (!s_shaderc_library.Open(libname.c_str(), error)) { Error::AddPrefix(error, "Failed to load shaderc: "); return false; } #define LOAD_FUNC(F) \ if (!s_shaderc_library.GetSymbol(#F, &F)) \ { \ Error::SetStringFmt(error, "Failed to find function {}", #F); \ CloseShaderc(); \ return false; \ } SHADERC_FUNCTIONS(LOAD_FUNC) #undef LOAD_FUNC s_shaderc_compiler = shaderc_compiler_initialize(); if (!s_shaderc_compiler) { Error::SetStringView(error, "shaderc_compiler_initialize() failed"); CloseShaderc(); return false; } if (!s_close_registered) { s_close_registered = true; std::atexit(&dyn_libs::CloseAll); } return true; } void dyn_libs::CloseShaderc() { if (s_shaderc_compiler) { shaderc_compiler_release(s_shaderc_compiler); s_shaderc_compiler = nullptr; } #define UNLOAD_FUNC(F) F = nullptr; SHADERC_FUNCTIONS(UNLOAD_FUNC) #undef UNLOAD_FUNC s_shaderc_library.Close(); } bool dyn_libs::OpenSpirvCross(Error* error) { if (s_spirv_cross_library.IsOpen()) return true; #ifdef _WIN32 // SPVC's build on Windows doesn't spit out a versioned DLL. const std::string libname = DynamicLibrary::GetVersionedFilename("spirv-cross-c-shared"); #else const std::string libname = DynamicLibrary::GetVersionedFilename("spirv-cross-c-shared", SPVC_C_API_VERSION_MAJOR); #endif if (!s_spirv_cross_library.Open(libname.c_str(), error)) { Error::AddPrefix(error, "Failed to load spirv-cross: "); return false; } #define LOAD_FUNC(F) \ if (!s_spirv_cross_library.GetSymbol(#F, &F)) \ { \ Error::SetStringFmt(error, "Failed to find function {}", #F); \ CloseShaderc(); \ return false; \ } SPIRV_CROSS_FUNCTIONS(LOAD_FUNC) SPIRV_CROSS_HLSL_FUNCTIONS(LOAD_FUNC) SPIRV_CROSS_MSL_FUNCTIONS(LOAD_FUNC) #undef LOAD_FUNC if (!s_close_registered) { s_close_registered = true; std::atexit(&dyn_libs::CloseAll); } return true; } void dyn_libs::CloseSpirvCross() { #define UNLOAD_FUNC(F) F = nullptr; SPIRV_CROSS_FUNCTIONS(UNLOAD_FUNC) SPIRV_CROSS_HLSL_FUNCTIONS(UNLOAD_FUNC) SPIRV_CROSS_MSL_FUNCTIONS(UNLOAD_FUNC) #undef UNLOAD_FUNC s_spirv_cross_library.Close(); } void dyn_libs::CloseAll() { CloseShaderc(); CloseSpirvCross(); } #undef SPIRV_CROSS_HLSL_FUNCTIONS #undef SPIRV_CROSS_MSL_FUNCTIONS #undef SPIRV_CROSS_FUNCTIONS #undef SHADERC_FUNCTIONS bool GPUDevice::CompileGLSLShaderToVulkanSpv(GPUShaderStage stage, GPUShaderLanguage source_language, std::string_view source, const char* entry_point, bool optimization, bool nonsemantic_debug_info, DynamicHeapArray<u8>* out_binary, Error* error) { static constexpr const std::array<shaderc_shader_kind, static_cast<size_t>(GPUShaderStage::MaxCount)> stage_kinds = {{ shaderc_glsl_vertex_shader, shaderc_glsl_fragment_shader, shaderc_glsl_geometry_shader, shaderc_glsl_compute_shader, }}; if (source_language != GPUShaderLanguage::GLSLVK) { Error::SetStringFmt(error, "Unsupported source language for transpile: {}", ShaderLanguageToString(source_language)); return false; } if (!dyn_libs::OpenShaderc(error)) return false; const shaderc_compile_options_t options = dyn_libs::shaderc_compile_options_initialize(); AssertMsg(options, "shaderc_compile_options_initialize() failed"); dyn_libs::shaderc_compile_options_set_source_language(options, shaderc_source_language_glsl); dyn_libs::shaderc_compile_options_set_target_env(options, shaderc_target_env_vulkan, 0); dyn_libs::shaderc_compile_options_set_generate_debug_info(options, m_debug_device, m_debug_device && nonsemantic_debug_info); dyn_libs::shaderc_compile_options_set_optimization_level( options, optimization ? shaderc_optimization_level_performance : shaderc_optimization_level_zero); const shaderc_compilation_result_t result = dyn_libs::shaderc_compile_into_spv(dyn_libs::s_shaderc_compiler, source.data(), source.length(), stage_kinds[static_cast<size_t>(stage)], "source", entry_point, options); const shaderc_compilation_status status = result ? dyn_libs::shaderc_result_get_compilation_status(result) : shaderc_compilation_status_internal_error; if (status != shaderc_compilation_status_success) { const std::string_view errors(result ? dyn_libs::shaderc_result_get_error_message(result) : "null result object"); Error::SetStringFmt(error, "Failed to compile shader to SPIR-V: {}\n{}", dyn_libs::shaderc_compilation_status_to_string(status), errors); ERROR_LOG("Failed to compile shader to SPIR-V: {}\n{}", dyn_libs::shaderc_compilation_status_to_string(status), errors); DumpBadShader(source, errors); } else { const size_t num_warnings = dyn_libs::shaderc_result_get_num_warnings(result); if (num_warnings > 0) WARNING_LOG("Shader compiled with warnings:\n{}", dyn_libs::shaderc_result_get_error_message(result)); const size_t spirv_size = dyn_libs::shaderc_result_get_length(result); DebugAssert(spirv_size > 0); out_binary->resize(spirv_size); std::memcpy(out_binary->data(), dyn_libs::shaderc_result_get_bytes(result), spirv_size); } dyn_libs::shaderc_result_release(result); dyn_libs::shaderc_compile_options_release(options); return (status == shaderc_compilation_status_success); } bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GPUShaderStage stage, GPUShaderLanguage target_language, u32 target_version, std::string* output, Error* error) { if (!dyn_libs::OpenSpirvCross(error)) return false; spvc_context sctx; spvc_result sres; if ((sres = dyn_libs::spvc_context_create(&sctx)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_context_create() failed: {}", static_cast<int>(sres)); return false; } const ScopedGuard sctx_guard = [&sctx]() { dyn_libs::spvc_context_destroy(sctx); }; dyn_libs::spvc_context_set_error_callback( sctx, [](void* error, const char* errormsg) { ERROR_LOG("SPIRV-Cross reported an error: {}", errormsg); Error::SetStringView(static_cast<Error*>(error), errormsg); }, error); spvc_parsed_ir sir; if ((sres = dyn_libs::spvc_context_parse_spirv(sctx, reinterpret_cast<const u32*>(spirv.data()), spirv.size() / 4, &sir)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_context_parse_spirv() failed: {}", static_cast<int>(sres)); return {}; } static constexpr std::array<spvc_backend, static_cast<size_t>(GPUShaderLanguage::Count)> backends = { {SPVC_BACKEND_NONE, SPVC_BACKEND_HLSL, SPVC_BACKEND_GLSL, SPVC_BACKEND_GLSL, SPVC_BACKEND_GLSL, SPVC_BACKEND_MSL, SPVC_BACKEND_NONE}}; spvc_compiler scompiler; if ((sres = dyn_libs::spvc_context_create_compiler(sctx, backends[static_cast<size_t>(target_language)], sir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &scompiler)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_context_create_compiler() failed: {}", static_cast<int>(sres)); return {}; } spvc_compiler_options soptions; if ((sres = dyn_libs::spvc_compiler_create_compiler_options(scompiler, &soptions)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_create_compiler_options() failed: {}", static_cast<int>(sres)); return {}; } spvc_resources resources; if ((sres = dyn_libs::spvc_compiler_create_shader_resources(scompiler, &resources)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_create_shader_resources() failed: {}", static_cast<int>(sres)); return {}; } // Need to know if there's UBOs for mapping. const spvc_reflected_resource *ubos, *textures; size_t ubos_count, textures_count; if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos, &ubos_count)) != SPVC_SUCCESS || (sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, &textures, &textures_count)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres)); return {}; } [[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler); switch (target_language) { #ifdef _WIN32 case GPUShaderLanguage::HLSL: { if ((sres = dyn_libs::spvc_compiler_options_set_uint(soptions, SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, target_version)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_options_set_uint(SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL) failed: {}", static_cast<int>(sres)); return {}; } if ((sres = dyn_libs::spvc_compiler_options_set_bool( soptions, SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE, false)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_" "BASE_INSTANCE) failed: {}", static_cast<int>(sres)); return {}; } u32 start_set = 0; if (ubos_count > 0) { const spvc_hlsl_resource_binding rb = {.stage = execmodel, .desc_set = start_set++, .binding = 0, .cbv = {.register_space = 0, .register_binding = 0}}; if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres)); return {}; } } if (textures_count > 0) { for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { const spvc_hlsl_resource_binding rb = {.stage = execmodel, .desc_set = start_set++, .binding = i, .srv = {.register_space = 0, .register_binding = i}, .sampler = {.register_space = 0, .register_binding = i}}; if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres)); return {}; } } } } break; #endif #ifdef ENABLE_OPENGL case GPUShaderLanguage::GLSL: case GPUShaderLanguage::GLSLES: { if ((sres = dyn_libs::spvc_compiler_options_set_uint(soptions, SPVC_COMPILER_OPTION_GLSL_VERSION, target_version)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_options_set_uint(SPVC_COMPILER_OPTION_GLSL_VERSION) failed: {}", static_cast<int>(sres)); return {}; } const bool is_gles = (target_language == GPUShaderLanguage::GLSLES); if ((sres = dyn_libs::spvc_compiler_options_set_bool(soptions, SPVC_COMPILER_OPTION_GLSL_ES, is_gles)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_GLSL_ES) failed: {}", static_cast<int>(sres)); return {}; } const bool enable_420pack = (is_gles ? (target_version >= 310) : (target_version >= 420)); if ((sres = dyn_libs::spvc_compiler_options_set_bool(soptions, SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION, enable_420pack)) != SPVC_SUCCESS) { Error::SetStringFmt( error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION) failed: {}", static_cast<int>(sres)); return {}; } } break; #endif #ifdef __APPLE__ case GPUShaderLanguage::MSL: { if ((sres = dyn_libs::spvc_compiler_options_set_bool( soptions, SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS, true)) != SPVC_SUCCESS) { Error::SetStringFmt( error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS) failed: {}", static_cast<int>(sres)); return {}; } if ((sres = dyn_libs::spvc_compiler_options_set_bool(soptions, SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS, m_features.framebuffer_fetch)) != SPVC_SUCCESS) { Error::SetStringFmt( error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS) failed: {}", static_cast<int>(sres)); return {}; } if (m_features.framebuffer_fetch && ((sres = dyn_libs::spvc_compiler_options_set_uint(soptions, SPVC_COMPILER_OPTION_MSL_VERSION, SPVC_MAKE_MSL_VERSION(2, 3, 0))) != SPVC_SUCCESS)) { Error::SetStringFmt(error, "spvc_compiler_options_set_uint(SPVC_COMPILER_OPTION_MSL_VERSION) failed: {}", static_cast<int>(sres)); return {}; } if (stage == GPUShaderStage::Fragment) { for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment, .desc_set = 1, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i}; if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres)); return {}; } } if (!m_features.framebuffer_fetch) { const spvc_msl_resource_binding rb = { .stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS}; if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}", static_cast<int>(sres)); return {}; } } } } break; #endif default: Error::SetStringFmt(error, "Unsupported target language {}.", ShaderLanguageToString(target_language)); break; } if ((sres = dyn_libs::spvc_compiler_install_compiler_options(scompiler, soptions)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_install_compiler_options() failed: {}", static_cast<int>(sres)); return false; } const char* out_src; if ((sres = dyn_libs::spvc_compiler_compile(scompiler, &out_src)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_compiler_compile() failed: {}", static_cast<int>(sres)); return false; } const size_t out_src_length = out_src ? std::strlen(out_src) : 0; if (out_src_length == 0) { Error::SetStringView(error, "Failed to compile SPIR-V to target language."); return false; } output->assign(out_src, out_src_length); return true; } std::unique_ptr<GPUShader> GPUDevice::TranspileAndCreateShaderFromSource( GPUShaderStage stage, GPUShaderLanguage source_language, std::string_view source, const char* entry_point, GPUShaderLanguage target_language, u32 target_version, DynamicHeapArray<u8>* out_binary, Error* error) { // Disable optimization when targeting OpenGL GLSL, otherwise, the name-based linking will fail. const bool optimization = (target_language != GPUShaderLanguage::GLSL && target_language != GPUShaderLanguage::GLSLES); DynamicHeapArray<u8> spv; if (!CompileGLSLShaderToVulkanSpv(stage, source_language, source, entry_point, optimization, false, &spv, error)) return {}; std::string dest_source; if (!TranslateVulkanSpvToLanguage(spv.cspan(), stage, target_language, target_version, &dest_source, error)) return {}; // TODO: MSL needs entry point suffixed. return CreateShaderFromSource(stage, target_language, dest_source, entry_point, out_binary, error); }