diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index d044e9650..c76e58ae9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -36,6 +36,8 @@ add_library(core gpu_hw_opengl.h gpu_hw_shadergen.cpp gpu_hw_shadergen.h + gpu_hw_vulkan.cpp + gpu_hw_vulkan.h gpu_sw.cpp gpu_sw.h gte.cpp @@ -91,7 +93,7 @@ set(RECOMPILER_SRCS target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") -target_link_libraries(core PUBLIC Threads::Threads common imgui tinyxml2 zlib) +target_link_libraries(core PUBLIC Threads::Threads common imgui tinyxml2 zlib vulkan-loader) target_link_libraries(core PRIVATE glad stb) if(WIN32) diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 36392b7a6..75fc8c713 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -64,6 +64,7 @@ + @@ -105,6 +106,7 @@ + @@ -296,7 +298,7 @@ WITH_RECOMPILER=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -321,7 +323,7 @@ WITH_RECOMPILER=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - 
$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -346,7 +348,7 @@ WITH_RECOMPILER=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) Default true false @@ -374,7 +376,7 @@ WITH_RECOMPILER=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + 
$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) Default true false @@ -401,7 +403,7 @@ MaxSpeed true WITH_RECOMPILER=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -427,7 +429,7 @@ MaxSpeed true WITH_RECOMPILER=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true true stdcpp17 @@ -454,7 +456,7 @@ MaxSpeed true WITH_RECOMPILER=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - 
$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -480,7 +482,7 @@ MaxSpeed true WITH_RECOMPILER=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\tinyxml2\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true true stdcpp17 diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 0b8e7f84a..a5d2d065e 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -44,6 +44,7 @@ + @@ -89,6 +90,7 @@ + diff --git a/src/core/gpu.h b/src/core/gpu.h index bf112a0b6..889e70c09 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -164,6 +164,9 @@ public: // gpu_hw_opengl.cpp static std::unique_ptr CreateHardwareOpenGLRenderer(); + // gpu_hw_vulkan.cpp + static std::unique_ptr CreateHardwareVulkanRenderer(); + // gpu_sw.cpp static std::unique_ptr CreateSoftwareRenderer(); @@ -602,7 +605,7 @@ protected: void ClearTextureWindowChangedFlag() { 
texture_window_changed = false; } } m_draw_mode = {}; - Common::Rectangle m_drawing_area; + Common::Rectangle m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT}; struct DrawingOffset { diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index edbe39b22..37d0f66b2 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -527,6 +527,36 @@ bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 w .Intersects(Common::Rectangle::FromExtents(dst_x, dst_y, width, height))); } +GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const +{ + // drop precision unless true colour is enabled + if (!m_true_color) + color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); + + VRAMFillUBOData uniforms; + std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = + RGBA8ToFloat(color); + uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + return uniforms; +} + +GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, + u32 height) const +{ + const VRAMCopyUBOData uniforms = {src_x * m_resolution_scale, + src_y * m_resolution_scale, + dst_x * m_resolution_scale, + dst_y * m_resolution_scale, + ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, + ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, + width * m_resolution_scale, + height * m_resolution_scale, + m_GPUSTAT.set_mask_while_drawing ? 
1u : 0u, + GetCurrentNormalizedBatchVertexDepthID()}; + + return uniforms; +} + GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) { if (rc.primitive == Primitive::Line) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index db411bd2c..3a98a7a27 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -14,9 +14,7 @@ public: enum class BatchPrimitive : u8 { Lines = 0, - LineStrip = 1, - Triangles = 2, - TriangleStrip = 3 + Triangles = 1 }; enum class BatchRenderMode : u8 @@ -119,6 +117,12 @@ protected: u32 u_set_mask_while_drawing; }; + struct VRAMFillUBOData + { + float u_fill_color[4]; + u32 u_interlaced_displayed_field; + }; + struct VRAMWriteUBOData { u32 u_base_coords[2]; @@ -230,6 +234,9 @@ protected: /// Returns true if the VRAM copy shader should be used (oversized copies, masking). bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; + VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const; + VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; + /// Handles quads with flipped texture coordinate directions. 
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 5bc660b17..d9dd67777 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -534,9 +534,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte { const bool textured = (m_batch.texture_mode != TextureMode::Disabled); - static constexpr std::array d3d_primitives = { - {D3D11_PRIMITIVE_TOPOLOGY_LINELIST, D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, - D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP}}; + static constexpr std::array d3d_primitives = { + {D3D11_PRIMITIVE_TOPOLOGY_LINELIST, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST}}; m_context->IASetPrimitiveTopology(d3d_primitives[static_cast(m_batch.primitive)]); m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 120aebf12..be740165a 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -535,7 +535,7 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert glDepthFunc(m_GPUSTAT.check_mask_before_draw ? 
GL_GEQUAL : GL_ALWAYS); - static constexpr std::array gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}}; + static constexpr std::array gl_primitives = {{GL_LINES, GL_TRIANGLES}}; glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, num_vertices); } diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 52005eaec..464009e48 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -14,10 +14,11 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolu { if (m_glsl) { - SetGLSLVersionString(); + if (m_render_api == HostDisplay::RenderAPI::OpenGL || m_render_api == HostDisplay::RenderAPI::OpenGLES) + SetGLSLVersionString(); - m_use_glsl_interface_blocks = (GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2); - m_use_glsl_binding_layout = UseGLSLBindingLayout(); + m_use_glsl_interface_blocks = (IsVulkan() || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2); + m_use_glsl_binding_layout = (IsVulkan() || UseGLSLBindingLayout()); } } @@ -82,6 +83,8 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss) { if (m_render_api == HostDisplay::RenderAPI::OpenGL || m_render_api == HostDisplay::RenderAPI::OpenGLES) ss << m_glsl_version_string << "\n\n"; + else if (m_render_api == HostDisplay::RenderAPI::Vulkan) + ss << "#version 450 core\n\n"; // Extension enabling for OpenGL. 
if (m_render_api == HostDisplay::RenderAPI::OpenGLES) @@ -107,6 +110,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss) DefineMacro(ss, "API_OPENGL", m_render_api == HostDisplay::RenderAPI::OpenGL); DefineMacro(ss, "API_OPENGL_ES", m_render_api == HostDisplay::RenderAPI::OpenGLES); DefineMacro(ss, "API_D3D11", m_render_api == HostDisplay::RenderAPI::D3D11); + DefineMacro(ss, "API_VULKAN", m_render_api == HostDisplay::RenderAPI::Vulkan); if (m_render_api == HostDisplay::RenderAPI::OpenGLES) { @@ -219,9 +223,17 @@ float4 RGBA5551ToRGBA8(uint v) )"; } -void GPU_HW_ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members) +void GPU_HW_ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, + bool push_constant_on_vulkan) { - if (m_glsl) + if (IsVulkan()) + { + if (push_constant_on_vulkan) + ss << "layout(push_constant) uniform PushConstants\n"; + else + ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n"; + } + else if (m_glsl) { if (m_use_glsl_binding_layout) ss << "layout(std140, binding = 1) uniform UBOBlock\n"; @@ -243,7 +255,9 @@ void GPU_HW_ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u { if (m_glsl) { - if (m_use_glsl_binding_layout) + if (IsVulkan()) + ss << "layout(set = 0, binding = " << (index + 1u) << ") "; + else if (m_use_glsl_binding_layout) ss << "layout(binding = " << index << ") "; ss << "uniform sampler2D " << name << ";\n"; @@ -260,7 +274,9 @@ void GPU_HW_ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* n { if (m_glsl) { - if (m_use_glsl_binding_layout) + if (IsVulkan()) + ss << "layout(set = 0, binding = " << index << ") "; + else if (m_use_glsl_binding_layout) ss << "layout(binding = " << index << ") "; ss << "uniform " << (is_int ? (is_unsigned ? 
"u" : "i") : "") << "samplerBuffer " << name << ";\n"; @@ -296,6 +312,9 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint( if (m_use_glsl_interface_blocks) { + if (IsVulkan()) + ss << "layout(location = 0) "; + ss << "out VertexData {\n"; for (u32 i = 0; i < num_color_outputs; i++) ss << " float4 v_col" << i << ";\n"; @@ -321,7 +340,12 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint( ss << "#define v_pos gl_Position\n\n"; if (declare_vertex_id) - ss << "#define v_id uint(gl_VertexID)\n"; + { + if (IsVulkan()) + ss << "#define v_id uint(gl_VertexIndex)\n"; + else + ss << "#define v_id uint(gl_VertexID)\n"; + } ss << "\n"; ss << "void main()\n"; @@ -366,6 +390,9 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint( { if (m_use_glsl_interface_blocks) { + if (IsVulkan()) + ss << "layout(location = 0) "; + ss << "in VertexData {\n"; for (u32 i = 0; i < num_color_inputs; i++) ss << " float4 v_col" << i << ";\n"; @@ -467,7 +494,8 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) DeclareUniformBuffer(ss, {"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor", "float u_dst_alpha_factor", "uint u_interlaced_displayed_field", "uint u_base_vertex_depth_id", - "bool u_check_mask_before_draw", "bool u_set_mask_while_drawing"}); + "bool u_check_mask_before_draw", "bool u_set_mask_while_drawing"}, + false); } std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) @@ -507,6 +535,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) #if API_OPENGL || API_OPENGL_ES pos_y += EPSILON; #endif + + // NDC space Y flip in Vulkan. +#if API_VULKAN + pos_y = -pos_y; +#endif + v_pos = float4(pos_x, pos_y, 0.0, 1.0); #if API_D3D11 @@ -861,13 +895,18 @@ CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RES // GS is a pain, too different between HLSL and GLSL... 
if (m_glsl) { - ss << R"( -in VertexData { + if (IsVulkan()) + ss << "layout(location = 0) "; + + ss << R"(in VertexData { float4 v_col0; nointerpolation float v_depth; -} in_data[]; +} in_data[];)"; -out VertexData { + if (IsVulkan()) + ss << "layout(location = 0) "; + + ss << R"(out VertexData { float4 v_col0; nointerpolation float v_depth; } out_data; @@ -968,8 +1007,8 @@ std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader() { v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u)); v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); - #if API_OPENGL || API_OPENGL_ES - v_pos.y = -gl_Position.y; + #if API_OPENGL || API_OPENGL_ES || API_VULKAN + v_pos.y = -v_pos.y; #endif } )"; @@ -981,7 +1020,7 @@ std::string GPU_HW_ShaderGen::GenerateFillFragmentShader() { std::stringstream ss; WriteHeader(ss); - DeclareUniformBuffer(ss, {"float4 u_fill_color"}); + DeclareUniformBuffer(ss, {"float4 u_fill_color"}, true); DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, true); ss << R"( @@ -999,7 +1038,7 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"}); + DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"}, true); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); ss << R"( @@ -1019,7 +1058,7 @@ std::string GPU_HW_ShaderGen::GenerateCopyFragmentShader() { std::stringstream ss; WriteHeader(ss); - DeclareUniformBuffer(ss, {"float4 u_src_rect"}); + DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true); DeclareTexture(ss, "samp0", 0); DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1); @@ -1043,7 +1082,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields); WriteCommonFunctions(ss); - DeclareUniformBuffer(ss, 
{"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}); + DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true); DeclareTexture(ss, "samp0", 0); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); @@ -1093,7 +1132,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size"}); + DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size"}, true); DeclareTexture(ss, "samp0", 0); @@ -1146,8 +1185,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", - "float u_depth_value"}); + DeclareUniformBuffer( + ss, + {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"}, + true); DeclareTextureBuffer(ss, "samp0", 0, true, true); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); @@ -1176,8 +1217,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); - DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", - "bool u_set_mask_bit", "float u_depth_value"}); + DeclareUniformBuffer(ss, + {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size", + "bool u_set_mask_bit", "float u_depth_value"}, + true); DeclareTexture(ss, "samp0", 0); DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 3b2270dcc..6f6624166 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -28,9 +28,12 @@ public: std::string GenerateVRAMUpdateDepthFragmentShader(); private: + ALWAYS_INLINE bool 
IsVulkan() const { return (m_render_api == HostDisplay::RenderAPI::Vulkan); } + void SetGLSLVersionString(); void WriteHeader(std::stringstream& ss); - void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members); + void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, + bool push_constant_on_vulkan); void DeclareTexture(std::stringstream& ss, const char* name, u32 index); void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned); void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list& attributes, diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp new file mode 100644 index 000000000..bdd99e68f --- /dev/null +++ b/src/core/gpu_hw_vulkan.cpp @@ -0,0 +1,1191 @@ +#include "gpu_hw_vulkan.h" +#include "common/assert.h" +#include "common/log.h" +#include "common/scope_guard.h" +#include "common/vulkan/builders.h" +#include "common/vulkan/context.h" +#include "common/vulkan/shader_cache.h" +#include "common/vulkan/util.h" +#include "gpu_hw_shadergen.h" +#include "host_display.h" +#include "host_interface.h" +#include "system.h" +Log_SetChannel(GPU_HW_Vulkan); + +GPU_HW_Vulkan::GPU_HW_Vulkan() = default; + +GPU_HW_Vulkan::~GPU_HW_Vulkan() +{ + if (m_host_display) + { + m_host_display->ClearDisplayTexture(); + ResetGraphicsAPIState(); + } + + DestroyResources(); +} + +bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display, System* system, DMA* dma, + InterruptController* interrupt_controller, Timers* timers) +{ + if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::Vulkan) + { + Log_ErrorPrintf("Host render API is incompatible"); + return false; + } + + Assert(g_vulkan_shader_cache); + SetCapabilities(); + + if (!GPU_HW::Initialize(host_display, system, dma, interrupt_controller, timers)) + return false; + + if (!CreatePipelineLayouts()) + { + Log_ErrorPrintf("Failed to create pipeline layouts"); + return false; + 
} + + if (!CreateSamplers()) + { + Log_ErrorPrintf("Failed to create samplers"); + return false; + } + + if (!CreateVertexBuffer()) + { + Log_ErrorPrintf("Failed to create vertex buffer"); + return false; + } + + if (!CreateUniformBuffer()) + { + Log_ErrorPrintf("Failed to create uniform buffer"); + return false; + } + + if (!CreateTextureBuffer()) + { + Log_ErrorPrintf("Failed to create texture buffer"); + return false; + } + + if (!CreateFramebuffer()) + { + Log_ErrorPrintf("Failed to create framebuffer"); + return false; + } + + if (!CompilePipelines()) + { + Log_ErrorPrintf("Failed to compile pipelines"); + return false; + } + + RestoreGraphicsAPIState(); + return true; +} + +void GPU_HW_Vulkan::Reset() +{ + GPU_HW::Reset(); + + if (InRenderPass()) + EndRenderPass(); + + ClearFramebuffer(); +} + +void GPU_HW_Vulkan::ResetGraphicsAPIState() +{ + GPU_HW::ResetGraphicsAPIState(); + + if (InRenderPass()) + EndRenderPass(); + + // vram texture is probably going to be displayed now + if (!IsDisplayDisabled()) + { + m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } +} + +void GPU_HW_Vulkan::RestoreGraphicsAPIState() +{ + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + VkDeviceSize vertex_buffer_offset = 0; + vkCmdBindVertexBuffers(cmdbuf, 0, 1, m_vertex_stream_buffer.GetBufferPointer(), &vertex_buffer_offset); + Vulkan::Util::SetViewport(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_batch_pipeline_layout, 0, 1, + &m_batch_descriptor_set, 1, &m_current_uniform_buffer_offset); + SetScissorFromDrawingArea(); +} + +void GPU_HW_Vulkan::UpdateSettings() +{ + GPU_HW::UpdateSettings(); + + // Everything should be finished executing before recreating resources. 
+ g_vulkan_context->ExecuteCommandBuffer(true); + + CreateFramebuffer(); + DestroyPipelines(); + CompilePipelines(); + RestoreGraphicsAPIState(); + UpdateDisplay(); +} + +void GPU_HW_Vulkan::MapBatchVertexPointer(u32 required_vertices) +{ + DebugAssert(!m_batch_start_vertex_ptr); + + const u32 required_space = required_vertices * sizeof(BatchVertex); + if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes in vertex stream buffer", required_space); + EndRenderPass(); + g_vulkan_context->ExecuteCommandBuffer(false); + RestoreGraphicsAPIState(); + if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) + Panic("Failed to reserve vertex stream buffer memory"); + } + + m_batch_start_vertex_ptr = static_cast(m_vertex_stream_buffer.GetCurrentHostPointer()); + m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; + m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + (m_vertex_stream_buffer.GetCurrentSpace() / sizeof(BatchVertex)); + m_batch_base_vertex = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(BatchVertex); +} + +void GPU_HW_Vulkan::UnmapBatchVertexPointer(u32 used_vertices) +{ + DebugAssert(m_batch_start_vertex_ptr); + if (used_vertices > 0) + m_vertex_stream_buffer.CommitMemory(used_vertices * sizeof(BatchVertex)); + + m_batch_start_vertex_ptr = nullptr; + m_batch_end_vertex_ptr = nullptr; + m_batch_current_vertex_ptr = nullptr; +} + +void GPU_HW_Vulkan::UploadUniformBuffer(const void* data, u32 data_size) +{ + const u32 alignment = static_cast(g_vulkan_context->GetUniformBufferAlignment()); + if (!m_uniform_stream_buffer.ReserveMemory(data_size, alignment)) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes in uniform stream buffer", data_size); + EndRenderPass(); + g_vulkan_context->ExecuteCommandBuffer(false); + RestoreGraphicsAPIState(); + if (!m_uniform_stream_buffer.ReserveMemory(data_size, alignment)) + 
Panic("Failed to reserve uniform stream buffer memory"); + } + + m_current_uniform_buffer_offset = m_uniform_stream_buffer.GetCurrentOffset(); + std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), data, data_size); + m_uniform_stream_buffer.CommitMemory(data_size); + + vkCmdBindDescriptorSets(g_vulkan_context->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, + m_batch_pipeline_layout, 0, 1, &m_batch_descriptor_set, 1, &m_current_uniform_buffer_offset); +} + +void GPU_HW_Vulkan::SetCapabilities() +{ + const u32 max_texture_size = g_vulkan_context->GetDeviceLimits().maxImageDimension2D; + const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; + + Log_InfoPrintf("Max texture size: %ux%u", max_texture_size, max_texture_size); + + m_max_resolution_scale = max_texture_scale; + m_supports_dual_source_blend = true; +} + +void GPU_HW_Vulkan::DestroyResources() +{ + // Everything should be finished executing before recreating resources. + if (g_vulkan_context) + g_vulkan_context->ExecuteCommandBuffer(true); + + DestroyFramebuffer(); + DestroyPipelines(); + + Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_write_descriptor_set); + Vulkan::Util::SafeDestroyBufferView(m_texture_stream_buffer_view); + + m_vertex_stream_buffer.Destroy(false); + m_uniform_stream_buffer.Destroy(false); + m_texture_stream_buffer.Destroy(false); + + Vulkan::Util::SafeDestroyPipelineLayout(m_vram_write_pipeline_layout); + Vulkan::Util::SafeDestroyPipelineLayout(m_single_sampler_pipeline_layout); + Vulkan::Util::SafeDestroyPipelineLayout(m_no_samplers_pipeline_layout); + Vulkan::Util::SafeDestroyPipelineLayout(m_batch_pipeline_layout); + Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_write_descriptor_set_layout); + Vulkan::Util::SafeDestroyDescriptorSetLayout(m_single_sampler_descriptor_set_layout); + Vulkan::Util::SafeDestroyDescriptorSetLayout(m_batch_descriptor_set_layout); + Vulkan::Util::SafeDestroySampler(m_point_sampler); + 
Vulkan::Util::SafeDestroySampler(m_linear_sampler); +} + +void GPU_HW_Vulkan::BeginRenderPass(VkRenderPass render_pass, VkFramebuffer framebuffer, u32 x, u32 y, u32 width, + u32 height) +{ + DebugAssert(m_current_render_pass == VK_NULL_HANDLE); + + const VkRenderPassBeginInfo bi = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + render_pass, + framebuffer, + {{static_cast(x), static_cast(y)}, {width, height}}, + 0u, + nullptr}; + vkCmdBeginRenderPass(g_vulkan_context->GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); + m_current_render_pass = render_pass; +} + +void GPU_HW_Vulkan::BeginVRAMRenderPass() +{ + if (m_current_render_pass == m_vram_render_pass) + return; + + EndRenderPass(); + BeginRenderPass(m_vram_render_pass, m_vram_framebuffer, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); +} + +void GPU_HW_Vulkan::EndRenderPass() +{ + if (m_current_render_pass == VK_NULL_HANDLE) + return; + + vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer()); + m_current_render_pass = VK_NULL_HANDLE; +} + +bool GPU_HW_Vulkan::CreatePipelineLayouts() +{ + VkDevice device = g_vulkan_context->GetDevice(); + + Vulkan::DescriptorSetLayoutBuilder dslbuilder; + dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT); + dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + m_batch_descriptor_set_layout = dslbuilder.Create(device); + if (m_batch_descriptor_set_layout == VK_NULL_HANDLE) + return false; + + // textures start at 1 + dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + m_single_sampler_descriptor_set_layout = dslbuilder.Create(device); + if (m_single_sampler_descriptor_set_layout == VK_NULL_HANDLE) + return false; + + dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + m_vram_write_descriptor_set_layout = 
dslbuilder.Create(device); + if (m_vram_write_descriptor_set_layout == VK_NULL_HANDLE) + return false; + + Vulkan::PipelineLayoutBuilder plbuilder; + plbuilder.AddDescriptorSet(m_batch_descriptor_set_layout); + m_batch_pipeline_layout = plbuilder.Create(device); + if (m_batch_pipeline_layout == VK_NULL_HANDLE) + return false; + + plbuilder.AddDescriptorSet(m_single_sampler_descriptor_set_layout); + plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); + m_single_sampler_pipeline_layout = plbuilder.Create(device); + if (m_single_sampler_pipeline_layout == VK_NULL_HANDLE) + return false; + + plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); + m_no_samplers_pipeline_layout = plbuilder.Create(device); + if (m_no_samplers_pipeline_layout == VK_NULL_HANDLE) + return false; + + plbuilder.AddDescriptorSet(m_vram_write_descriptor_set_layout); + plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); + m_vram_write_pipeline_layout = plbuilder.Create(device); + if (m_vram_write_pipeline_layout == VK_NULL_HANDLE) + return false; + + return true; +} + +bool GPU_HW_Vulkan::CreateSamplers() +{ + VkDevice device = g_vulkan_context->GetDevice(); + + Vulkan::SamplerBuilder sbuilder; + sbuilder.SetPointSampler(VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); + m_point_sampler = sbuilder.Create(device); + if (m_point_sampler == VK_NULL_HANDLE) + return false; + + sbuilder.SetLinearSampler(false, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); + m_linear_sampler = sbuilder.Create(device); + if (m_linear_sampler == VK_NULL_HANDLE) + return false; + + return true; +} + +bool GPU_HW_Vulkan::CreateFramebuffer() +{ + // save old vram texture/fbo, in case we're changing scale + auto old_vram_texture = std::move(m_vram_texture); + DestroyFramebuffer(); + + // scale vram size to internal resolution + const u32 texture_width = VRAM_WIDTH * m_resolution_scale; + const u32 texture_height = VRAM_HEIGHT * 
m_resolution_scale; + const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM; + const VkFormat depth_format = VK_FORMAT_D16_UNORM; + const VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + + if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || + !m_vram_depth_texture.Create(texture_width, texture_height, 1, 1, depth_format, samples, VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || + !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || + !m_display_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || + !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) || + !m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH, + VRAM_HEIGHT)) + { + return false; + } + + m_vram_render_pass = + g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); + m_vram_update_depth_render_pass = + g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED, + m_display_texture.GetSamples(), 
VK_ATTACHMENT_LOAD_OP_LOAD); + m_vram_readback_render_pass = + g_vulkan_context->GetRenderPass(m_vram_readback_texture.GetFormat(), VK_FORMAT_UNDEFINED, + m_vram_readback_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE); + + if (m_vram_render_pass == VK_NULL_HANDLE || m_vram_update_depth_render_pass == VK_NULL_HANDLE || + m_display_render_pass == VK_NULL_HANDLE || m_vram_readback_render_pass == VK_NULL_HANDLE) + { + return false; + } + + // vram framebuffer has both colour and depth + { + Vulkan::FramebufferBuilder fbb; + fbb.AddAttachment(m_vram_texture.GetView()); + fbb.AddAttachment(m_vram_depth_texture.GetView()); + fbb.SetRenderPass(m_vram_render_pass); + fbb.SetSize(m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), m_vram_texture.GetLayers()); + m_vram_framebuffer = fbb.Create(g_vulkan_context->GetDevice()); + if (m_vram_framebuffer == VK_NULL_HANDLE) + return false; + } + + m_vram_update_depth_framebuffer = m_vram_depth_texture.CreateFramebuffer(m_vram_update_depth_render_pass); + m_vram_readback_framebuffer = m_vram_readback_texture.CreateFramebuffer(m_vram_readback_render_pass); + m_display_framebuffer = m_display_texture.CreateFramebuffer(m_display_render_pass); + if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_vram_readback_framebuffer == VK_NULL_HANDLE || + m_display_framebuffer == VK_NULL_HANDLE) + { + return false; + } + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + Vulkan::DescriptorSetUpdateBuilder dsubuilder; + + m_batch_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_batch_descriptor_set_layout); + m_vram_copy_descriptor_set = 
g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout); + m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout); + if (m_batch_descriptor_set == VK_NULL_HANDLE || m_vram_copy_descriptor_set == VK_NULL_HANDLE || + m_vram_read_descriptor_set == VK_NULL_HANDLE) + { + return false; + } + + dsubuilder.AddBufferDescriptorWrite(m_batch_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + m_uniform_stream_buffer.GetBuffer(), 0, sizeof(BatchUBOData)); + dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_batch_descriptor_set, 1, m_vram_read_texture.GetView(), + m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_copy_descriptor_set, 1, m_vram_read_texture.GetView(), + m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_read_descriptor_set, 1, m_vram_texture.GetView(), + m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + dsubuilder.Update(g_vulkan_context->GetDevice()); + + if (old_vram_texture.IsValid()) + { + const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth(); + Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(), + old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), + linear_filter ? 
"linear" : "nearest"); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + old_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + const VkImageBlit blit{ + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {{0, 0, 0}, {static_cast(old_vram_texture.GetWidth()), static_cast(old_vram_texture.GetHeight()), 1}}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {{0, 0, 0}, {static_cast(m_vram_texture.GetWidth()), static_cast(m_vram_texture.GetHeight()), 1}}}; + vkCmdBlitImage(cmdbuf, old_vram_texture.GetImage(), old_vram_texture.GetLayout(), m_vram_texture.GetImage(), + m_vram_texture.GetLayout(), 1, &blit, linear_filter ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + UpdateDepthBufferFromMaskBit(); + + // Can't immediately destroy because we're blitting in the current command buffer. + old_vram_texture.Destroy(true); + } + + SetFullVRAMDirtyRectangle(); + RestoreGraphicsAPIState(); + return true; +} + +void GPU_HW_Vulkan::ClearFramebuffer() +{ + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + static constexpr VkClearColorValue cc = {}; + static constexpr VkImageSubresourceRange csrr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; + static constexpr VkClearDepthStencilValue cds = {}; + static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u}; + vkCmdClearColorImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), &cc, 1u, &csrr); + vkCmdClearDepthStencilImage(cmdbuf, m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u, + &dsrr); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_vram_depth_texture.TransitionToLayout(cmdbuf, 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + SetFullVRAMDirtyRectangle(); +} + +void GPU_HW_Vulkan::DestroyFramebuffer() +{ + Vulkan::Util::SafeFreeGlobalDescriptorSet(m_batch_descriptor_set); + Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_copy_descriptor_set); + Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_read_descriptor_set); + + Vulkan::Util::SafeDestroyFramebuffer(m_vram_framebuffer); + Vulkan::Util::SafeDestroyFramebuffer(m_vram_update_depth_framebuffer); + Vulkan::Util::SafeDestroyFramebuffer(m_vram_readback_framebuffer); + Vulkan::Util::SafeDestroyFramebuffer(m_display_framebuffer); + + m_vram_read_texture.Destroy(false); + m_vram_depth_texture.Destroy(false); + m_vram_texture.Destroy(false); + m_vram_readback_texture.Destroy(false); + m_display_texture.Destroy(false); + m_vram_readback_staging_texture.Destroy(false); +} + +bool GPU_HW_Vulkan::CreateVertexBuffer() +{ + return m_vertex_stream_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE); +} + +bool GPU_HW_Vulkan::CreateUniformBuffer() +{ + return m_uniform_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_BUFFER_SIZE); +} + +bool GPU_HW_Vulkan::CreateTextureBuffer() +{ + if (!m_texture_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) + return false; + + Vulkan::BufferViewBuilder bvbuilder; + bvbuilder.Set(m_texture_stream_buffer.GetBuffer(), VK_FORMAT_R16_UINT, 0, m_texture_stream_buffer.GetCurrentSize()); + m_texture_stream_buffer_view = bvbuilder.Create(g_vulkan_context->GetDevice()); + if (m_texture_stream_buffer_view == VK_NULL_HANDLE) + return false; + + m_vram_write_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_vram_write_descriptor_set_layout); + if (m_vram_write_descriptor_set == VK_NULL_HANDLE) + return false; + + Vulkan::DescriptorSetUpdateBuilder dsubuilder; + dsubuilder.AddBufferViewDescriptorWrite(m_vram_write_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + 
m_texture_stream_buffer_view); + dsubuilder.Update(g_vulkan_context->GetDevice()); + return true; +} + +bool GPU_HW_Vulkan::CompilePipelines() +{ + static constexpr std::array primitive_mapping = { + {VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST}}; + static constexpr std::array polygon_mode_mapping = {{VK_POLYGON_MODE_LINE, VK_POLYGON_MODE_FILL}}; + + VkDevice device = g_vulkan_context->GetDevice(); + VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); + + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, + m_texture_filtering, m_supports_dual_source_blend); + + // vertex shaders - [textured] + // fragment shaders - [render_mode][texture_mode][dithering][interlacing] + DimensionalArray batch_vertex_shaders{}; + DimensionalArray batch_fragment_shaders{}; + Common::ScopeGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { + batch_vertex_shaders.enumerate([](VkShaderModule& s) { + if (s != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), s, nullptr); + } + }); + batch_fragment_shaders.enumerate([](VkShaderModule& s) { + if (s != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), s, nullptr); + } + }); + }); + + for (u8 textured = 0; textured < 2; textured++) + { + const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); + VkShaderModule shader = g_vulkan_shader_cache->GetVertexShader(vs); + if (shader == VK_NULL_HANDLE) + return false; + + batch_vertex_shaders[textured] = shader; + } + + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(texture_mode), + 
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); + + VkShaderModule shader = g_vulkan_shader_cache->GetFragmentShader(fs); + if (shader == VK_NULL_HANDLE) + return false; + + batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = shader; + } + } + } + } + + Vulkan::GraphicsPipelineBuilder gpbuilder; + + // [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + for (u8 primitive = 0; primitive < 2; primitive++) + { + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + // TODO: GS + const bool textured = (static_cast(texture_mode) != TextureMode::Disabled); + + gpbuilder.SetPipelineLayout(m_batch_pipeline_layout); + gpbuilder.SetRenderPass(m_vram_render_pass, 0); + + gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX); + gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SINT, offsetof(BatchVertex, x)); + gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color)); + if (textured) + { + gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u)); + gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage)); + } + + gpbuilder.SetPrimitiveTopology(primitive_mapping[primitive]); + gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)]); + gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]); + gpbuilder.SetRasterizationState(polygon_mode_mapping[primitive], VK_CULL_MODE_NONE, + VK_FRONT_FACE_CLOCKWISE); + gpbuilder.SetDepthState(depth_test != 0, true, + (depth_test != 0) ? 
VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); + + gpbuilder.SetNoBlendingState(); + if (static_cast(transparency_mode) != TransparencyMode::Disabled || + m_texture_filtering) + { + // TODO: Check dual-source blend support here. + gpbuilder.SetBlendAttachment( + 0, true, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC1_ALPHA, + (static_cast(transparency_mode) == TransparencyMode::BackgroundMinusForeground) ? + VK_BLEND_OP_REVERSE_SUBTRACT : + VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD); + } + + gpbuilder.SetDynamicViewportAndScissorState(); + + VkPipeline pipeline = gpbuilder.Create(device, pipeline_cache); + if (pipeline == VK_NULL_HANDLE) + return false; + + m_batch_pipelines[primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering] + [interlacing] = pipeline; + } + } + } + } + } + } + } + + batch_shader_guard.Exit(); + + VkShaderModule fullscreen_quad_vertex_shader = + g_vulkan_shader_cache->GetVertexShader(shadergen.GenerateScreenQuadVertexShader()); + if (fullscreen_quad_vertex_shader == VK_NULL_HANDLE) + return false; + + Common::ScopeGuard fullscreen_quad_vertex_shader_guard([&fullscreen_quad_vertex_shader]() { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), fullscreen_quad_vertex_shader, nullptr); + }); + + // common state + gpbuilder.SetRenderPass(m_vram_render_pass, 0); + gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetDynamicViewportAndScissorState(); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); + + // VRAM fill + { + for (u8 interlaced = 0; interlaced < 2; interlaced++) + { + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader( + (interlaced == 0) ? 
shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetPipelineLayout(m_no_samplers_pipeline_layout); + gpbuilder.SetFragmentShader(fs); + gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); + + m_vram_fill_pipelines[interlaced] = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(device, fs, nullptr); + if (m_vram_fill_pipelines[interlaced] == VK_NULL_HANDLE) + return false; + } + } + + // VRAM copy + { + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMCopyFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); + gpbuilder.SetFragmentShader(fs); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + gpbuilder.SetDepthState((depth_test != 0), true, + (depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); + + m_vram_copy_pipelines[depth_test] = gpbuilder.Create(device, pipeline_cache, false); + if (m_vram_copy_pipelines[depth_test] == VK_NULL_HANDLE) + { + vkDestroyShaderModule(device, fs, nullptr); + return false; + } + } + + vkDestroyShaderModule(device, fs, nullptr); + } + + // VRAM write + { + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetPipelineLayout(m_vram_write_pipeline_layout); + gpbuilder.SetFragmentShader(fs); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + gpbuilder.SetDepthState((depth_test != 0), true, + (depth_test != 0) ? 
VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); + m_vram_write_pipelines[depth_test] = gpbuilder.Create(device, pipeline_cache, false); + if (m_vram_write_pipelines[depth_test] == VK_NULL_HANDLE) + { + vkDestroyShaderModule(device, fs, nullptr); + return false; + } + } + + vkDestroyShaderModule(device, fs, nullptr); + } + + // VRAM update depth + { + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMUpdateDepthFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetRenderPass(m_vram_update_depth_render_pass, 0); + gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); + gpbuilder.SetFragmentShader(fs); + gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); + gpbuilder.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, 0); + + m_vram_update_depth_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(device, fs, nullptr); + if (m_vram_update_depth_pipeline == VK_NULL_HANDLE) + return false; + } + + gpbuilder.Clear(); + + // VRAM read + { + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMReadFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetRenderPass(m_vram_readback_render_pass, 0); + gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); + gpbuilder.SetFragmentShader(fs); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetDynamicViewportAndScissorState(); + + m_vram_readback_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(device, fs, nullptr); + if (m_vram_readback_pipeline == VK_NULL_HANDLE) + return false; + } + + gpbuilder.Clear(); + + // Display + { + gpbuilder.SetRenderPass(m_display_render_pass, 0); + 
gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetDynamicViewportAndScissorState(); + + for (u8 depth_24 = 0; depth_24 < 2; depth_24++) + { + for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) + { + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateDisplayFragmentShader( + ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode))); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetFragmentShader(fs); + + m_display_pipelines[depth_24][interlace_mode] = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(device, fs, nullptr); + if (m_display_pipelines[depth_24][interlace_mode] == VK_NULL_HANDLE) + return false; + } + } + } + + return true; +} + +void GPU_HW_Vulkan::DestroyPipelines() +{ + m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); + + for (VkPipeline& p : m_vram_fill_pipelines) + Vulkan::Util::SafeDestroyPipeline(p); + + for (VkPipeline& p : m_vram_write_pipelines) + Vulkan::Util::SafeDestroyPipeline(p); + + for (VkPipeline& p : m_vram_copy_pipelines) + Vulkan::Util::SafeDestroyPipeline(p); + + Vulkan::Util::SafeDestroyPipeline(m_vram_readback_pipeline); + Vulkan::Util::SafeDestroyPipeline(m_vram_update_depth_pipeline); + + m_display_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); +} + +void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) +{ + BeginVRAMRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + + // [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + VkPipeline pipeline = + m_batch_pipelines[static_cast(m_batch.primitive)][BoolToUInt8(m_batch.check_mask_before_draw)][static_cast( + 
render_mode)][static_cast(m_batch.texture_mode)][static_cast(m_batch.transparency_mode)] + [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; + + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + vkCmdDraw(cmdbuf, num_vertices, 1, base_vertex, 0); +} + +void GPU_HW_Vulkan::SetScissorFromDrawingArea() +{ + int left, top, right, bottom; + CalcScissorRect(&left, &top, &right, &bottom); + + Vulkan::Util::SetScissor(g_vulkan_context->GetCurrentCommandBuffer(), left, top, right - left, bottom - top); +} + +void GPU_HW_Vulkan::UpdateDisplay() +{ + GPU_HW::UpdateDisplay(); + + if (m_system->GetSettings().debugging.show_vram) + { + m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, + m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + } + else + { + const u32 vram_offset_x = m_crtc_state.display_vram_left; + const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; + const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; + const u32 display_width = m_crtc_state.display_vram_width; + const u32 display_height = m_crtc_state.display_vram_height; + const u32 scaled_display_width = display_width * m_resolution_scale; + const u32 scaled_display_height = display_height * m_resolution_scale; + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + + if (IsDisplayDisabled()) + { + m_host_display->ClearDisplayTexture(); + } + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && + (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) + { + m_host_display->SetDisplayTexture(&m_vram_texture, 
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), + scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); + } + else + { + EndRenderPass(); + + const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; + const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; + const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; + const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, + reinterpret_crop_left, reinterpret_field_offset}; + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + BeginRenderPass(m_display_render_pass, m_display_framebuffer, 0, 0, scaled_display_width, scaled_display_height); + + vkCmdBindPipeline( + cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)]); + vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), + uniforms); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, + &m_vram_read_descriptor_set, 0, nullptr); + Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, scaled_display_width, scaled_display_height); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); + + EndRenderPass(); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + m_host_display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(), + 0, 0, scaled_display_width, scaled_display_height); + + RestoreGraphicsAPIState(); + } + + 
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, + m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, + m_crtc_state.display_aspect_ratio); + } +} + +void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + + EndRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, encoded_width, encoded_height); + + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline); + vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), + uniforms); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, + &m_vram_read_descriptor_set, 0, nullptr); + Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); + + EndRenderPass(); + + m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + // Stage the readback. 
+ m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width, + encoded_height); + + // And copy it into our shadow buffer (will execute command buffer and stall). + m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], + VRAM_WIDTH * sizeof(u16)); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +{ + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. + Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + GPU::FillVRAM(x, y, width, height, color); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + return; + } + + GPU_HW::FillVRAM(x, y, width, height, color); + + x *= m_resolution_scale; + y *= m_resolution_scale; + width *= m_resolution_scale; + height *= m_resolution_scale; + + BeginVRAMRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); + vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), + &uniforms); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]); + Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +{ + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. 
+ Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + GPU::UpdateVRAM(x, y, width, height, data); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + return; + } + + GPU_HW::UpdateVRAM(x, y, width, height, data); + + const u32 data_size = width * height * sizeof(u16); + const u32 alignment = std::max(sizeof(u16), static_cast(g_vulkan_context->GetTexelBufferAlignment())); + if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) + { + Log_PerfPrintf("Executing command buffer while waiting for %u bytes in stream buffer", data_size); + EndRenderPass(); + g_vulkan_context->ExecuteCommandBuffer(false); + RestoreGraphicsAPIState(); + if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) + { + Panic("Failed to allocate space in stream buffer for VRAM write"); + return; + } + } + + const u32 start_index = m_texture_stream_buffer.GetCurrentOffset() / sizeof(u16); + std::memcpy(m_texture_stream_buffer.GetCurrentHostPointer(), data, data_size); + m_texture_stream_buffer.CommitMemory(data_size); + + BeginVRAMRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + const VRAMWriteUBOData uniforms = {x, + y, + width, + height, + start_index, + m_GPUSTAT.set_mask_while_drawing ? 
0x8000u : 0x00, + GetCurrentNormalizedBatchVertexDepthID()}; + vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), + &uniforms); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_vram_write_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1, + &m_vram_write_descriptor_set, 0, nullptr); + + // the viewport should already be set to the full vram, so just adjust the scissor + Vulkan::Util::SetScissor(cmdbuf, x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, + height * m_resolution_scale); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); + + RestoreGraphicsAPIState(); +} + +void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +{ + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) + { + const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); + if (m_vram_dirty_rect.Intersects(src_bounds)) + UpdateVRAMReadTexture(); + IncludeVRAMDityRectangle(dst_bounds); + + const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height)); + const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); + + BeginVRAMRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, + &m_vram_copy_descriptor_set, 0, nullptr); + Vulkan::Util::SetViewportAndScissor(cmdbuf, dst_bounds_scaled.left, dst_bounds_scaled.top, + dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight()); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); + 
RestoreGraphicsAPIState(); + return; + } + + if (m_GPUSTAT.IsMaskingEnabled()) + Log_WarningPrintf("Masking enabled on VRAM copy - not implemented"); + + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + + src_x *= m_resolution_scale; + src_y *= m_resolution_scale; + dst_x *= m_resolution_scale; + dst_y *= m_resolution_scale; + width *= m_resolution_scale; + height *= m_resolution_scale; + + EndRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_GENERAL); + + const VkImageCopy ic{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(src_x), static_cast(src_y), 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(dst_x), static_cast(dst_y), 0}, + {width, height, 1u}}; + vkCmdCopyImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_texture.GetImage(), + m_vram_texture.GetLayout(), 1, &ic); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); +} + +void GPU_HW_Vulkan::UpdateVRAMReadTexture() +{ + EndRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, + {scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1u}}; + + vkCmdCopyImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_read_texture.GetImage(), + m_vram_read_texture.GetLayout(), 1u, ©); + + m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + 
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + GPU_HW::UpdateVRAMReadTexture(); +} + +void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit() +{ + EndRenderPass(); + + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + BeginRenderPass(m_vram_update_depth_render_pass, m_vram_update_depth_framebuffer, 0, 0, m_vram_texture.GetWidth(), + m_vram_texture.GetHeight()); + + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline); + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, + &m_vram_read_descriptor_set, 0, nullptr); + Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + vkCmdDraw(cmdbuf, 3, 1, 0, 0); + + EndRenderPass(); + + m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + RestoreGraphicsAPIState(); +} + +std::unique_ptr<GPU> GPU::CreateHardwareVulkanRenderer() +{ + return std::make_unique<GPU_HW_Vulkan>(); +} diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h new file mode 100644 index 000000000..6e211fbfd --- /dev/null +++ b/src/core/gpu_hw_vulkan.h @@ -0,0 +1,124 @@ +#pragma once +#include "common/dimensional_array.h" +#include "common/vulkan/staging_texture.h" +#include "common/vulkan/stream_buffer.h" +#include "common/vulkan/texture.h" +#include "gpu_hw.h" +#include <array> +#include <memory> +#include <tuple> + +class GPU_HW_Vulkan : public GPU_HW +{ +public: + GPU_HW_Vulkan(); + ~GPU_HW_Vulkan() override; + + bool Initialize(HostDisplay* host_display, System* system, DMA* dma, InterruptController* interrupt_controller, + Timers* timers) override; + void Reset() override; + + void ResetGraphicsAPIState() override; + void RestoreGraphicsAPIState() override; + void UpdateSettings() override; + +protected: + void UpdateDisplay() override; + void ReadVRAM(u32 x, u32 y, 
u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void UpdateVRAMReadTexture() override; + void UpdateDepthBufferFromMaskBit() override; + void SetScissorFromDrawingArea() override; + void MapBatchVertexPointer(u32 required_vertices) override; + void UnmapBatchVertexPointer(u32 used_vertices) override; + void UploadUniformBuffer(const void* data, u32 data_size) override; + void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; + +private: + enum : u32 + { + MAX_PUSH_CONSTANTS_SIZE = 64, + }; + void SetCapabilities(); + void DestroyResources(); + + ALWAYS_INLINE bool InRenderPass() const { return (m_current_render_pass != VK_NULL_HANDLE); } + void BeginRenderPass(VkRenderPass render_pass, VkFramebuffer framebuffer, u32 x, u32 y, u32 width, u32 height); + void BeginVRAMRenderPass(); + void EndRenderPass(); + + bool CreatePipelineLayouts(); + bool CreateSamplers(); + + bool CreateFramebuffer(); + void ClearFramebuffer(); + void DestroyFramebuffer(); + + bool CreateVertexBuffer(); + bool CreateUniformBuffer(); + bool CreateTextureBuffer(); + + bool CompilePipelines(); + void DestroyPipelines(); + + VkRenderPass m_current_render_pass = VK_NULL_HANDLE; + + VkRenderPass m_vram_render_pass = VK_NULL_HANDLE; + VkRenderPass m_vram_update_depth_render_pass = VK_NULL_HANDLE; + VkRenderPass m_display_render_pass = VK_NULL_HANDLE; + VkRenderPass m_vram_readback_render_pass = VK_NULL_HANDLE; + + VkDescriptorSetLayout m_batch_descriptor_set_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_single_sampler_descriptor_set_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_vram_write_descriptor_set_layout = VK_NULL_HANDLE; + + VkPipelineLayout m_batch_pipeline_layout = VK_NULL_HANDLE; + VkPipelineLayout 
m_no_samplers_pipeline_layout = VK_NULL_HANDLE; + VkPipelineLayout m_single_sampler_pipeline_layout = VK_NULL_HANDLE; + VkPipelineLayout m_vram_write_pipeline_layout = VK_NULL_HANDLE; + + Vulkan::Texture m_vram_texture; + Vulkan::Texture m_vram_depth_texture; + Vulkan::Texture m_vram_read_texture; + Vulkan::Texture m_vram_readback_texture; + Vulkan::StagingTexture m_vram_readback_staging_texture; + Vulkan::Texture m_display_texture; + + VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE; + VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE; + VkFramebuffer m_vram_readback_framebuffer = VK_NULL_HANDLE; + VkFramebuffer m_display_framebuffer = VK_NULL_HANDLE; + + VkSampler m_point_sampler = VK_NULL_HANDLE; + VkSampler m_linear_sampler = VK_NULL_HANDLE; + + VkDescriptorSet m_batch_descriptor_set = VK_NULL_HANDLE; + VkDescriptorSet m_vram_copy_descriptor_set = VK_NULL_HANDLE; + VkDescriptorSet m_vram_read_descriptor_set = VK_NULL_HANDLE; + VkDescriptorSet m_vram_write_descriptor_set = VK_NULL_HANDLE; + + Vulkan::StreamBuffer m_vertex_stream_buffer; + Vulkan::StreamBuffer m_uniform_stream_buffer; + Vulkan::StreamBuffer m_texture_stream_buffer; + + u32 m_current_uniform_buffer_offset = 0; + VkBufferView m_texture_stream_buffer_view = VK_NULL_HANDLE; + + // [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + DimensionalArray<VkPipeline, 2, 2, 5, 9, 4, 2, 2> m_batch_pipelines{}; + + // [interlaced] + std::array<VkPipeline, 2> m_vram_fill_pipelines{}; + + // [depth_test] + std::array<VkPipeline, 2> m_vram_write_pipelines{}; + std::array<VkPipeline, 2> m_vram_copy_pipelines{}; + + VkPipeline m_vram_readback_pipeline = VK_NULL_HANDLE; + VkPipeline m_vram_update_depth_pipeline = VK_NULL_HANDLE; + + // [depth_24][interlace_mode] + DimensionalArray<VkPipeline, 3, 2> m_display_pipelines{}; +}; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 53f8eda16..855cd8781 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -314,6 +314,11 @@ void 
HostInterface::OnRunningGameChanged() {} void HostInterface::OnControllerTypeChanged(u32 slot) {} +std::string HostInterface::GetShaderCacheDirectory() +{ + return GetUserDirectoryRelativePath("cache"); +} + void HostInterface::SetDefaultSettings(SettingsInterface& si) { si.SetStringValue("Console", "Region", Settings::GetConsoleRegionName(ConsoleRegion::Auto)); diff --git a/src/core/host_interface.h b/src/core/host_interface.h index b0738e67e..21f5e00eb 100644 --- a/src/core/host_interface.h +++ b/src/core/host_interface.h @@ -114,6 +114,9 @@ protected: virtual void OnRunningGameChanged(); virtual void OnControllerTypeChanged(u32 slot); + /// Returns the path to the shader cache directory. + virtual std::string GetShaderCacheDirectory(); + /// Restores all settings to defaults. virtual void SetDefaultSettings(SettingsInterface& si); diff --git a/src/core/system.cpp b/src/core/system.cpp index e86fc2f49..064d12376 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -288,6 +288,10 @@ bool System::CreateGPU(GPURenderer renderer) m_gpu = GPU::CreateHardwareOpenGLRenderer(); break; + case GPURenderer::HardwareVulkan: + m_gpu = GPU::CreateHardwareVulkanRenderer(); + break; + #ifdef WIN32 case GPURenderer::HardwareD3D11: m_gpu = GPU::CreateHardwareD3D11Renderer();