Review notes. Everything above the first "diff --git" header is commentary, not hunk content.

What the patch does: the hardware GPU backends stop passing the GPU_HW_ShaderGen::API enum
(removed in the header hunk) and instead pass HostDisplay::GetRenderAPI() to GPU_HW_ShaderGen.
GPU_HW_OpenGL additionally accepts HostDisplay::RenderAPI::OpenGLES, skips BindFragData on GLES,
and only builds the VRAM write program when texture buffers are supported. The shader generator
emits a "#version 3x0 es" header with precision qualifiers for the GLES case.

Changes made during review (both are same-line edits, so no hunk line count changes):
  1. GPU_HW_OpenGL::SetCapabilities tested m_uniform_stream_buffer immediately after computing
     m_supports_texture_buffer, although the else branch logs "Texture buffers are not supported"
     and CompilePrograms gates on m_supports_texture_buffer. The condition now tests
     m_supports_texture_buffer. NOTE(review): m_uniform_stream_buffer is not declared in the
     header hunks shown here; presumably it is the uniform stream buffer object. Confirm.
  2. GPU_HW_ShaderGen gained a "bool m_glsl_es;" member with no initializer and the constructor
     did not set it. It is now initialised in the constructor's init list, after m_glsl, which
     matches the declaration order in the header hunk.

Caveats for whoever applies this:
  - This copy of the patch arrived with its newlines and indentation collapsed. Line breaks were
    rebuilt so that every hunk matches its "@@ -a,b +c,d @@" counts; indentation is a best-effort
    reconstruction (2-space style) and may need --ignore-whitespace to apply.
  - Angle-bracket content looks stripped from this copy (for example "ComPtr vs_bytecode",
    "reinterpret_cast(&m_max_texture_buffer_size)", "std::tuple", "std::array, 9>, 4>" and the
    bare "#include " lines). Those tokens are left exactly as found; restore them from the
    original commit before applying.
  - The post-image blob hashes on the "index" lines are unchanged from the source and no longer
    describe the edited content.

diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp
index 9a35591b7..7b6695958 100644
--- a/src/core/gpu_hw_d3d11.cpp
+++ b/src/core/gpu_hw_d3d11.cpp
@@ -240,7 +240,7 @@ bool GPU_HW_D3D11::CreateBatchInputLayout()
      {"ATTR", 3, DXGI_FORMAT_R32_SINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
 
   // we need a vertex shader...
-  GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::API::D3D11, m_resolution_scale, m_true_color);
+  GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color);
   ComPtr vs_bytecode = D3D11::ShaderCompiler::CompileShader(
     D3D11::ShaderCompiler::Type::Vertex, m_device->GetFeatureLevel(), shadergen.GenerateBatchVertexShader(true), false);
   if (!vs_bytecode)
@@ -318,7 +318,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
 bool GPU_HW_D3D11::CompileShaders()
 {
   const bool debug = false;
-  GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::API::D3D11, m_resolution_scale, m_true_color);
+  GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color);
 
   m_screen_quad_vertex_shader = D3D11::ShaderCompiler::CompileAndCreateVertexShader(
     m_device.Get(), shadergen.GenerateScreenQuadVertexShader(), debug);
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index ac1f1af97..567e1d896 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -11,6 +11,7 @@ GPU_HW_OpenGL::GPU_HW_OpenGL() : GPU_HW() {}
 
 GPU_HW_OpenGL::~GPU_HW_OpenGL()
 {
+  // TODO: Destroy objects...
   if (m_host_display)
   {
     m_host_display->SetDisplayTexture(nullptr, 0, 0, 0, 0, 0, 0, 1.0f);
@@ -21,13 +22,14 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
 bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display, System* system, DMA* dma,
                                InterruptController* interrupt_controller, Timers* timers)
 {
-  if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGL)
+  if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGL &&
+      host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGLES)
   {
     Log_ErrorPrintf("Host render API type is incompatible");
     return false;
   }
 
-  SetCapabilities();
+  SetCapabilities(host_display);
 
   if (!GPU_HW::Initialize(host_display, system, dma, interrupt_controller, timers))
     return false;
@@ -108,8 +110,10 @@ std::tuple GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y
   return std::make_tuple(x, static_cast(static_cast(VRAM_HEIGHT) - y));
 }
 
-void GPU_HW_OpenGL::SetCapabilities()
+void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
 {
+  m_is_gles = (host_display->GetRenderAPI() == HostDisplay::RenderAPI::OpenGLES);
+
   GLint max_texture_size = VRAM_WIDTH;
   glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
   Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size);
@@ -128,9 +132,17 @@ void GPU_HW_OpenGL::SetCapabilities()
   if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_EXT_copy_image)
     Log_WarningPrintf("GL_EXT_copy_image missing, this may affect performance.");
 
-  glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast(&m_max_texture_buffer_size));
-  if (m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT)
-    Log_WarningPrintf("Maximum texture buffer size is less than VRAM size, VRAM writes may be slower.");
+  m_supports_texture_buffer = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2);
+  if (m_supports_texture_buffer)
+  {
+    glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast(&m_max_texture_buffer_size));
+    if (m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT)
+      Log_WarningPrintf("Maximum texture buffer size is less than VRAM size, VRAM writes may be slower.");
+  }
+  else
+  {
+    Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
+  }
 }
 
 void GPU_HW_OpenGL::CreateFramebuffer()
@@ -234,16 +246,19 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
   if (!m_texture_stream_buffer)
     Panic("Failed to create texture stream buffer");
 
-  glGenTextures(1, &m_texture_buffer_r16ui_texture);
-  glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
-  glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_texture_stream_buffer->GetGLBufferId());
+  if (m_max_texture_buffer_size > 0)
+  {
+    glGenTextures(1, &m_texture_buffer_r16ui_texture);
+    glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_texture_stream_buffer->GetGLBufferId());
+  }
 
   m_texture_stream_buffer->Unbind();
 }
 
 bool GPU_HW_OpenGL::CompilePrograms()
 {
-  GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::API::OpenGL, m_resolution_scale, m_true_color);
+  GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color);
 
   for (u32 render_mode = 0; render_mode < 4; render_mode++)
   {
@@ -269,7 +284,8 @@ bool GPU_HW_OpenGL::CompilePrograms()
           prog.BindAttribute(3, "a_texpage");
         }
 
-        prog.BindFragData(0, "o_col0");
+        if (!m_is_gles)
+          prog.BindFragData(0, "o_col0");
 
         if (!prog.Link())
           return false;
@@ -295,7 +311,9 @@ bool GPU_HW_OpenGL::CompilePrograms()
       if (!prog.Compile(vs, fs))
         return false;
 
-      prog.BindFragData(0, "o_col0");
+      if (!m_is_gles)
+        prog.BindFragData(0, "o_col0");
+
       if (!prog.Link())
         return false;
 
@@ -306,21 +324,26 @@ bool GPU_HW_OpenGL::CompilePrograms()
     }
   }
 
-  if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
-                                    shadergen.GenerateVRAMWriteFragmentShader()))
+  if (m_supports_texture_buffer)
   {
-    return false;
+    if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
+                                      shadergen.GenerateVRAMWriteFragmentShader()))
+    {
+      return false;
+    }
+
+    if (!m_is_gles)
+      m_vram_write_program.BindFragData(0, "o_col0");
+
+    if (!m_vram_write_program.Link())
+      return false;
+
+    m_vram_write_program.BindUniformBlock("UBOBlock", 1);
+
+    m_vram_write_program.Bind();
+    m_vram_write_program.Uniform1i("samp0", 0);
   }
 
-  m_vram_write_program.BindFragData(0, "o_col0");
-  if (!m_vram_write_program.Link())
-    return false;
-
-  m_vram_write_program.BindUniformBlock("UBOBlock", 1);
-
-  m_vram_write_program.Bind();
-  m_vram_write_program.Uniform1i("samp0", 0);
-
   return true;
 }
 
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index cce27a638..1d19861ac 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -45,7 +45,7 @@ private:
 
   std::tuple ConvertToFramebufferCoordinates(s32 x, s32 y);
 
-  void SetCapabilities();
+  void SetCapabilities(HostDisplay* host_display);
   void CreateFramebuffer();
   void ClearFramebuffer();
   void DestroyFramebuffer();
@@ -74,10 +74,13 @@ private:
   std::unique_ptr m_texture_stream_buffer;
   GLuint m_texture_buffer_r16ui_texture = 0;
 
-  u32 m_uniform_buffer_alignment = 1;
-  u32 m_max_texture_buffer_size = 0;
-
   std::array, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
   std::array, 2> m_display_programs; // [depth_24][interlaced]
   GL::Program m_vram_write_program;
+
+  u32 m_uniform_buffer_alignment = 1;
+  u32 m_max_texture_buffer_size = 0;
+
+  bool m_is_gles = false;
+  bool m_supports_texture_buffer = false;
 };
diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp
index 7105a3ca9..60306be6c 100644
--- a/src/core/gpu_hw_shadergen.cpp
+++ b/src/core/gpu_hw_shadergen.cpp
@@ -1,7 +1,9 @@
 #include "gpu_hw_shadergen.h"
+#include 
 
-GPU_HW_ShaderGen::GPU_HW_ShaderGen(API backend, u32 resolution_scale, bool true_color)
-  : m_backend(backend), m_resolution_scale(resolution_scale), m_true_color(true_color), m_glsl(backend != API::D3D11)
+GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color)
+  : m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color),
+    m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_glsl_es(render_api == HostDisplay::RenderAPI::OpenGLES)
 {
 }
 
@@ -9,20 +11,37 @@ GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
 
 static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
 {
-  if (enabled)
-    ss << "#define " << name << " 1\n";
-  else
-    ss << "/* #define " << name << " 0 */\n";
+  ss << "#define " << name << " " << BoolToUInt32(enabled) << "\n";
 }
 
 void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
 {
-  if (m_backend == API::OpenGL)
+  if (m_render_api == HostDisplay::RenderAPI::OpenGL)
   {
     ss << "#version 330 core\n\n";
     ss << "#define API_OPENGL 1\n";
   }
-  else if (m_backend == API::D3D11)
+  else if (m_render_api == HostDisplay::RenderAPI::OpenGLES)
+  {
+    if (GLAD_GL_ES_VERSION_3_2)
+      ss << "#version 320 es\n\n";
+    else if (GLAD_GL_ES_VERSION_3_1)
+      ss << "#version 310 es\n\n";
+    else
+      ss << "#version 300 es\n\n";
+
+    ss << "precision highp float;\n";
+    ss << "precision highp int;\n";
+    ss << "precision highp sampler2D;\n";
+
+    if (GLAD_GL_ES_VERSION_3_2)
+      ss << "precision highp usamplerBuffer;\n";
+
+    ss << "\n";
+    ss << "#define API_OPENGL 1\n";
+    ss << "#define API_OPENGL_ES 1\n";
+  }
+  else if (m_render_api == HostDisplay::RenderAPI::D3D11)
   {
     ss << "#define API_D3D11 1\n";
   }
@@ -67,7 +86,7 @@ void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss)
 
 float fixYCoord(float y)
 {
-#if API_OPENGL
+#if API_OPENGL || API_OPENGL_ES
   return 1.0 - RCP_VRAM_SIZE.y - y;
 #else
   return y;
@@ -76,7 +95,7 @@ float fixYCoord(float y)
 
 int fixYCoord(int y)
 {
-#if API_OPENGL
+#if API_OPENGL || API_OPENGL_ES
   return VRAM_SIZE.y - y - 1;
 #else
   return y;
@@ -447,7 +466,12 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
 
   #if TEXTURED
     int4 texcol = SampleFromVRAM(v_texpage, v_tex0);
-    if (all(texcol == int4(0.0, 0.0, 0.0, 0.0)))
+    #if GLSL
+      bool transparent = (texcol == int4(0.0, 0.0, 0.0, 0.0));
+    #else
+      bool transparent = (all(texcol == int4(0.0, 0.0, 0.0, 0.0)));
+    #endif
+    if (transparent)
       discard;
 
     // Grab semitransparent bit from the texture color.
@@ -565,7 +589,7 @@ std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader()
 {
   v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));
   v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);
-  #if API_OPENGL
+  #if API_OPENGL || API_OPENGL_ES
     v_pos.y = -gl_Position.y;
   #endif
 }
@@ -695,7 +719,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
   int2 coords = int2(v_pos.xy) / int2(RESOLUTION_SCALE, RESOLUTION_SCALE);
   int2 offset = coords - u_base_coords;
 
-  #if API_OPENGL
+  #if API_OPENGL || API_OPENGL_ES
     // Lower-left origin flip for OpenGL
     offset.y = u_size.y - offset.y - 1;
   #endif
diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h
index d21cb165b..ae851f1f8 100644
--- a/src/core/gpu_hw_shadergen.h
+++ b/src/core/gpu_hw_shadergen.h
@@ -1,23 +1,15 @@
 #pragma once
 #include "gpu_hw.h"
+#include "host_display.h"
 #include 
 #include 
 
 class GPU_HW_ShaderGen
 {
 public:
-  enum class API
-  {
-    OpenGL,
-    D3D11
-  };
-
-public:
-  GPU_HW_ShaderGen(API backend, u32 resolution_scale, bool true_color);
+  GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color);
   ~GPU_HW_ShaderGen();
 
-  void Init(API backend, u32 resolution_scale, bool true_color);
-
   std::string GenerateBatchVertexShader(bool textured);
   std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode,
                                           bool dithering);
@@ -28,10 +20,11 @@ public:
   std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
   std::string GenerateVRAMWriteFragmentShader();
 
-  API m_backend;
+  HostDisplay::RenderAPI m_render_api;
   u32 m_resolution_scale;
   bool m_true_color;
   bool m_glsl;
+  bool m_glsl_es;
 
 private:
   void WriteHeader(std::stringstream& ss);