From c284d3835f811f35c663f44c6a82e02b389505dc Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 7 Dec 2019 01:37:27 +1000 Subject: [PATCH] GPU: Add a simplified OpenGL ES renderer Seems this is the only way to get semi-decent performance out of Mali drivers :/ --- src/core/CMakeLists.txt | 2 + src/core/core.vcxproj | 2 + src/core/core.vcxproj.filters | 2 + src/core/gpu.h | 3 + src/core/gpu_hw_opengl.cpp | 3 + src/core/gpu_hw_opengl_es.cpp | 649 ++++++++++++++++++++++++++++++++++ src/core/gpu_hw_opengl_es.h | 69 ++++ src/core/gpu_hw_shadergen.cpp | 43 ++- src/core/system.cpp | 5 +- 9 files changed, 761 insertions(+), 17 deletions(-) create mode 100644 src/core/gpu_hw_opengl_es.cpp create mode 100644 src/core/gpu_hw_opengl_es.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 2a3fe9cf0..4f54d38f8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -28,6 +28,8 @@ add_library(core gpu_hw.h gpu_hw_opengl.cpp gpu_hw_opengl.h + gpu_hw_opengl_es.cpp + gpu_hw_opengl_es.h gpu_hw_shadergen.cpp gpu_hw_shadergen.h gpu_sw.cpp diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 16a0f70e8..51f663d90 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -61,6 +61,7 @@ + @@ -93,6 +94,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index b1858b3f8..23a97837f 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -35,6 +35,7 @@ + @@ -72,6 +73,7 @@ + diff --git a/src/core/gpu.h b/src/core/gpu.h index c62f1e8db..f7ede1803 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -136,6 +136,9 @@ public: // gpu_hw_opengl.cpp static std::unique_ptr CreateHardwareOpenGLRenderer(); + // gpu_hw_opengl_es.cpp + static std::unique_ptr CreateHardwareOpenGLESRenderer(); + // gpu_sw.cpp static std::unique_ptr CreateSoftwareRenderer(); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 42bb74f0d..d771c79a2 100644 --- a/src/core/gpu_hw_opengl.cpp 
+++ b/src/core/gpu_hw_opengl.cpp @@ -114,6 +114,9 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) { m_is_gles = (host_display->GetRenderAPI() == HostDisplay::RenderAPI::OpenGLES); + Log_InfoPrintf("GL_VERSION: %s", glGetString(GL_VERSION)); + Log_InfoPrintf("GL_RENDERER: %s", glGetString(GL_RENDERER)); + GLint max_texture_size = VRAM_WIDTH; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size); diff --git a/src/core/gpu_hw_opengl_es.cpp b/src/core/gpu_hw_opengl_es.cpp new file mode 100644 index 000000000..9aa03ff15 --- /dev/null +++ b/src/core/gpu_hw_opengl_es.cpp @@ -0,0 +1,649 @@ +#include "gpu_hw_opengl_es.h" +#include "YBaseLib/Assert.h" +#include "YBaseLib/Log.h" +#include "YBaseLib/String.h" +#include "gpu_hw_shadergen.h" +#include "host_display.h" +#include "system.h" +Log_SetChannel(GPU_HW_OpenGL_ES); +/* Constructs the renderer and sizes the CPU-side vertex array to VERTEX_BUFFER_SIZE bytes, expressed as a count of BatchVertex entries. */ +GPU_HW_OpenGL_ES::GPU_HW_OpenGL_ES() : GPU_HW(), m_vertex_buffer(VERTEX_BUFFER_SIZE / sizeof(BatchVertex)) {} +/* If a host display is attached, clears its display texture and resets the GL state via ResetGraphicsAPIState(). */ +GPU_HW_OpenGL_ES::~GPU_HW_OpenGL_ES() +{ + // TODO: Destroy objects... 
+ if (m_host_display) + { + m_host_display->SetDisplayTexture(nullptr, 0, 0, 0, 0, 0, 0, 1.0f); + ResetGraphicsAPIState(); + } +} +/* Sets up the renderer: rejects hosts whose render API is not OpenGLES, queries capabilities, initializes GPU_HW, creates the textures, compiles the programs and hands the VRAM texture id to the host display. Returns false if the API check, GPU_HW::Initialize() or CompilePrograms() fails. */ +bool GPU_HW_OpenGL_ES::Initialize(HostDisplay* host_display, System* system, DMA* dma, + InterruptController* interrupt_controller, Timers* timers) +{ + if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGLES) + { + Log_ErrorPrintf("Host render API type is incompatible"); + return false; + } + + SetCapabilities(host_display); + + if (!GPU_HW::Initialize(host_display, system, dma, interrupt_controller, timers)) + return false; + + CreateFramebuffer(); + if (!CompilePrograms()) + return false; + /* the texture id comes from m_vram_texture while the dimensions come from m_display_texture; CreateFramebuffer() creates both with the same size */ + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, 0, + m_display_texture->GetWidth(), m_display_texture->GetHeight(), + m_display_texture->GetWidth(), m_display_texture->GetHeight(), 1.0f); + RestoreGraphicsAPIState(); + return true; +} +/* Resets the GPU_HW state, then clears the framebuffer via ClearFramebuffer(). */ +void GPU_HW_OpenGL_ES::Reset() +{ + GPU_HW::Reset(); + + ClearFramebuffer(); +} +/* Calls the GPU_HW implementation, then sets GL state to: culling on, scissor and blend off, depth writes on, line width 1, vertex attributes 0-3 disabled. */ +void GPU_HW_OpenGL_ES::ResetGraphicsAPIState() +{ + GPU_HW::ResetGraphicsAPIState(); + + glEnable(GL_CULL_FACE); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDepthMask(GL_TRUE); + glLineWidth(1.0f); + + glDisableVertexAttribArray(0); + glDisableVertexAttribArray(1); + glDisableVertexAttribArray(2); + glDisableVertexAttribArray(3); +} +/* Rebinds the VRAM framebuffer as draw target and reapplies the state this renderer draws with: full-texture viewport, culling and depth test off, scissor on, depth writes off, line width equal to the resolution scale, no vertex array object, drawing-area scissor and vertex pointers. */ +void GPU_HW_OpenGL_ES::RestoreGraphicsAPIState() +{ + m_vram_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + glViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glEnable(GL_SCISSOR_TEST); + glDepthMask(GL_FALSE); + glLineWidth(static_cast(m_resolution_scale)); + glBindVertexArray(0); + + SetScissorFromDrawingArea(); + SetVertexPointers(); +} +/* Applies changed settings: runs GPU_HW::UpdateSettings(), recreates the textures, recompiles the programs and refreshes the display. NOTE(review): the bool result of CompilePrograms() is discarded here, unlike in Initialize() - confirm that is intended. */ +void GPU_HW_OpenGL_ES::UpdateSettings() +{ + GPU_HW::UpdateSettings(); + + CreateFramebuffer(); + CompilePrograms(); + UpdateDisplay(); +} +/* Points the batch start, current and end pointers at the whole CPU-side vertex array; required_vertices is not consulted. */ +void GPU_HW_OpenGL_ES::MapBatchVertexPointer(u32 
required_vertices) +{ + Assert(!m_batch_start_vertex_ptr); + + m_batch_start_vertex_ptr = m_vertex_buffer.data(); + m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; + m_batch_end_vertex_ptr = m_vertex_buffer.data() + m_vertex_buffer.size(); + m_batch_base_vertex = 0; +} +/* Returns (x, VRAM_HEIGHT - y), i.e. the y coordinate flipped for the lower-left GL origin. */ +std::tuple GPU_HW_OpenGL_ES::ConvertToFramebufferCoordinates(s32 x, s32 y) +{ + return std::make_tuple(x, static_cast(static_cast(VRAM_HEIGHT) - y)); +} +/* Logs the GL version and renderer strings, then sets m_max_resolution_scale to the smaller of (GL_MAX_TEXTURE_SIZE / VRAM_WIDTH) and the upper bound of GL_ALIASED_LINE_WIDTH_RANGE. Dual-source blending is always marked unsupported. host_display is not used. */ +void GPU_HW_OpenGL_ES::SetCapabilities(HostDisplay* host_display) +{ + Log_InfoPrintf("GL_VERSION: %s", glGetString(GL_VERSION)); + Log_InfoPrintf("GL_RENDERER: %s", glGetString(GL_RENDERER)); + + GLint max_texture_size = VRAM_WIDTH; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); + Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size); + const int max_texture_scale = max_texture_size / VRAM_WIDTH; + + std::array line_width_range = {{1, 1}}; + glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_width_range.data()); + Log_InfoPrintf("Max line width: %d", line_width_range[1]); + /* lines are drawn with glLineWidth(m_resolution_scale) in RestoreGraphicsAPIState(), so the scale is also capped by the widest supported line */ + m_max_resolution_scale = std::min(max_texture_scale, line_width_range[1]); + Log_InfoPrintf("Maximum resolution scale is %u", m_max_resolution_scale); + + m_supports_dual_source_blend = false; +} +/* (Re)creates the VRAM, VRAM-read, encoding and display textures for the current resolution scale; when a VRAM texture already existed its contents are blitted into the new one. Finishes by binding the VRAM framebuffer for drawing and marking all of VRAM dirty. */ +void GPU_HW_OpenGL_ES::CreateFramebuffer() +{ + // save old vram texture/fbo, in case we're changing scale + auto old_vram_texture = std::move(m_vram_texture); + DestroyFramebuffer(); + + // scale vram size to internal resolution + const u32 texture_width = VRAM_WIDTH * m_resolution_scale; + const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; + + m_vram_texture = + std::make_unique(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); + + // do we need to restore the framebuffer after a size change? 
+ if (old_vram_texture) + { + const bool linear_filter = old_vram_texture->GetWidth() > m_vram_texture->GetWidth(); /* linear only when the old texture is wider than the new one, i.e. when scaling down */ + Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture->GetWidth(), + old_vram_texture->GetHeight(), m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), + linear_filter ? "linear" : "nearest"); + glDisable(GL_SCISSOR_TEST); + old_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); /* NOTE(review): no draw framebuffer is bound explicitly before this blit - presumably the GL::Texture constructor leaves the new framebuffer bound; confirm */ + glBlitFramebuffer(0, 0, old_vram_texture->GetWidth(), old_vram_texture->GetHeight(), 0, 0, + m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), GL_COLOR_BUFFER_BIT, + linear_filter ? GL_LINEAR : GL_NEAREST); + + glEnable(GL_SCISSOR_TEST); + old_vram_texture.reset(); + } + + m_vram_read_texture = + std::make_unique(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); + /* the encoding texture is always VRAM_WIDTH x VRAM_HEIGHT, independent of the resolution scale */ + m_vram_encoding_texture = + std::make_unique(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); + + m_display_texture = + std::make_unique(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); + + m_vram_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + SetFullVRAMDirtyRectangle(); +} +/* Clears the colour buffer of the currently bound draw framebuffer to all zeros with the scissor test temporarily disabled, then marks all of VRAM dirty. */ +void GPU_HW_OpenGL_ES::ClearFramebuffer() +{ + glDisable(GL_SCISSOR_TEST); + glClearColor(0.0f, 0.0f, 0.0f, 0.0f); + glClear(GL_COLOR_BUFFER_BIT); + glEnable(GL_SCISSOR_TEST); + SetFullVRAMDirtyRectangle(); +} +/* Releases the four texture objects owned by this renderer. */ +void GPU_HW_OpenGL_ES::DestroyFramebuffer() +{ + m_vram_read_texture.reset(); + m_vram_texture.reset(); + m_vram_encoding_texture.reset(); + m_display_texture.reset(); +} +/* Compiles and links every batch program (4 render modes x 9 texture modes x 2 dithering states), the 2 x 2 display programs and the VRAM read program. Returns false as soon as any compile or link step fails, true otherwise. */ +bool GPU_HW_OpenGL_ES::CompilePrograms() +{ + GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, + m_supports_dual_source_blend); + + for (u32 render_mode = 0; render_mode < 4; render_mode++) + { + for (u32 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + const bool textured = (static_cast(texture_mode) != 
TextureMode::Disabled); + const std::string vs = shadergen.GenerateBatchVertexShader(textured); + const std::string fs = shadergen.GenerateBatchFragmentShader(static_cast(render_mode), + static_cast(texture_mode), + ConvertToBoolUnchecked(dithering)); + + GL::Program& prog = m_render_programs[render_mode][texture_mode][dithering]; + if (!prog.Compile(vs, fs)) + return false; + /* attribute slots 0-3 are the same indices that SetVertexPointers() enables and points at the vertex array */ + prog.BindAttribute(0, "a_pos"); + prog.BindAttribute(1, "a_col0"); + if (textured) + { + prog.BindAttribute(2, "a_texcoord"); + prog.BindAttribute(3, "a_texpage"); + } + + if (!prog.Link()) + return false; + + prog.Bind(); + /* NOTE(review): presumably the registration order defines the indices 0-5 that SetDrawState() passes to the Uniform calls - confirm against GL::Program */ + prog.RegisterUniform("u_pos_offset"); + prog.RegisterUniform("u_texture_window_mask"); + prog.RegisterUniform("u_texture_window_offset"); + prog.RegisterUniform("u_src_alpha_factor"); + prog.RegisterUniform("u_dst_alpha_factor"); + prog.RegisterUniform("u_set_mask_while_drawing"); + + if (textured) + prog.Uniform1i("samp0", 0); + } + } + } + /* display programs, indexed [depth_24bit][interlaced] */ + for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) + { + for (u8 interlaced = 0; interlaced < 2; interlaced++) + { + GL::Program& prog = m_display_programs[depth_24bit][interlaced]; + const std::string vs = shadergen.GenerateScreenQuadVertexShader(); + const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), + ConvertToBoolUnchecked(interlaced)); + if (!prog.Compile(vs, fs)) + return false; + + if (!prog.Link()) + return false; + + prog.Bind(); + prog.RegisterUniform("u_base_coords"); + prog.Uniform1i("samp0", 0); + } + } + /* VRAM read program used by ReadVRAM(); it registers u_base_coords and u_size */ + if (!m_vram_read_program.Compile(shadergen.GenerateScreenQuadVertexShader(), + shadergen.GenerateVRAMReadFragmentShader())) + { + return false; + } + + if (!m_vram_read_program.Link()) + return false; + + m_vram_read_program.Bind(); + m_vram_read_program.RegisterUniform("u_base_coords"); + m_vram_read_program.RegisterUniform("u_size"); + m_vram_read_program.Uniform1i("samp0", 0); + return true; +} +/* Enables vertex attributes 0-3 and points them at the fields of the first BatchVertex in the CPU-side vertex array. */ +void GPU_HW_OpenGL_ES::SetVertexPointers() +{ + 
glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), &m_vertex_buffer[0].x); /* position: two integers starting at the x field */ + glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), &m_vertex_buffer[0].color); /* colour: four normalized bytes; the only non-integer attribute */ + glVertexAttribIPointer(2, 1, GL_INT, sizeof(BatchVertex), &m_vertex_buffer[0].texcoord); + glVertexAttribIPointer(3, 1, GL_INT, sizeof(BatchVertex), &m_vertex_buffer[0].texpage); +} +/* Prepares GL state for drawing the current batch in the given render mode: binds the matching program, binds the VRAM read texture for textured batches, configures blending, refreshes the scissor when the drawing area changed and uploads the batch uniforms when they are dirty. */ +void GPU_HW_OpenGL_ES::SetDrawState(BatchRenderMode render_mode) +{ + const GL::Program& prog = m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)] + [BoolToUInt8(m_batch.dithering)]; + m_batch_ubo_dirty |= !prog.IsBound(); /* a program that was not bound gets its uniforms uploaded again below */ + prog.Bind(); + + if (m_batch.texture_mode != TextureMode::Disabled) + m_vram_read_texture->Bind(); + /* blending is off for non-transparent batches and for the opaque-only pass */ + if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque) + { + glDisable(GL_BLEND); + } + else + { + glEnable(GL_BLEND); + glBlendEquationSeparate( + m_batch.transparency_mode == TransparencyMode::BackgroundMinusForeground ? 
GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD, + GL_FUNC_ADD); + glBlendFuncSeparate(GL_ONE, GL_SRC_ALPHA, GL_ONE, GL_ZERO); + } + /* a changed drawing area is added to the dirty rectangle and the scissor is refreshed */ + if (m_drawing_area_changed) + { + m_drawing_area_changed = false; + m_vram_dirty_rect.Include(m_drawing_area); + SetScissorFromDrawingArea(); + } + /* uniforms are set individually by index; the indices follow the RegisterUniform() call order in CompilePrograms() */ + if (m_batch_ubo_dirty) + { + prog.Uniform2iv(0, m_batch_ubo_data.u_pos_offset); + prog.Uniform2uiv(1, m_batch_ubo_data.u_texture_window_mask); + prog.Uniform2uiv(2, m_batch_ubo_data.u_texture_window_offset); + prog.Uniform1f(3, m_batch_ubo_data.u_src_alpha_factor); + prog.Uniform1f(4, m_batch_ubo_data.u_dst_alpha_factor); + prog.Uniform1i(5, static_cast(m_batch_ubo_data.u_set_mask_while_drawing)); + m_batch_ubo_dirty = false; + } +} +/* Takes the rectangle from CalcScissorRect(), converts it to a GL scissor box measured from the bottom of the VRAM texture and applies it. */ +void GPU_HW_OpenGL_ES::SetScissorFromDrawingArea() +{ + int left, top, right, bottom; + CalcScissorRect(&left, &top, &right, &bottom); + + const int width = right - left; + const int height = bottom - top; + const int x = left; + const int y = m_vram_texture->GetHeight() - bottom; + + Log_DebugPrintf("SetScissor: (%d-%d, %d-%d)", x, x + width, y, y + height); + glScissor(x, y, width, height); +} +/* Chooses what the host display shows: the whole VRAM texture when the show_vram debug setting is on, no texture when the display is disabled, the VRAM texture region directly for non-24-bit non-interlaced output, and otherwise the result of a display program pass rendered into m_display_texture. */ +void GPU_HW_OpenGL_ES::UpdateDisplay() +{ + GPU_HW::UpdateDisplay(); + + if (m_system->GetSettings().debugging.show_vram) + { + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), 0, + m_vram_texture->GetHeight(), m_vram_texture->GetWidth(), + -static_cast(m_vram_texture->GetHeight()), m_vram_texture->GetWidth(), + m_vram_texture->GetHeight(), 1.0f); + } + else + { + const u32 vram_offset_x = m_crtc_state.regs.X; + const u32 vram_offset_y = m_crtc_state.regs.Y; + const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; + const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; + const u32 display_width = std::min(m_crtc_state.display_width, VRAM_WIDTH - vram_offset_x); /* clamped so the region does not extend past the right edge of VRAM */ + const u32 display_height = std::min(m_crtc_state.display_height, VRAM_HEIGHT - vram_offset_y); /* clamped so the region does not extend past the bottom edge of VRAM */ + const u32 scaled_display_width = 
display_width * m_resolution_scale; + const u32 scaled_display_height = display_height * m_resolution_scale; + /* display disabled: no texture is shown, only the aspect ratio is passed on */ + if (m_GPUSTAT.display_disable) + { + m_host_display->SetDisplayTexture(nullptr, 0, 0, 0, 0, 0, 0, m_crtc_state.display_aspect_ratio); + } + else if (!m_GPUSTAT.display_area_color_depth_24 && !m_GPUSTAT.vertical_interlace) + { + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture->GetGLId())), + scaled_vram_offset_x, m_vram_texture->GetHeight() - scaled_vram_offset_y, + scaled_display_width, -static_cast(scaled_display_height), + m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), + m_crtc_state.display_aspect_ratio); + } + else + { + const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height; + const u32 scaled_flipped_vram_offset_y = + m_vram_texture->GetHeight() - scaled_vram_offset_y - scaled_display_height; + const u32 field_offset = BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.interlaced_field); /* 1 only when interlacing is on and interlaced_field is set, otherwise 0 */ + /* 24-bit and/or interlaced output is produced by a display program drawing into m_display_texture */ + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + + const GL::Program& prog = m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)] + [BoolToUInt8(m_GPUSTAT.vertical_interlace)]; + prog.Bind(); + + // Because of how the reinterpret shader works, we need to use the downscaled version. 
+ if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1) + { + const u32 copy_width = std::min((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x); /* width widened by 3/2 for 24-bit mode and clamped to the right edge of VRAM */ + const u32 scaled_copy_width = copy_width * m_resolution_scale; + m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); + glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width, + scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y, + vram_offset_x + copy_width, flipped_vram_offset_y + display_height, GL_COLOR_BUFFER_BIT, + GL_NEAREST); + /* the blit above copied the scaled VRAM region down into the native-size encoding texture; the display program samples that copy */ + m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_encoding_texture->Bind(); + + glViewport(0, field_offset, display_width, display_height); + + prog.Uniform3i(0, static_cast(vram_offset_x), static_cast(flipped_vram_offset_y), + static_cast(field_offset)); + m_batch_ubo_dirty = true; /* NOTE(review): presumably forces the batch uniforms to be uploaded again after the display program was used - confirm */ + + glDrawArrays(GL_TRIANGLES, 0, 3); + + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture->GetGLId())), + 0, display_height, display_width, -static_cast(display_height), + m_display_texture->GetWidth(), m_display_texture->GetHeight(), + m_crtc_state.display_aspect_ratio); + } + else + { + m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_texture->Bind(); + /* this path samples the VRAM texture directly, so all coordinates are the scaled ones */ + glViewport(0, field_offset, scaled_display_width, scaled_display_height); + + prog.Uniform3i(0, static_cast(scaled_vram_offset_x), static_cast(scaled_flipped_vram_offset_y), + static_cast(field_offset)); + m_batch_ubo_dirty = true; + + glDrawArrays(GL_TRIANGLES, 0, 3); + + m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture->GetGLId())), + 0, scaled_display_height, scaled_display_width, + -static_cast(scaled_display_height), m_display_texture->GetWidth(), + m_display_texture->GetHeight(), m_crtc_state.display_aspect_ratio); + } + + // restore state 
+ m_vram_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + glViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + glEnable(GL_SCISSOR_TEST); + } + } +} +/* Reads a VRAM rectangle back into m_vram_shadow: the VRAM read program draws an encoded copy into the encoding texture, which is then fetched with glReadPixels. The usual GL state is restored afterwards through RestoreGraphicsAPIState(). */ +void GPU_HW_OpenGL_ES::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = copy_rect.GetWidth() / 2; /* integer division: an odd rectangle width is rounded down */ + const u32 encoded_height = copy_rect.GetHeight(); + + // Encode the 24-bit texture as 16-bit. + m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_texture->Bind(); + m_vram_read_program.Bind(); + m_vram_read_program.Uniform2i(0, copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight()); + m_vram_read_program.Uniform2i(1, copy_rect.GetWidth(), copy_rect.GetHeight()); + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + glViewport(0, 0, encoded_width, encoded_height); + glDrawArrays(GL_TRIANGLES, 0, 3); + + // Readback encoded texture. + m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); + glPixelStorei(GL_PACK_ALIGNMENT, 2); + glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); + glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + RestoreGraphicsAPIState(); +} +/* Fills a VRAM rectangle with a solid colour: after the GPU_HW bookkeeping, the rectangle is scaled to the internal resolution and cleared with a scissored glClear, then the drawing-area scissor is restored. */ +void GPU_HW_OpenGL_ES::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +{ + GPU_HW::FillVRAM(x, y, width, height, color); + + // scale coordinates + x *= m_resolution_scale; + y *= m_resolution_scale; + width *= m_resolution_scale; + height *= m_resolution_scale; + + glScissor(x, m_vram_texture->GetHeight() - y - height, width, height); + + // drop precision unless true colour is enabled + if (!m_true_color) + color = RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)); + + const auto [r, g, b, a] = RGBA8ToFloat(color); + glClearColor(r, g, b, a); + 
glClear(GL_COLOR_BUFFER_BIT); + + SetScissorFromDrawingArea(); +} +/* Uploads 16-bit pixel data into VRAM. Each pixel is converted with RGBA5551ToRGBA8888 and the rows are reversed, the result is written to the native-size texture, and when the resolution scale is above 1 it is blitted from the encoding texture up to the scaled VRAM texture. Updates that extend past the VRAM edges take a CPU round trip through m_vram_shadow instead. */ +void GPU_HW_OpenGL_ES::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +{ + GPU_HW::UpdateVRAM(x, y, width, height, data); + + if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) + { + // CPU round trip if oversized for now. + Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); + GPU::UpdateVRAM(x, y, width, height, data); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); /* recursive call with a full-VRAM rectangle, which never takes the oversized path again */ + return; + } + + const u32 num_pixels = width * height; + std::vector staging_buffer(num_pixels); + + // reverse copy the rows so it matches opengl's lower-left origin + const u32 source_stride = width * sizeof(u16); + const u8* source_ptr = static_cast(data) + (source_stride * (height - 1)); + u32* dest_ptr = static_cast(staging_buffer.data()); + for (u32 row = 0; row < height; row++) + { + const u8* source_row_ptr = source_ptr; + + for (u32 col = 0; col < width; col++) + { + u16 src_col; + std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); /* memcpy instead of a direct u16 load; presumably because data may not be 2-byte aligned - confirm */ + source_row_ptr += sizeof(src_col); + + *(dest_ptr++) = RGBA5551ToRGBA8888(src_col); + } + + source_ptr -= source_stride; + } + + // have to write to the 1x texture first + if (m_resolution_scale > 1) + m_vram_encoding_texture->Bind(); + else + m_vram_texture->Bind(); + + // lower-left origin flip happens here + const u32 flipped_y = VRAM_HEIGHT - y - height; + + // update texture data + glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, staging_buffer.data()); + + if (m_resolution_scale > 1) + { + // scale to internal resolution + const u32 scaled_width = width * m_resolution_scale; + const u32 scaled_height = height * m_resolution_scale; + const u32 scaled_x = x * m_resolution_scale; + const u32 scaled_y = y * m_resolution_scale; + const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - 
scaled_height; + glDisable(GL_SCISSOR_TEST); + m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); + glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y, scaled_x + scaled_width, + scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glEnable(GL_SCISSOR_TEST); + } +} +/* Copies a rectangle within VRAM. After the GPU_HW bookkeeping the coordinates are scaled and flipped, then the copy uses glCopyImageSubDataEXT when GL_EXT_copy_image is available and a framebuffer blit otherwise. */ +void GPU_HW_OpenGL_ES::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +{ + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + + src_x *= m_resolution_scale; + src_y *= m_resolution_scale; + dst_x *= m_resolution_scale; + dst_y *= m_resolution_scale; + width *= m_resolution_scale; + height *= m_resolution_scale; + + // lower-left origin flip + src_y = m_vram_texture->GetHeight() - src_y - height; + dst_y = m_vram_texture->GetHeight() - dst_y - height; + + if (GLAD_GL_EXT_copy_image) + { + glCopyImageSubDataEXT(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, src_x, src_y, 0, m_vram_texture->GetGLId(), + GL_TEXTURE_2D, 0, dst_x, dst_y, 0, width, height, 1); + } + else + { + glDisable(GL_SCISSOR_TEST); + m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); /* NOTE(review): the draw framebuffer is presumably the same VRAM texture, and GL leaves a blit with overlapping source and destination rectangles in one buffer undefined - confirm callers never overlap */ + glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + glEnable(GL_SCISSOR_TEST); + } +} +/* Copies the dirty VRAM rectangle, scaled to the internal resolution, from the VRAM texture into the read texture that textured batches sample from, counts the update and clears the dirty rectangle. */ +void GPU_HW_OpenGL_ES::UpdateVRAMReadTexture() +{ + const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + const u32 width = scaled_rect.GetWidth(); + const u32 height = scaled_rect.GetHeight(); + const u32 x = scaled_rect.left; + const u32 y = m_vram_texture->GetHeight() - scaled_rect.top - height; + + if (GLAD_GL_EXT_copy_image) + { + glCopyImageSubDataEXT(m_vram_texture->GetGLId(), GL_TEXTURE_2D, 0, x, y, 0, m_vram_read_texture->GetGLId(), + GL_TEXTURE_2D, 0, x, y, 0, width, height, 1); + } + else + { + m_vram_read_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); + glDisable(GL_SCISSOR_TEST); + 
glBlitFramebuffer(x, y, x + width, y + height, x, y, x + width, y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glEnable(GL_SCISSOR_TEST); + m_vram_texture->BindFramebuffer(GL_FRAMEBUFFER); /* rebinds the VRAM framebuffer after the blit used it only as the read target */ + } + + m_renderer_stats.num_vram_read_texture_updates++; + ClearVRAMDirtyRectangle(); +} +/* Draws the pending batch, if it has any vertices, and resets the batch vertex pointers. A batch that needs two-pass rendering is drawn twice: first with the OnlyTransparent state, then with the OnlyOpaque state. */ +void GPU_HW_OpenGL_ES::FlushRender() +{ + const u32 vertex_count = GetBatchVertexCount(); + if (vertex_count == 0) + return; + + m_renderer_stats.num_batches++; + /* the count was taken above, so the pointers can be cleared before drawing; this also satisfies the Assert in MapBatchVertexPointer() */ + m_batch_start_vertex_ptr = nullptr; + m_batch_end_vertex_ptr = nullptr; + m_batch_current_vertex_ptr = nullptr; + /* table indexed by the numeric value of m_batch.primitive */ + static constexpr std::array gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}}; + + if (m_batch.NeedsTwoPassRendering()) + { + SetDrawState(BatchRenderMode::OnlyTransparent); + glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count); + SetDrawState(BatchRenderMode::OnlyOpaque); + glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count); + } + else + { + SetDrawState(m_batch.GetRenderMode()); + glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count); + } +} +/* Factory declared on GPU: returns a newly constructed GPU_HW_OpenGL_ES renderer. */ +std::unique_ptr GPU::CreateHardwareOpenGLESRenderer() +{ + return std::make_unique(); +} diff --git a/src/core/gpu_hw_opengl_es.h b/src/core/gpu_hw_opengl_es.h new file mode 100644 index 000000000..5f49374e3 --- /dev/null +++ b/src/core/gpu_hw_opengl_es.h @@ -0,0 +1,69 @@ +#pragma once +#include "common/gl/program.h" +#include "common/gl/stream_buffer.h" +#include "common/gl/texture.h" +#include "glad.h" +#include "gpu_hw.h" +#include +#include +#include +/* Hardware renderer used when the host display reports the OpenGLES render API. Batch vertices live in a CPU-side array (m_vertex_buffer) that the vertex attribute pointers reference directly. */ +class GPU_HW_OpenGL_ES : public GPU_HW +{ +public: + GPU_HW_OpenGL_ES(); + ~GPU_HW_OpenGL_ES() override; + + bool Initialize(HostDisplay* host_display, System* system, DMA* dma, InterruptController* interrupt_controller, + Timers* timers) override; + void Reset() override; + + void ResetGraphicsAPIState() override; + void RestoreGraphicsAPIState() override; + void 
UpdateSettings() override; +/* overrides of the GPU_HW display, VRAM transfer and batch hooks, implemented with GL calls */ +protected: + void UpdateDisplay() override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void FlushRender() override; + void MapBatchVertexPointer(u32 required_vertices) override; + void UpdateVRAMReadTexture() override; +/* NOTE(review): GLStats is not referenced by gpu_hw_opengl_es.cpp in this patch, which updates m_renderer_stats instead - confirm whether it is still needed */ +private: + struct GLStats + { + u32 num_batches; + u32 num_vertices; + u32 num_vram_reads; + u32 num_vram_writes; + u32 num_vram_read_texture_updates; + u32 num_uniform_buffer_updates; + }; + /* returns (x, VRAM_HEIGHT - y) */ + std::tuple ConvertToFramebufferCoordinates(s32 x, s32 y); + /* capability query and texture lifetime helpers */ + void SetCapabilities(HostDisplay* host_display); + void CreateFramebuffer(); + void ClearFramebuffer(); + void DestroyFramebuffer(); + /* program compilation and per-draw state helpers */ + bool CompilePrograms(); + void SetVertexPointers(); + void SetDrawState(BatchRenderMode render_mode); + void SetScissorFromDrawingArea(); + + // downsample texture - used for readbacks at >1xIR. 
+ std::unique_ptr m_vram_texture; /* VRAM render target, VRAM size multiplied by the resolution scale */ + std::unique_ptr m_vram_read_texture; /* copy of VRAM that textured batches sample from; refreshed by UpdateVRAMReadTexture() */ + std::unique_ptr m_vram_encoding_texture; /* always VRAM_WIDTH x VRAM_HEIGHT; used for readbacks, uploads and the 24-bit display path when the scale is above 1 */ + std::unique_ptr m_display_texture; /* target of the display programs for 24-bit or interlaced output */ + + std::vector m_vertex_buffer; /* CPU-side batch vertex storage referenced by the vertex attribute pointers */ + + std::array, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering] + std::array, 2> m_display_programs; // [depth_24][interlaced] + GL::Program m_vram_read_program; +}; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index a45939268..511e016fd 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -22,13 +22,13 @@ static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) ss << "#define " << name << " " << BoolToUInt32(enabled) << "\n"; } -void GPU_HW_ShaderGen::SetGLSLVersionString() { - const char *glsl_version = reinterpret_cast(glGetString( - GL_SHADING_LANGUAGE_VERSION)); +void GPU_HW_ShaderGen::SetGLSLVersionString() +{ + const char* glsl_version = reinterpret_cast(glGetString(GL_SHADING_LANGUAGE_VERSION)); Assert(glsl_version != nullptr); // Skip any strings in front of the version code. - const char *glsl_version_start = glsl_version; + const char* glsl_version_start = glsl_version; while (*glsl_version_start != '\0' && (*glsl_version_start < '0' || *glsl_version_start > '9')) glsl_version_start++; @@ -36,10 +36,13 @@ void GPU_HW_ShaderGen::SetGLSLVersionString() { if (std::sscanf(glsl_version_start, "%d.%d", &major_version, &minor_version) == 2) { // Cap at GLSL 3.3, we're not using anything newer for now. 
- if (!m_glsl_es && major_version >= 4) { + if (!m_glsl_es && major_version >= 4) + { major_version = 3; minor_version = 30; - } else if (m_glsl_es && (major_version > 3 || minor_version > 20)) { + } + else if (m_glsl_es && (major_version > 3 || minor_version > 20)) + { major_version = 3; minor_version = 20; } @@ -47,17 +50,17 @@ void GPU_HW_ShaderGen::SetGLSLVersionString() { else { Log_ErrorPrintf("Invalid GLSL version string: '%s' ('%s')", glsl_version, glsl_version_start); - if (m_glsl_es) { + if (m_glsl_es) + { major_version = 3; minor_version = 0; } m_glsl_version_string = m_glsl_es ? "300" : "130"; } - char buf[128]; std::snprintf(buf, sizeof(buf), "#version %d%02d %s", major_version, minor_version, - (!m_glsl_es && major_version >= 3 && minor_version >= 3) ? "core" : (m_glsl_es ? "es" : "")); + (!m_glsl_es && major_version >= 3 && minor_version >= 3) ? "core" : (m_glsl_es ? "es" : "")); m_glsl_version_string = buf; } @@ -168,15 +171,23 @@ float4 RGBA5551ToRGBA8(uint v) void GPU_HW_ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members) { - if (m_glsl) - ss << "layout(std140) uniform UBOBlock\n"; + if (m_glsl_es) /* GLSL ES: every member is emitted as a standalone uniform declaration instead of inside a UBOBlock */ + { + for (const char* member : members) + ss << "uniform " << member << ";\n"; + } else - ss << "cbuffer UBOBlock : register(b0)\n"; + { + if (m_glsl) + ss << "layout(std140) uniform UBOBlock\n"; + else + ss << "cbuffer UBOBlock : register(b0)\n"; - ss << "{\n"; - for (const char* member : members) - ss << member << ";\n"; - ss << "};\n\n"; + ss << "{\n"; + for (const char* member : members) + ss << member << ";\n"; + ss << "};\n\n"; + } } void GPU_HW_ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u32 index) diff --git a/src/core/system.cpp b/src/core/system.cpp index 7687fcd84..2412acbde 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -10,6 +10,7 @@ #include "dma.h" #include "game_list.h" #include "gpu.h" +#include "host_display.h" #include "host_interface.h" #include 
"interrupt_controller.h" #include "mdec.h" @@ -194,7 +195,9 @@ bool System::CreateGPU() switch (m_host_interface->GetSettings().gpu_renderer) { case GPURenderer::HardwareOpenGL: - m_gpu = GPU::CreateHardwareOpenGLRenderer(); + m_gpu = m_host_interface->GetDisplay()->GetRenderAPI() == HostDisplay::RenderAPI::OpenGLES ? + GPU::CreateHardwareOpenGLESRenderer() : + GPU::CreateHardwareOpenGLRenderer(); break; #ifdef WIN32