From 2b17cfd3653f817bcc820b865010d7920dbc4fd3 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sat, 2 Nov 2019 22:31:15 +1000
Subject: [PATCH] GPU: Use streaming buffers for CPU->VRAM transfers

---
 src/core/gpu_hw.h          |  1 +
 src/core/gpu_hw_opengl.cpp | 21 ++++++++++++++++-----
 src/core/gpu_hw_opengl.h   |  3 +++
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 7828ad714..235074464 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -83,6 +83,7 @@ protected:
     }
   };
 
+  static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
   static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
   static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
   static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 3003d7521..c0d06f5b7 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -23,6 +23,7 @@ bool GPU_HW_OpenGL::Initialize(System* system, DMA* dma, InterruptController* in
 
   CreateFramebuffer();
   CreateVertexBuffer();
+  CreateTextureBuffer();
 
   if (!CompilePrograms())
     return false;
@@ -251,6 +252,13 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
   glGenVertexArrays(1, &m_attributeless_vao_id);
 }
 
+void GPU_HW_OpenGL::CreateTextureBuffer()
+{
+  m_texture_stream_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
+  if (!m_texture_stream_buffer)
+    Panic("Failed to create texture stream buffer");
+}
+
 bool GPU_HW_OpenGL::CompilePrograms()
 {
   for (u32 render_mode = 0; render_mode < 4; render_mode++)
@@ -564,12 +572,13 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
 
 void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
 {
-  std::vector<u32> rgba_data;
-  rgba_data.reserve(width * height);
+  const u32 num_pixels = width * height;
+  const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
 
   // reverse copy the rows so it matches opengl's lower-left origin
   const u32 source_stride = width * sizeof(u16);
   const u8* source_ptr = static_cast<const u8*>(data) + (source_stride * (height - 1));
+  u32* dest_ptr = static_cast<u32*>(map_result.pointer);
   for (u32 row = 0; row < height; row++)
   {
     const u8* source_row_ptr = source_ptr;
@@ -580,13 +589,14 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
       std::memcpy(&src_col, source_row_ptr, sizeof(src_col));
       source_row_ptr += sizeof(src_col);
 
-      const u32 dst_col = RGBA5551ToRGBA8888(src_col);
-      rgba_data.push_back(dst_col);
+      *(dest_ptr++) = RGBA5551ToRGBA8888(src_col);
     }
 
     source_ptr -= source_stride;
   }
 
+  m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32));
+
   // have to write to the 1x texture first
   if (m_resolution_scale > 1)
     m_vram_downsample_texture->Bind();
@@ -597,7 +607,8 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
   const u32 flipped_y = VRAM_HEIGHT - y - height;
 
   // update texture data
-  glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, rgba_data.data());
+  glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
+                  reinterpret_cast<void*>(map_result.index_aligned * sizeof(u32)));
   InvalidateVRAMReadCache();
 
   if (m_resolution_scale > 1)
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index 81236a943..8ba2eac34 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -53,6 +53,7 @@ private:
 
   void UpdateVRAMReadTexture();
   void CreateVertexBuffer();
+  void CreateTextureBuffer();
   bool CompilePrograms();
   bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering);
 
@@ -68,6 +69,8 @@ private:
   GLuint m_vao_id = 0;
   GLuint m_attributeless_vao_id = 0;
 
+  std::unique_ptr<GL::StreamBuffer> m_texture_stream_buffer;
+
  bool m_vram_read_texture_dirty = true;
   bool m_drawing_area_changed = true;
   bool m_show_renderer_statistics = false;
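
The patch routes VRAM uploads through a GL_PIXEL_UNPACK_BUFFER, so the final
glTexSubImage2D call above sources pixels from the bound unpack buffer at a byte
offset (index_aligned * sizeof(u32)) rather than from a client-memory pointer.
Below is a minimal sketch of that pattern, assuming an active OpenGL 3.x context
with the destination texture bound; the helper name UploadRect, the glad loader
header, and the buffer-orphaning strategy are illustrative assumptions, not the
GL::StreamBuffer implementation used by the patch (which appears to keep one
buffer and hand out aligned offsets instead of re-allocating per upload).

  // Sketch: stream RGBA8 pixels to a texture through a pixel unpack buffer.
  #include <glad/glad.h> // assumption: any OpenGL function loader works here
  #include <cstdint>
  #include <cstring>

  // Hypothetical helper, not part of the patch. Assumes a GL context is
  // current and the destination texture is bound to GL_TEXTURE_2D.
  void UploadRect(GLuint pbo, GLint x, GLint y, GLsizei width, GLsizei height,
                  const uint32_t* rgba_pixels)
  {
    const GLsizeiptr size = static_cast<GLsizeiptr>(width) * height * sizeof(uint32_t);

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);

    // Orphan the previous storage and map fresh space for this upload.
    glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW);
    void* dst = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size,
                                 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
    if (!dst)
      return;
    std::memcpy(dst, rgba_pixels, static_cast<size_t>(size));
    glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);

    // With an unpack buffer bound, the pixels argument is a byte offset into
    // that buffer; this sketch uploads from offset 0, while the patch passes
    // the sub-allocation offset returned by its stream buffer.
    glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, GL_RGBA,
                    GL_UNSIGNED_BYTE, nullptr);

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
  }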