GPU/HW: Fallback path for VRAM writes for GPUs without texel buffers

This commit is contained in:
Connor McLaughlin 2019-11-05 23:15:54 +10:00
parent 089f297b9a
commit 15c42f032f
2 changed files with 76 additions and 69 deletions

View file

@ -127,6 +127,10 @@ void GPU_HW_OpenGL::SetCapabilities()
if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_EXT_copy_image) if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_EXT_copy_image)
Log_WarningPrintf("GL_EXT_copy_image missing, this may affect performance."); Log_WarningPrintf("GL_EXT_copy_image missing, this may affect performance.");
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast<GLint*>(&m_max_texture_buffer_size));
if (m_max_texture_buffer_size < VRAM_WIDTH * VRAM_HEIGHT)
Log_WarningPrintf("Maximum texture buffer size is less than VRAM size, VRAM writes may be slower.");
} }
void GPU_HW_OpenGL::CreateFramebuffer() void GPU_HW_OpenGL::CreateFramebuffer()
@ -225,9 +229,8 @@ void GPU_HW_OpenGL::CreateUniformBuffer()
void GPU_HW_OpenGL::CreateTextureBuffer() void GPU_HW_OpenGL::CreateTextureBuffer()
{ {
// const GLenum target = GL_PIXEL_UNPACK_BUFFER; // We use the pixel unpack buffer here because we share it with CPU-decoded VRAM writes.
const GLenum target = GL_TEXTURE_BUFFER; m_texture_stream_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
m_texture_stream_buffer = GL::StreamBuffer::Create(target, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
if (!m_texture_stream_buffer) if (!m_texture_stream_buffer)
Panic("Failed to create texture stream buffer"); Panic("Failed to create texture stream buffer");
@ -573,7 +576,34 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
GPU_HW::UpdateVRAM(x, y, width, height, data); GPU_HW::UpdateVRAM(x, y, width, height, data);
const u32 num_pixels = width * height; const u32 num_pixels = width * height;
#if 0 if (num_pixels < m_max_texture_buffer_size)
{
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16));
m_texture_stream_buffer->Unbind();
// viewport should be set to the whole VRAM size, so we can just set the scissor
const u32 flipped_y = VRAM_HEIGHT - y - height;
const u32 scaled_width = width * m_resolution_scale;
const u32 scaled_height = height * m_resolution_scale;
const u32 scaled_x = x * m_resolution_scale;
const u32 scaled_y = y * m_resolution_scale;
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
glScissor(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
m_vram_write_program.Bind();
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
const u32 uniforms[5] = {x, flipped_y, width, height, map_result.index_aligned};
UploadUniformBlock(uniforms, sizeof(uniforms));
glDrawArrays(GL_TRIANGLES, 0, 3);
RestoreGraphicsAPIState();
}
else
{
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
// reverse copy the rows so it matches opengl's lower-left origin // reverse copy the rows so it matches opengl's lower-left origin
@ -623,35 +653,11 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height; const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
m_vram_downsample_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); m_vram_downsample_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y, scaled_x + scaled_width, glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y,
scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
} }
#else }
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16));
// viewport should be set to the whole VRAM size, so we can just set the scissor
const u32 flipped_y = VRAM_HEIGHT - y - height;
const u32 scaled_width = width * m_resolution_scale;
const u32 scaled_height = height * m_resolution_scale;
const u32 scaled_x = x * m_resolution_scale;
const u32 scaled_y = y * m_resolution_scale;
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
glScissor(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
m_vram_write_program.Bind();
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
const u32 uniforms[5] = {x, flipped_y, width, height, map_result.index_aligned};
UploadUniformBlock(uniforms, sizeof(uniforms));
m_batch_ubo_dirty = true;
glDrawArrays(GL_TRIANGLES, 0, 3);
SetScissorFromDrawingArea();
#endif
} }
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)

View file

@ -75,6 +75,7 @@ private:
GLuint m_texture_buffer_r16ui_texture = 0; GLuint m_texture_buffer_r16ui_texture = 0;
u32 m_uniform_buffer_alignment = 1; u32 m_uniform_buffer_alignment = 1;
u32 m_max_texture_buffer_size = 0;
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering] std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced] std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]