GPU: Use texture buffer/fragment shader for VRAM writes

This commit is contained in:
Connor McLaughlin 2019-11-02 23:43:20 +10:00
parent f59e08b564
commit 4143469353
4 changed files with 89 additions and 8 deletions

View file

@ -201,12 +201,17 @@ uint RGBA8ToRGBA5551(vec4 v)
vec4 RGBA5551ToRGBA8(uint v) vec4 RGBA5551ToRGBA8(uint v)
{ {
uint r = (v & 0x1Fu); uint r = (v & 31u);
uint g = ((v >> 5) & 0x1Fu); uint g = ((v >> 5) & 31u);
uint b = ((v >> 10) & 0x1Fu); uint b = ((v >> 10) & 31u);
uint a = ((v >> 15) & 0x01u); uint a = ((v >> 15) & 1u);
return vec4(float(r) * 255.0, float(g) * 255.0, float(b) * 255.0, float(a) * 255.0); // repeat lower bits
r = (r << 3) | (r & 7u);
g = (g << 3) | (g & 7u);
b = (b << 3) | (b & 7u);
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
} }
)"; )";
} }
@ -547,6 +552,34 @@ void main()
return ss.str(); return ss.str();
} }
// Builds and returns the GLSL source for the fragment shader that writes uploaded pixel data into VRAM.
//
// For each fragment, the generated shader:
//   1. Divides gl_FragCoord by RESOLUTION_SCALE to get an integer coordinate.
//   2. Subtracts u_base_coords to get an offset inside the written rectangle.
//   3. Flips the row index within u_size.y (offset.y = u_size.y - offset.y - 1).
//   4. Linearises the offset as y * u_size.x + x and fetches the red channel of that texel from the
//      unsigned-integer buffer texture samp0.
//   5. Converts the fetched value with RGBA5551ToRGBA8() and writes the result to o_col0.
//
// Uniforms declared by the shader: u_base_coords (ivec2), u_size (ivec2), samp0 (usamplerBuffer).
//
// NOTE(review): RESOLUTION_SCALE and RGBA5551ToRGBA8 are not declared in this literal; presumably they are
// emitted by GenerateShaderHeader() - confirm.
// NOTE(review): the fetch index is computed from the rectangle offset only, with no base-offset uniform, so
// this assumes the uploaded pixels begin at texel 0 of the buffer backing samp0 - confirm against the
// caller's buffer mapping.
std::string GPU_HW::GenerateVRAMWriteFragmentShader()
{
std::stringstream ss;
// Write the common shader preamble into the stream before the shader body.
GenerateShaderHeader(ss);
// Everything inside the raw string literal below is shader source emitted verbatim at runtime.
ss << R"(
uniform ivec2 u_base_coords;
uniform ivec2 u_size;
uniform usamplerBuffer samp0;
out vec4 o_col0;
void main()
{
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
ivec2 offset = coords - u_base_coords;
offset.y = u_size.y - offset.y - 1;
int buffer_offset = offset.y * u_size.x + offset.x;
uint value = texelFetch(samp0, buffer_offset).r;
o_col0 = RGBA5551ToRGBA8(value);
})";
// Return the accumulated header plus body as a single source string.
return ss.str();
}
GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{ {
if (rc.primitive == Primitive::Line) if (rc.primitive == Primitive::Line)

View file

@ -123,6 +123,7 @@ protected:
std::string GenerateScreenQuadVertexShader(); std::string GenerateScreenQuadVertexShader();
std::string GenerateFillFragmentShader(); std::string GenerateFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
std::string GenerateVRAMWriteFragmentShader();
HWBatchConfig m_batch = {}; HWBatchConfig m_batch = {};

View file

@ -254,9 +254,17 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
void GPU_HW_OpenGL::CreateTextureBuffer() void GPU_HW_OpenGL::CreateTextureBuffer()
{ {
m_texture_stream_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, VRAM_UPDATE_TEXTURE_BUFFER_SIZE); // const GLenum target = GL_PIXEL_UNPACK_BUFFER;
const GLenum target = GL_TEXTURE_BUFFER;
m_texture_stream_buffer = GL::StreamBuffer::Create(target, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
if (!m_texture_stream_buffer) if (!m_texture_stream_buffer)
Panic("Failed to create texture stream buffer"); Panic("Failed to create texture stream buffer");
glGenTextures(1, &m_texture_buffer_r16ui_texture);
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_texture_stream_buffer->GetGLBufferId());
m_texture_stream_buffer->Unbind();
} }
bool GPU_HW_OpenGL::CompilePrograms() bool GPU_HW_OpenGL::CompilePrograms()
@ -300,6 +308,19 @@ bool GPU_HW_OpenGL::CompilePrograms()
} }
} }
if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader()))
return false;
m_vram_write_program.BindFragData(0, "o_col0");
if (!m_vram_write_program.Link())
return false;
m_vram_write_program.Bind();
m_vram_write_program.RegisterUniform("u_base_coords");
m_vram_write_program.RegisterUniform("u_size");
m_vram_write_program.RegisterUniform("samp0");
m_vram_write_program.Uniform1i(2, 0);
return true; return true;
} }
@ -559,7 +580,6 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
width *= m_resolution_scale; width *= m_resolution_scale;
height *= m_resolution_scale; height *= m_resolution_scale;
glEnable(GL_SCISSOR_TEST);
glScissor(x, m_vram_texture->GetHeight() - y - height, width, height); glScissor(x, m_vram_texture->GetHeight() - y - height, width, height);
const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color)); const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color));
@ -573,6 +593,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{ {
const u32 num_pixels = width * height; const u32 num_pixels = width * height;
#if 0
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
// reverse copy the rows so it matches opengl's lower-left origin // reverse copy the rows so it matches opengl's lower-left origin
@ -596,6 +617,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
} }
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32)); m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32));
m_texture_stream_buffer->Bind();
// have to write to the 1x texture first // have to write to the 1x texture first
if (m_resolution_scale > 1) if (m_resolution_scale > 1)
@ -609,7 +631,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
// update texture data // update texture data
glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
reinterpret_cast<void*>(map_result.index_aligned * sizeof(u32))); reinterpret_cast<void*>(map_result.index_aligned * sizeof(u32)));
InvalidateVRAMReadCache(); m_texture_stream_buffer->Unbind();
if (m_resolution_scale > 1) if (m_resolution_scale > 1)
{ {
@ -625,7 +647,30 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
} }
#else
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16));
// viewport should be set to the whole VRAM size, so we can just set the scissor
const u32 flipped_y = VRAM_HEIGHT - y - height;
const u32 scaled_width = width * m_resolution_scale;
const u32 scaled_height = height * m_resolution_scale;
const u32 scaled_x = x * m_resolution_scale;
const u32 scaled_y = y * m_resolution_scale;
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
glScissor(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
m_vram_write_program.Bind();
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
m_vram_write_program.Uniform2i(0, x, flipped_y);
m_vram_write_program.Uniform2i(1, width, height);
glDrawArrays(GL_TRIANGLES, 0, 3);
UpdateDrawingArea();
#endif
InvalidateVRAMReadCache();
m_stats.num_vram_writes++; m_stats.num_vram_writes++;
} }

View file

@ -70,6 +70,7 @@ private:
GLuint m_attributeless_vao_id = 0; GLuint m_attributeless_vao_id = 0;
std::unique_ptr<GL::StreamBuffer> m_texture_stream_buffer; std::unique_ptr<GL::StreamBuffer> m_texture_stream_buffer;
GLuint m_texture_buffer_r16ui_texture = 0;
bool m_vram_read_texture_dirty = true; bool m_vram_read_texture_dirty = true;
bool m_drawing_area_changed = true; bool m_drawing_area_changed = true;
@ -77,6 +78,7 @@ private:
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering] std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced] std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
GL::Program m_vram_write_program;
GLStats m_stats = {}; GLStats m_stats = {};
GLStats m_last_stats = {}; GLStats m_last_stats = {};