GPU/HW: GPU-based RGBA8->RGB5551 conversion for readbacks

This commit is contained in:
Connor McLaughlin 2019-11-14 16:58:27 +10:00
parent 878a76e258
commit 3998b9684e
8 changed files with 197 additions and 76 deletions

View file

@ -28,6 +28,8 @@ void GPU_HW::Reset()
{ {
GPU::Reset(); GPU::Reset();
m_vram_shadow.fill(0);
m_batch = {}; m_batch = {};
m_batch_ubo_data = {}; m_batch_ubo_data = {};
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
@ -181,6 +183,22 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); *bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
} }
// Returns the VRAM rectangle covered by a transfer of width x height pixels at (x, y).
// If the transfer runs past the right edge, the result spans every column; if it runs
// past the bottom edge, the result spans every row.
Common::Rectangle<u32> GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height)
{
  Common::Rectangle<u32> bounds = Common::Rectangle<u32>::FromExtents(x, y, width, height);

  const bool exceeds_width = bounds.right > VRAM_WIDTH;
  const bool exceeds_height = bounds.bottom > VRAM_HEIGHT;

  // Horizontal overflow: cover the whole width of VRAM.
  if (exceeds_width)
  {
    bounds.left = 0;
    bounds.right = VRAM_WIDTH;
  }

  // Vertical overflow: cover the whole height of VRAM.
  if (exceeds_height)
  {
    bounds.top = 0;
    bounds.bottom = VRAM_HEIGHT;
  }

  return bounds;
}
GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{ {
if (rc.primitive == Primitive::Line) if (rc.primitive == Primitive::Line)
@ -191,6 +209,22 @@ GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
return BatchPrimitive::Triangles; return BatchPrimitive::Triangles;
} }
// Copies a width x height block of 16-bit pixels out of the VRAM shadow buffer into
// `buffer`, tightly packed row after row. Source coordinates wrap around at VRAM_WIDTH
// and VRAM_HEIGHT, so the block may straddle the right/bottom edges.
void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer)
{
  u8* dst = static_cast<u8*>(buffer);

  for (u32 line = 0; line < height; line++)
  {
    // Start of the (wrapped) source row inside the shadow buffer.
    const u32 line_base = ((y + line) % VRAM_HEIGHT) * VRAM_WIDTH;

    for (u32 pixel = 0; pixel < width; pixel++, dst += sizeof(u16))
    {
      const u32 shadow_index = line_base + ((x + pixel) % VRAM_WIDTH);

      // memcpy rather than a typed store, as the caller's buffer has no alignment guarantee.
      std::memcpy(dst, &m_vram_shadow[shadow_index], sizeof(u16));
    }
  }
}
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{ {
m_vram_dirty_rect.Include(Common::Rectangle<u32>::FromExtents(x, y, width, height)); m_vram_dirty_rect.Include(Common::Rectangle<u32>::FromExtents(x, y, width, height));

View file

@ -1,4 +1,5 @@
#pragma once #pragma once
#include "common/heap_array.h"
#include "gpu.h" #include "gpu.h"
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -126,6 +127,7 @@ protected:
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; } bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
@ -139,6 +141,11 @@ protected:
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
} }
/// Computes the area affected by a VRAM transfer. A transfer that wraps around in X or Y
/// is expanded to the full VRAM width or height respectively.
Common::Rectangle<u32> GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height);
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
BatchVertex* m_batch_start_vertex_ptr = nullptr; BatchVertex* m_batch_start_vertex_ptr = nullptr;
BatchVertex* m_batch_end_vertex_ptr = nullptr; BatchVertex* m_batch_end_vertex_ptr = nullptr;
BatchVertex* m_batch_current_vertex_ptr = nullptr; BatchVertex* m_batch_current_vertex_ptr = nullptr;

View file

@ -159,7 +159,9 @@ bool GPU_HW_D3D11::CreateFramebuffer()
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) || if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) || !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) ||
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true)) !m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, true, true) ||
!m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false))
{ {
return false; return false;
} }
@ -177,12 +179,6 @@ bool GPU_HW_D3D11::CreateFramebuffer()
old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter); old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter);
} }
if (m_resolution_scale > 1 &&
!m_vram_downsample_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true))
{
return false;
}
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr); m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
return true; return true;
@ -199,8 +195,9 @@ void GPU_HW_D3D11::DestroyFramebuffer()
{ {
m_vram_read_texture.Destroy(); m_vram_read_texture.Destroy();
m_vram_texture.Destroy(); m_vram_texture.Destroy();
m_vram_downsample_texture.Destroy(); m_vram_encoding_texture.Destroy();
m_display_texture.Destroy(); m_display_texture.Destroy();
m_vram_readback_texture.Destroy();
} }
bool GPU_HW_D3D11::CreateVertexBuffer() bool GPU_HW_D3D11::CreateVertexBuffer()
@ -370,6 +367,11 @@ bool GPU_HW_D3D11::CompileShaders()
if (!m_fill_pixel_shader) if (!m_fill_pixel_shader)
return false; return false;
m_vram_read_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader(
m_device.Get(), shadergen.GenerateVRAMReadFragmentShader(), debug);
if (!m_vram_read_pixel_shader)
return false;
m_vram_write_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader( m_vram_write_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader(
m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(), debug); m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(), debug);
if (!m_vram_write_pixel_shader) if (!m_vram_write_pixel_shader)
@ -548,17 +550,16 @@ void GPU_HW_D3D11::UpdateDisplay()
{ {
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x); const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
const u32 scaled_copy_width = copy_width * m_resolution_scale; const u32 scaled_copy_width = copy_width * m_resolution_scale;
BlitTexture(m_vram_downsample_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height, BlitTexture(m_vram_encoding_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height,
m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width, m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width,
scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false); scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false);
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
m_context->PSSetShaderResources(0, 1, m_vram_downsample_texture.GetD3DSRVArray()); m_context->PSSetShaderResources(0, 1, m_vram_encoding_texture.GetD3DSRVArray());
const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset}; const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset};
SetViewportAndScissor(0, field_offset, display_width, display_height); SetViewportAndScissor(0, field_offset, display_width, display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
UploadUniformBlock(uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, display_width, display_height, m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, display_width, display_height,
m_display_texture.GetWidth(), m_display_texture.GetHeight(), m_display_texture.GetWidth(), m_display_texture.GetHeight(),
@ -572,7 +573,6 @@ void GPU_HW_D3D11::UpdateDisplay()
const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset}; const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset};
SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height); SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
UploadUniformBlock(uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, scaled_display_width, m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, scaled_display_width,
scaled_display_height, m_display_texture.GetWidth(), scaled_display_height, m_display_texture.GetWidth(),
@ -586,7 +586,37 @@ void GPU_HW_D3D11::UpdateDisplay()
// Reads a block of VRAM back from the GPU as 16-bit pixels.
// The affected region of the VRAM texture is encoded to 16-bit by a pixel shader (two
// pixels packed per 32-bit texel), copied to a staging texture, read into the shadow
// buffer, and finally the requested pixels are copied from the shadow buffer to `buffer`.
void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer)
{
  // Get bounds with wrap-around handled.
  Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);

  // Two 16-bit pixels are packed into each 32-bit texel, so the region must start and end
  // on an even column; otherwise the halved width below would drop the last pixel (or the
  // only pixel, for a 1-wide read). Assumes VRAM_WIDTH is even, so this cannot overflow it.
  if (copy_rect.left & 1u)
    copy_rect.left--;
  if (copy_rect.right & 1u)
    copy_rect.right++;

  const u32 encoded_width = copy_rect.GetWidth() / 2;
  const u32 encoded_height = copy_rect.GetHeight();

  // Encode the 24-bit texture as 16-bit.
  const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
  m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr);
  m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
  SetViewportAndScissor(0, 0, encoded_width, encoded_height);
  DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));

  // Stage the readback.
  m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0,
                                          encoded_width, encoded_height);

  // And copy it into our shadow buffer.
  if (m_vram_readback_texture.Map(m_context.Get(), false))
  {
    m_vram_readback_texture.ReadPixels(0, 0, encoded_width * 2, encoded_height, VRAM_WIDTH,
                                       &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
    m_vram_readback_texture.Unmap(m_context.Get());
  }
  else
  {
    Log_ErrorPrintf("Failed to map VRAM readback texture");
  }

  RestoreGraphicsAPIState();

  // Feed the shadow buffer back to the output.
  GPU_HW::ReadVRAM(x, y, width, height, buffer);
}
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)

View file

@ -69,7 +69,7 @@ private:
// downsample texture - used for readbacks at >1xIR. // downsample texture - used for readbacks at >1xIR.
D3D11::Texture m_vram_texture; D3D11::Texture m_vram_texture;
D3D11::Texture m_vram_read_texture; D3D11::Texture m_vram_read_texture;
D3D11::Texture m_vram_downsample_texture; D3D11::Texture m_vram_encoding_texture;
D3D11::Texture m_display_texture; D3D11::Texture m_display_texture;
D3D11::StreamBuffer m_vertex_stream_buffer; D3D11::StreamBuffer m_vertex_stream_buffer;
@ -77,6 +77,9 @@ private:
D3D11::StreamBuffer m_uniform_stream_buffer; D3D11::StreamBuffer m_uniform_stream_buffer;
D3D11::StreamBuffer m_texture_stream_buffer; D3D11::StreamBuffer m_texture_stream_buffer;
D3D11::StagingTexture m_vram_readback_texture;
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui; ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state; ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
@ -98,6 +101,7 @@ private:
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader; ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
ComPtr<ID3D11PixelShader> m_copy_pixel_shader; ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
ComPtr<ID3D11PixelShader> m_fill_pixel_shader; ComPtr<ID3D11PixelShader> m_fill_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader; ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced] std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
}; };

View file

@ -179,11 +179,8 @@ void GPU_HW_OpenGL::CreateFramebuffer()
m_vram_read_texture = m_vram_read_texture =
std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
if (m_resolution_scale > 1) m_vram_encoding_texture =
{ std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
m_vram_downsample_texture =
std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
}
m_display_texture = m_display_texture =
std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
@ -205,7 +202,7 @@ void GPU_HW_OpenGL::DestroyFramebuffer()
{ {
m_vram_read_texture.reset(); m_vram_read_texture.reset();
m_vram_texture.reset(); m_vram_texture.reset();
m_vram_downsample_texture.reset(); m_vram_encoding_texture.reset();
m_display_texture.reset(); m_display_texture.reset();
} }
@ -325,6 +322,23 @@ bool GPU_HW_OpenGL::CompilePrograms()
} }
} }
if (!m_vram_read_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
shadergen.GenerateVRAMReadFragmentShader()))
{
return false;
}
if (!m_is_gles)
m_vram_read_program.BindFragData(0, "o_col0");
if (!m_vram_read_program.Link())
return false;
m_vram_read_program.BindUniformBlock("UBOBlock", 1);
m_vram_read_program.Bind();
m_vram_read_program.Uniform1i("samp0", 0);
if (m_supports_texture_buffer) if (m_supports_texture_buffer)
{ {
if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(), if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
@ -462,7 +476,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
{ {
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x); const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
const u32 scaled_copy_width = copy_width * m_resolution_scale; const u32 scaled_copy_width = copy_width * m_resolution_scale;
m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width, glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y, scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
@ -470,7 +484,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
GL_NEAREST); GL_NEAREST);
m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_downsample_texture->Bind(); m_vram_encoding_texture->Bind();
glViewport(0, field_offset, display_width, display_height); glViewport(0, field_offset, display_width, display_height);
@ -514,60 +528,35 @@ void GPU_HW_OpenGL::UpdateDisplay()
// Reads a block of VRAM back from the GPU as 16-bit pixels.
// The affected region of the VRAM texture is encoded to 16-bit by a fragment shader (two
// pixels packed per 32-bit texel), read straight into the shadow buffer with glReadPixels,
// and finally the requested pixels are copied from the shadow buffer to `buffer`.
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer)
{
  // Get bounds with wrap-around handled.
  Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);

  // Two 16-bit pixels are packed into each 32-bit texel, so the region must start and end
  // on an even column; otherwise the halved width below would drop the last pixel (or the
  // only pixel, for a 1-wide read). Assumes VRAM_WIDTH is even, so this cannot overflow it.
  if (copy_rect.left & 1u)
    copy_rect.left--;
  if (copy_rect.right & 1u)
    copy_rect.right++;

  const u32 encoded_width = copy_rect.GetWidth() / 2;
  const u32 encoded_height = copy_rect.GetHeight();

  // Encode the 24-bit texture as 16-bit.
  // The Y base coordinate is flipped because the VRAM texture has a lower-left origin.
  const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_rect.GetWidth(),
                           copy_rect.GetHeight()};
  m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
  m_vram_texture->Bind();
  m_vram_read_program.Bind();
  UploadUniformBlock(uniforms, sizeof(uniforms));
  glDisable(GL_BLEND);
  glDisable(GL_SCISSOR_TEST);
  glViewport(0, 0, encoded_width, encoded_height);
  glDrawArrays(GL_TRIANGLES, 0, 3);

  // Readback encoded texture.
  // Destination rows are VRAM_WIDTH 16-bit pixels apart, i.e. VRAM_WIDTH / 2 RGBA8 texels,
  // and the destination pointer is only guaranteed to be 2-byte aligned.
  m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
  glPixelStorei(GL_PACK_ALIGNMENT, 2);
  glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2);
  glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE,
               &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
  glPixelStorei(GL_PACK_ALIGNMENT, 4);
  glPixelStorei(GL_PACK_ROW_LENGTH, 0);
  RestoreGraphicsAPIState();

  // Feed the shadow buffer back to the output.
  GPU_HW::ReadVRAM(x, y, width, height, buffer);
}
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
@ -655,7 +644,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
// have to write to the 1x texture first // have to write to the 1x texture first
if (m_resolution_scale > 1) if (m_resolution_scale > 1)
m_vram_downsample_texture->Bind(); m_vram_encoding_texture->Bind();
else else
m_vram_texture->Bind(); m_vram_texture->Bind();
@ -676,7 +665,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
const u32 scaled_y = y * m_resolution_scale; const u32 scaled_y = y * m_resolution_scale;
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height; const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
m_vram_downsample_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y, glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y,
scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);

View file

@ -62,7 +62,7 @@ private:
// downsample texture - used for readbacks at >1xIR. // downsample texture - used for readbacks at >1xIR.
std::unique_ptr<GL::Texture> m_vram_texture; std::unique_ptr<GL::Texture> m_vram_texture;
std::unique_ptr<GL::Texture> m_vram_read_texture; std::unique_ptr<GL::Texture> m_vram_read_texture;
std::unique_ptr<GL::Texture> m_vram_downsample_texture; std::unique_ptr<GL::Texture> m_vram_encoding_texture;
std::unique_ptr<GL::Texture> m_display_texture; std::unique_ptr<GL::Texture> m_display_texture;
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer; std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
@ -76,6 +76,7 @@ private:
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering] std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced] std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
GL::Program m_vram_read_program;
GL::Program m_vram_write_program; GL::Program m_vram_write_program;
u32 m_uniform_buffer_alignment = 1; u32 m_uniform_buffer_alignment = 1;

View file

@ -63,6 +63,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define CONSTANT const\n"; ss << "#define CONSTANT const\n";
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";
ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n";
} }
else else
@ -71,6 +72,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define CONSTANT static const\n"; ss << "#define CONSTANT static const\n";
ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n"; ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n";
ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n";
} }
@ -705,6 +707,59 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
return ss.str(); return ss.str();
} }
// Generates the fragment/pixel shader used for VRAM readbacks.
// Each output texel packs two adjacent VRAM pixels, converted to 16-bit, into the four
// 8-bit channels of an RGBA8 target, so the render target is half the width of the region
// being read. Uniforms: u_base_coords is the top-left (in the API's own orientation) of
// the region in native-resolution pixels, u_size is its width/height.
std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader()
{
  std::stringstream ss;
  WriteHeader(ss);
  WriteCommonFunctions(ss);
  DeclareUniformBuffer(ss, {"int2 u_base_coords", "int2 u_size"});

  DeclareTexture(ss, "samp0", 0);

  // SampleVRAM: fetches one native-resolution pixel as 16-bit, averaging the
  // RESOLUTION_SCALE x RESOLUTION_SCALE block of texels when rendering is upscaled.
  ss << R"(
uint SampleVRAM(int2 coords)
{
  if (RESOLUTION_SCALE == 1)
    return RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, coords, 0));

  // Box filter for downsampling.
  float4 value = float4(0.0, 0.0, 0.0, 0.0);
  int2 base_coords = coords * int2(RESOLUTION_SCALE, RESOLUTION_SCALE);
  for (int offset_x = 0; offset_x < RESOLUTION_SCALE; offset_x++)
  {
    for (int offset_y = 0; offset_y < RESOLUTION_SCALE; offset_y++)
      value += LOAD_TEXTURE(samp0, base_coords + int2(offset_x, offset_y), 0);
  }
  value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE);
  return RGBA8ToRGBA5551(value);
}
)";

  DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);

  // The vertical flip must only be applied for the OpenGL backends. It was previously
  // forced on for every API by a trailing "|| 1", which mirrors the image on backends
  // with a top-left origin.
  // NOTE(review): relies on API_OPENGL / API_OPENGL_ES being defined by WriteHeader - confirm.
  ss << R"(
{
  int2 sample_coords = int2(int(v_pos.x) * 2, int(v_pos.y));

  #if API_OPENGL || API_OPENGL_ES
    // Lower-left origin flip for OpenGL.
    // We want to write the image out upside-down so we can read it top-to-bottom.
    sample_coords.y = u_size.y - sample_coords.y - 1;
  #endif

  sample_coords += u_base_coords;

  // We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel.
  uint left = SampleVRAM(sample_coords);
  uint right = SampleVRAM(int2(sample_coords.x + 1, sample_coords.y));

  o_col0 = float4(float(left & 0xFFu), float((left >> 8) & 0xFFu),
                  float(right & 0xFFu), float((right >> 8) & 0xFFu))
            / float4(255.0, 255.0, 255.0, 255.0);
})";

  return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;

View file

@ -18,6 +18,7 @@ public:
std::string GenerateFillFragmentShader(); std::string GenerateFillFragmentShader();
std::string GenerateCopyFragmentShader(); std::string GenerateCopyFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader(); std::string GenerateVRAMWriteFragmentShader();
HostDisplay::RenderAPI m_render_api; HostDisplay::RenderAPI m_render_api;