mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-26 15:45:42 +00:00
GPU/HW: GPU-based RGBA8->RGB5551 conversion for readbacks
This commit is contained in:
parent
878a76e258
commit
3998b9684e
|
@ -28,6 +28,8 @@ void GPU_HW::Reset()
|
||||||
{
|
{
|
||||||
GPU::Reset();
|
GPU::Reset();
|
||||||
|
|
||||||
|
m_vram_shadow.fill(0);
|
||||||
|
|
||||||
m_batch = {};
|
m_batch = {};
|
||||||
m_batch_ubo_data = {};
|
m_batch_ubo_data = {};
|
||||||
m_batch_ubo_dirty = true;
|
m_batch_ubo_dirty = true;
|
||||||
|
@ -181,6 +183,22 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
||||||
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
|
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the VRAM rectangle touched by a transfer of `width` x `height` pixels starting at (`x`, `y`).
/// A transfer that runs past the right or bottom edge is not split into pieces; instead the affected axis
/// is widened to span the whole of VRAM (0..VRAM_WIDTH or 0..VRAM_HEIGHT).
Common::Rectangle<u32> GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height)
{
  Common::Rectangle<u32> bounds = Common::Rectangle<u32>::FromExtents(x, y, width, height);

  // Horizontal overrun: cover every column.
  const bool overruns_right = (bounds.right > VRAM_WIDTH);
  if (overruns_right)
  {
    bounds.left = 0;
    bounds.right = VRAM_WIDTH;
  }

  // Vertical overrun: cover every row.
  const bool overruns_bottom = (bounds.bottom > VRAM_HEIGHT);
  if (overruns_bottom)
  {
    bounds.top = 0;
    bounds.bottom = VRAM_HEIGHT;
  }

  return bounds;
}
|
||||||
|
|
||||||
GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||||
{
|
{
|
||||||
if (rc.primitive == Primitive::Line)
|
if (rc.primitive == Primitive::Line)
|
||||||
|
@ -191,6 +209,22 @@ GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||||
return BatchPrimitive::Triangles;
|
return BatchPrimitive::Triangles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copies a `width` x `height` block of 16-bit pixels out of the CPU-side VRAM shadow into `buffer`.
/// Source coordinates wrap modulo VRAM_WIDTH / VRAM_HEIGHT; the destination is written tightly packed,
/// row after row, two bytes per pixel.
void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer)
{
  u8* dst = static_cast<u8*>(buffer);

  for (u32 dy = 0; dy < height; dy++)
  {
    // Start of the (wrapped) source row inside the shadow buffer.
    const u32 src_row_base = ((y + dy) % VRAM_HEIGHT) * VRAM_WIDTH;

    for (u32 dx = 0; dx < width; dx++, dst += sizeof(u16))
    {
      const u32 src_index = src_row_base + ((x + dx) % VRAM_WIDTH);

      // memcpy rather than a typed store: nothing here guarantees `buffer` is u16-aligned.
      std::memcpy(dst, &m_vram_shadow[src_index], sizeof(u16));
    }
  }
}
|
||||||
|
|
||||||
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
{
|
{
|
||||||
m_vram_dirty_rect.Include(Common::Rectangle<u32>::FromExtents(x, y, width, height));
|
m_vram_dirty_rect.Include(Common::Rectangle<u32>::FromExtents(x, y, width, height));
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "common/heap_array.h"
|
||||||
#include "gpu.h"
|
#include "gpu.h"
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -126,6 +127,7 @@ protected:
|
||||||
|
|
||||||
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
|
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
|
||||||
|
|
||||||
|
void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override;
|
||||||
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
|
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
|
||||||
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||||
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
||||||
|
@ -139,6 +141,11 @@ protected:
|
||||||
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
|
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Computes the area affected by a VRAM transfer. A transfer that wraps around in X or Y is widened to the full VRAM width or height.
|
||||||
|
Common::Rectangle<u32> GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height);
|
||||||
|
|
||||||
|
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
|
||||||
|
|
||||||
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
||||||
BatchVertex* m_batch_end_vertex_ptr = nullptr;
|
BatchVertex* m_batch_end_vertex_ptr = nullptr;
|
||||||
BatchVertex* m_batch_current_vertex_ptr = nullptr;
|
BatchVertex* m_batch_current_vertex_ptr = nullptr;
|
||||||
|
|
|
@ -159,7 +159,9 @@ bool GPU_HW_D3D11::CreateFramebuffer()
|
||||||
|
|
||||||
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
|
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
|
||||||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) ||
|
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) ||
|
||||||
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true))
|
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
|
||||||
|
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, true, true) ||
|
||||||
|
!m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -177,12 +179,6 @@ bool GPU_HW_D3D11::CreateFramebuffer()
|
||||||
old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter);
|
old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_resolution_scale > 1 &&
|
|
||||||
!m_vram_downsample_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true))
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
|
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
|
||||||
SetFullVRAMDirtyRectangle();
|
SetFullVRAMDirtyRectangle();
|
||||||
return true;
|
return true;
|
||||||
|
@ -199,8 +195,9 @@ void GPU_HW_D3D11::DestroyFramebuffer()
|
||||||
{
|
{
|
||||||
m_vram_read_texture.Destroy();
|
m_vram_read_texture.Destroy();
|
||||||
m_vram_texture.Destroy();
|
m_vram_texture.Destroy();
|
||||||
m_vram_downsample_texture.Destroy();
|
m_vram_encoding_texture.Destroy();
|
||||||
m_display_texture.Destroy();
|
m_display_texture.Destroy();
|
||||||
|
m_vram_readback_texture.Destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GPU_HW_D3D11::CreateVertexBuffer()
|
bool GPU_HW_D3D11::CreateVertexBuffer()
|
||||||
|
@ -370,6 +367,11 @@ bool GPU_HW_D3D11::CompileShaders()
|
||||||
if (!m_fill_pixel_shader)
|
if (!m_fill_pixel_shader)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
m_vram_read_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader(
|
||||||
|
m_device.Get(), shadergen.GenerateVRAMReadFragmentShader(), debug);
|
||||||
|
if (!m_vram_read_pixel_shader)
|
||||||
|
return false;
|
||||||
|
|
||||||
m_vram_write_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader(
|
m_vram_write_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader(
|
||||||
m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(), debug);
|
m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(), debug);
|
||||||
if (!m_vram_write_pixel_shader)
|
if (!m_vram_write_pixel_shader)
|
||||||
|
@ -548,17 +550,16 @@ void GPU_HW_D3D11::UpdateDisplay()
|
||||||
{
|
{
|
||||||
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
||||||
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
||||||
BlitTexture(m_vram_downsample_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height,
|
BlitTexture(m_vram_encoding_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height,
|
||||||
m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width,
|
m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width,
|
||||||
scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false);
|
scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false);
|
||||||
|
|
||||||
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
|
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
|
||||||
m_context->PSSetShaderResources(0, 1, m_vram_downsample_texture.GetD3DSRVArray());
|
m_context->PSSetShaderResources(0, 1, m_vram_encoding_texture.GetD3DSRVArray());
|
||||||
|
|
||||||
const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset};
|
const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset};
|
||||||
SetViewportAndScissor(0, field_offset, display_width, display_height);
|
SetViewportAndScissor(0, field_offset, display_width, display_height);
|
||||||
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
||||||
UploadUniformBlock(uniforms, sizeof(uniforms));
|
|
||||||
|
|
||||||
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, display_width, display_height,
|
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, display_width, display_height,
|
||||||
m_display_texture.GetWidth(), m_display_texture.GetHeight(),
|
m_display_texture.GetWidth(), m_display_texture.GetHeight(),
|
||||||
|
@ -572,7 +573,6 @@ void GPU_HW_D3D11::UpdateDisplay()
|
||||||
const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset};
|
const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset};
|
||||||
SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height);
|
SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height);
|
||||||
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
|
||||||
UploadUniformBlock(uniforms, sizeof(uniforms));
|
|
||||||
|
|
||||||
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, scaled_display_width,
|
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, scaled_display_width,
|
||||||
scaled_display_height, m_display_texture.GetWidth(),
|
scaled_display_height, m_display_texture.GetWidth(),
|
||||||
|
@ -586,7 +586,37 @@ void GPU_HW_D3D11::UpdateDisplay()
|
||||||
|
|
||||||
/// Reads a block of VRAM back from the GPU as 16-bit pixels.
///
/// The VRAM texture is run through the VRAM-read pixel shader, which packs two 16-bit pixels into each
/// 32-bit texel of the encoding texture. That result is copied to a staging texture, mapped, and written
/// into the CPU-side shadow buffer; the base-class ReadVRAM() then copies the requested region (with
/// wrap-around) from the shadow buffer into `buffer`.
void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer)
{
  // Get bounds with wrap-around handled.
  const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
  const u32 copy_width = copy_rect.GetWidth();

  // Two pixels are packed per encoded texel, so round up: with a plain divide an odd-width transfer
  // would never encode its last column (and a 1-pixel-wide transfer would encode nothing at all).
  const u32 encoded_width = (copy_width + 1) / 2;
  const u32 encoded_height = copy_rect.GetHeight();

  // Encode the 24-bit texture as 16-bit.
  const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_width, encoded_height};
  m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr);
  m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
  SetViewportAndScissor(0, 0, encoded_width, encoded_height);
  DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));

  // Stage the readback.
  m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0,
                                          encoded_width, encoded_height);

  // And copy it into our shadow buffer.
  if (m_vram_readback_texture.Map(m_context.Get(), false))
  {
    // Copy exactly the requested number of 16-bit pixels per row. For odd widths the final encoded texel
    // carries one extra pixel from outside the rectangle, which must not spill into the shadow buffer.
    // NOTE(review): assumes the width argument counts shadow-buffer elements, as the previous
    // `encoded_width * 2` argument implies - confirm against StagingTexture::ReadPixels.
    m_vram_readback_texture.ReadPixels(0, 0, copy_width, encoded_height, VRAM_WIDTH,
                                       &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]);
    m_vram_readback_texture.Unmap(m_context.Get());
  }
  else
  {
    Log_ErrorPrintf("Failed to map VRAM readback texture");
  }

  RestoreGraphicsAPIState();

  // Feed the shadow buffer back to the output.
  GPU_HW::ReadVRAM(x, y, width, height, buffer);
}
|
||||||
|
|
||||||
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
|
@ -69,7 +69,7 @@ private:
|
||||||
// downsample texture - used for readbacks at >1xIR.
|
// downsample texture - used for readbacks at >1xIR.
|
||||||
D3D11::Texture m_vram_texture;
|
D3D11::Texture m_vram_texture;
|
||||||
D3D11::Texture m_vram_read_texture;
|
D3D11::Texture m_vram_read_texture;
|
||||||
D3D11::Texture m_vram_downsample_texture;
|
D3D11::Texture m_vram_encoding_texture;
|
||||||
D3D11::Texture m_display_texture;
|
D3D11::Texture m_display_texture;
|
||||||
|
|
||||||
D3D11::StreamBuffer m_vertex_stream_buffer;
|
D3D11::StreamBuffer m_vertex_stream_buffer;
|
||||||
|
@ -77,6 +77,9 @@ private:
|
||||||
D3D11::StreamBuffer m_uniform_stream_buffer;
|
D3D11::StreamBuffer m_uniform_stream_buffer;
|
||||||
|
|
||||||
D3D11::StreamBuffer m_texture_stream_buffer;
|
D3D11::StreamBuffer m_texture_stream_buffer;
|
||||||
|
|
||||||
|
D3D11::StagingTexture m_vram_readback_texture;
|
||||||
|
|
||||||
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
||||||
|
|
||||||
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
|
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
|
||||||
|
@ -98,6 +101,7 @@ private:
|
||||||
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
|
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_fill_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_fill_pixel_shader;
|
||||||
|
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
||||||
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
|
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
|
||||||
};
|
};
|
||||||
|
|
|
@ -179,11 +179,8 @@ void GPU_HW_OpenGL::CreateFramebuffer()
|
||||||
m_vram_read_texture =
|
m_vram_read_texture =
|
||||||
std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
|
std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
|
||||||
|
|
||||||
if (m_resolution_scale > 1)
|
m_vram_encoding_texture =
|
||||||
{
|
std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
|
||||||
m_vram_downsample_texture =
|
|
||||||
std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_display_texture =
|
m_display_texture =
|
||||||
std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
|
std::make_unique<GL::Texture>(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true);
|
||||||
|
@ -205,7 +202,7 @@ void GPU_HW_OpenGL::DestroyFramebuffer()
|
||||||
{
|
{
|
||||||
m_vram_read_texture.reset();
|
m_vram_read_texture.reset();
|
||||||
m_vram_texture.reset();
|
m_vram_texture.reset();
|
||||||
m_vram_downsample_texture.reset();
|
m_vram_encoding_texture.reset();
|
||||||
m_display_texture.reset();
|
m_display_texture.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -325,6 +322,23 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!m_vram_read_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
|
||||||
|
shadergen.GenerateVRAMReadFragmentShader()))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m_is_gles)
|
||||||
|
m_vram_read_program.BindFragData(0, "o_col0");
|
||||||
|
|
||||||
|
if (!m_vram_read_program.Link())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
m_vram_read_program.BindUniformBlock("UBOBlock", 1);
|
||||||
|
|
||||||
|
m_vram_read_program.Bind();
|
||||||
|
m_vram_read_program.Uniform1i("samp0", 0);
|
||||||
|
|
||||||
if (m_supports_texture_buffer)
|
if (m_supports_texture_buffer)
|
||||||
{
|
{
|
||||||
if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
|
if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
|
||||||
|
@ -462,7 +476,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
||||||
{
|
{
|
||||||
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
|
||||||
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
const u32 scaled_copy_width = copy_width * m_resolution_scale;
|
||||||
m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||||
m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
|
m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||||
glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
|
glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
|
||||||
scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
|
scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
|
||||||
|
@ -470,7 +484,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
||||||
GL_NEAREST);
|
GL_NEAREST);
|
||||||
|
|
||||||
m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||||
m_vram_downsample_texture->Bind();
|
m_vram_encoding_texture->Bind();
|
||||||
|
|
||||||
glViewport(0, field_offset, display_width, display_height);
|
glViewport(0, field_offset, display_width, display_height);
|
||||||
|
|
||||||
|
@ -514,60 +528,35 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
||||||
|
|
||||||
/// Reads a block of VRAM back from the GPU as 16-bit pixels.
///
/// The VRAM texture is drawn through the VRAM-read program, which packs two 16-bit pixels into each RGBA8
/// texel of the encoding texture. The encoded texels are read straight into the CPU-side shadow buffer,
/// and the base-class ReadVRAM() then copies the requested region (with wrap-around) into `buffer`.
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer)
{
  // Get bounds with wrap-around handled.
  const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
  const u32 copy_width = copy_rect.GetWidth();

  // Two pixels are packed per encoded texel. Round up so an odd-width transfer still encodes its last
  // column; `whole_pairs` is the number of texels whose two halves both lie inside the rectangle.
  const u32 whole_pairs = copy_width / 2;
  const u32 encoded_width = (copy_width + 1) / 2;
  const u32 encoded_height = copy_rect.GetHeight();
  const u32 shadow_start = copy_rect.top * VRAM_WIDTH + copy_rect.left;

  // Encode the 24-bit texture as 16-bit. The Y base coordinate is flipped for the lower-left origin.
  const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_width,
                           encoded_height};
  m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER);
  m_vram_texture->Bind();
  m_vram_read_program.Bind();
  UploadUniformBlock(uniforms, sizeof(uniforms));
  glDisable(GL_BLEND);
  glDisable(GL_SCISSOR_TEST);
  glViewport(0, 0, encoded_width, encoded_height);
  glDrawArrays(GL_TRIANGLES, 0, 3);

  // Readback encoded texture.
  m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
  if (whole_pairs > 0)
  {
    // Rows land directly in the shadow buffer: the destination is only 2-byte aligned, and one shadow
    // row of VRAM_WIDTH 16-bit pixels equals VRAM_WIDTH / 2 RGBA8 texels.
    glPixelStorei(GL_PACK_ALIGNMENT, 2);
    glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2);
    glReadPixels(0, 0, whole_pairs, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, &m_vram_shadow[shadow_start]);
    glPixelStorei(GL_PACK_ALIGNMENT, 4);
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
  }

  if (encoded_width != whole_pairs)
  {
    // Odd width: only the first half (R = low byte, G = high byte) of the final texel column belongs to
    // the rectangle. Fetch that column on its own so the out-of-rectangle half is never written to the
    // shadow buffer (it could land in the next row, or past the end of the buffer).
    std::array<u8, VRAM_HEIGHT * 4> last_column;
    glReadPixels(whole_pairs, 0, 1, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, last_column.data());

    const u32 last_pixel = shadow_start + copy_width - 1;
    for (u32 row = 0; row < encoded_height; row++)
    {
      const u32 lo = last_column[row * 4 + 0];
      const u32 hi = last_column[row * 4 + 1];
      m_vram_shadow[last_pixel + row * VRAM_WIDTH] = static_cast<u16>(lo | (hi << 8));
    }
  }

  RestoreGraphicsAPIState();

  // Feed the shadow buffer back to the output.
  GPU_HW::ReadVRAM(x, y, width, height, buffer);
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
@ -655,7 +644,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
|
|
||||||
// have to write to the 1x texture first
|
// have to write to the 1x texture first
|
||||||
if (m_resolution_scale > 1)
|
if (m_resolution_scale > 1)
|
||||||
m_vram_downsample_texture->Bind();
|
m_vram_encoding_texture->Bind();
|
||||||
else
|
else
|
||||||
m_vram_texture->Bind();
|
m_vram_texture->Bind();
|
||||||
|
|
||||||
|
@ -676,7 +665,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
const u32 scaled_y = y * m_resolution_scale;
|
const u32 scaled_y = y * m_resolution_scale;
|
||||||
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
|
const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
|
||||||
glDisable(GL_SCISSOR_TEST);
|
glDisable(GL_SCISSOR_TEST);
|
||||||
m_vram_downsample_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
|
m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||||
glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y,
|
glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y,
|
||||||
scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||||
glEnable(GL_SCISSOR_TEST);
|
glEnable(GL_SCISSOR_TEST);
|
||||||
|
|
|
@ -62,7 +62,7 @@ private:
|
||||||
// downsample texture - used for readbacks at >1xIR.
|
// downsample texture - used for readbacks at >1xIR.
|
||||||
std::unique_ptr<GL::Texture> m_vram_texture;
|
std::unique_ptr<GL::Texture> m_vram_texture;
|
||||||
std::unique_ptr<GL::Texture> m_vram_read_texture;
|
std::unique_ptr<GL::Texture> m_vram_read_texture;
|
||||||
std::unique_ptr<GL::Texture> m_vram_downsample_texture;
|
std::unique_ptr<GL::Texture> m_vram_encoding_texture;
|
||||||
std::unique_ptr<GL::Texture> m_display_texture;
|
std::unique_ptr<GL::Texture> m_display_texture;
|
||||||
|
|
||||||
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
|
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
|
||||||
|
@ -76,6 +76,7 @@ private:
|
||||||
|
|
||||||
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
|
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
|
||||||
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
|
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
|
||||||
|
GL::Program m_vram_read_program;
|
||||||
GL::Program m_vram_write_program;
|
GL::Program m_vram_write_program;
|
||||||
|
|
||||||
u32 m_uniform_buffer_alignment = 1;
|
u32 m_uniform_buffer_alignment = 1;
|
||||||
|
|
|
@ -63,6 +63,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
|
||||||
ss << "#define CONSTANT const\n";
|
ss << "#define CONSTANT const\n";
|
||||||
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
|
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
|
||||||
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
|
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
|
||||||
|
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";
|
||||||
ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n";
|
ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -71,6 +72,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
|
||||||
ss << "#define CONSTANT static const\n";
|
ss << "#define CONSTANT static const\n";
|
||||||
ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
|
ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
|
||||||
ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
|
ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
|
||||||
|
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n";
|
||||||
ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n";
|
ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -705,6 +707,59 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader()
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
WriteHeader(ss);
|
||||||
|
WriteCommonFunctions(ss);
|
||||||
|
DeclareUniformBuffer(ss, {"int2 u_base_coords", "int2 u_size"});
|
||||||
|
|
||||||
|
DeclareTexture(ss, "samp0", 0);
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
uint SampleVRAM(int2 coords)
|
||||||
|
{
|
||||||
|
if (RESOLUTION_SCALE == 1)
|
||||||
|
return RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, coords, 0));
|
||||||
|
|
||||||
|
// Box filter for downsampling.
|
||||||
|
float4 value = float4(0.0, 0.0, 0.0, 0.0);
|
||||||
|
int2 base_coords = coords * int2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||||
|
for (int offset_x = 0; offset_x < RESOLUTION_SCALE; offset_x++)
|
||||||
|
{
|
||||||
|
for (int offset_y = 0; offset_y < RESOLUTION_SCALE; offset_y++)
|
||||||
|
value += LOAD_TEXTURE(samp0, base_coords + int2(offset_x, offset_y), 0);
|
||||||
|
}
|
||||||
|
value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE);
|
||||||
|
return RGBA8ToRGBA5551(value);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||||
|
ss << R"(
|
||||||
|
{
|
||||||
|
int2 sample_coords = int2(int(v_pos.x) * 2, int(v_pos.y));
|
||||||
|
|
||||||
|
#if API_OPENGL || API_OPENGL_ES || 1
|
||||||
|
// Lower-left origin flip for OpenGL.
|
||||||
|
// We want to write the image out upside-down so we can read it top-to-bottom.
|
||||||
|
sample_coords.y = u_size.y - sample_coords.y - 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
sample_coords += u_base_coords;
|
||||||
|
|
||||||
|
// We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel.
|
||||||
|
uint left = SampleVRAM(sample_coords);
|
||||||
|
uint right = SampleVRAM(int2(sample_coords.x + 1, sample_coords.y));
|
||||||
|
|
||||||
|
o_col0 = float4(float(left & 0xFFu), float((left >> 8) & 0xFFu),
|
||||||
|
float(right & 0xFFu), float((right >> 8) & 0xFFu))
|
||||||
|
/ float4(255.0, 255.0, 255.0, 255.0);
|
||||||
|
})";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
|
|
@ -18,6 +18,7 @@ public:
|
||||||
std::string GenerateFillFragmentShader();
|
std::string GenerateFillFragmentShader();
|
||||||
std::string GenerateCopyFragmentShader();
|
std::string GenerateCopyFragmentShader();
|
||||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
||||||
|
std::string GenerateVRAMReadFragmentShader();
|
||||||
std::string GenerateVRAMWriteFragmentShader();
|
std::string GenerateVRAMWriteFragmentShader();
|
||||||
|
|
||||||
HostDisplay::RenderAPI m_render_api;
|
HostDisplay::RenderAPI m_render_api;
|
||||||
|
|
Loading…
Reference in a new issue