GPU/HW: Mask bit handling in hardware renderers

Fixes:
 - Menu effect in Ghost in the Shell
 - Incorrect text colours in menu of Dragon Quest VII
 - Fade effect in TwinBee RPG
 - Fog in Silent Hill
 - Water in Duke Nukem - Land of the Babes
 - Shadows in Ultraman - Fighting Evolution

and probably others.
This commit is contained in:
Connor McLaughlin 2020-05-03 17:11:28 +10:00
parent a5ecff0893
commit 9446587e8f
10 changed files with 448 additions and 128 deletions

View file

@ -27,14 +27,10 @@ D3D11_TEXTURE2D_DESC Texture::GetDesc() const
return desc;
}
bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, bool shader_resource,
bool render_target, const void* initial_data, u32 initial_data_stride)
bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, u32 bind_flags,
const void* initial_data, u32 initial_data_stride)
{
CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, D3D11_USAGE_DEFAULT, 0, 1, 0, 0);
if (shader_resource)
desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
if (render_target)
desc.BindFlags |= D3D11_BIND_RENDER_TARGET;
CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, bind_flags, D3D11_USAGE_DEFAULT, 0, 1, 0, 0);
D3D11_SUBRESOURCE_DATA srd;
srd.pSysMem = initial_data;
@ -50,7 +46,7 @@ bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT fo
}
ComPtr<ID3D11ShaderResourceView> srv;
if (shader_resource)
if (bind_flags & D3D11_BIND_SHADER_RESOURCE)
{
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_TEXTURE2D, desc.Format, 0, desc.MipLevels, 0,
desc.ArraySize);
@ -63,7 +59,7 @@ bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT fo
}
ComPtr<ID3D11RenderTargetView> rtv;
if (render_target)
if (bind_flags & D3D11_BIND_RENDER_TARGET)
{
const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2D, desc.Format, 0, 0, desc.ArraySize);
const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf());

View file

@ -31,7 +31,7 @@ public:
ALWAYS_INLINE operator ID3D11RenderTargetView*() const { return m_rtv.Get(); }
ALWAYS_INLINE operator bool() const { return static_cast<bool>(m_texture); }
bool Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, bool shader_resource, bool render_target,
bool Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, u32 bind_flags,
const void* initial_data = nullptr, u32 initial_data_stride = 0);
bool Adopt(ID3D11Device* device, ComPtr<ID3D11Texture2D> texture);

View file

@ -28,6 +28,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display, System* system, DMA* dma, Int
const Settings& settings = m_system->GetSettings();
m_resolution_scale = settings.gpu_resolution_scale;
m_render_api = host_display->GetRenderAPI();
m_true_color = settings.gpu_true_color;
m_scaled_dithering = settings.gpu_scaled_dithering;
m_texture_filtering = settings.gpu_texture_filtering;
@ -46,10 +47,15 @@ void GPU_HW::Reset()
{
GPU::Reset();
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
m_vram_shadow.fill(0);
m_batch = {};
m_batch_ubo_data = {};
m_batch_current_vertex_depth_id = 1;
m_batch_next_vertex_depth_id = 2;
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
m_batch_ubo_dirty = true;
SetFullVRAMDirtyRectangle();
@ -62,7 +68,11 @@ bool GPU_HW::DoState(StateWrapper& sw)
// invalidate the whole VRAM read texture when loading state
if (sw.IsReading())
{
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
SetFullVRAMDirtyRectangle();
ResetBatchVertexDepthID();
}
return true;
}
@ -177,12 +187,11 @@ void GPU_HW::LoadVertices()
const RenderCommand rc{m_render_command.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
// TODO: Move this to the GPU..
switch (rc.primitive)
{
case Primitive::Polygon:
{
EnsureVertexBufferSpace(rc.quad_polygon ? 6 : 3);
DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
@ -308,9 +317,7 @@ void GPU_HW::LoadVertices()
}
// we can split the rectangle up into potentially 8 quads
const u32 required_vertices = 6 * (((rectangle_width + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
(((rectangle_height + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u);
EnsureVertexBufferSpace(required_vertices);
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u16 tex_top = orig_tex_top;
@ -361,7 +368,7 @@ void GPU_HW::LoadVertices()
{
if (!rc.polyline)
{
EnsureVertexBufferSpace(2);
DebugAssert(GetBatchVertexSpace() >= 2);
u32 color0, color1;
VertexPosition pos0, pos1;
@ -410,7 +417,7 @@ void GPU_HW::LoadVertices()
{
// Multiply by two because we don't use line strips.
const u32 num_vertices = GetPolyLineVertexCount();
EnsureVertexBufferSpace(num_vertices * 2);
DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
@ -534,6 +541,73 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
MapBatchVertexPointer(required_vertices);
}
void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
{
u32 required_vertices;
switch (m_render_command.primitive)
{
case Primitive::Polygon:
required_vertices = m_render_command.quad_polygon ? 6 : 3;
break;
case Primitive::Rectangle:
required_vertices = MAX_VERTICES_FOR_RECTANGLE;
break;
case Primitive::Line:
default:
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 2u) : 2u;
break;
}
// can we fit these vertices in the current depth buffer range?
if (BatchVertexDepthIDNeedsUpdate() &&
(m_batch_next_vertex_depth_id + GetBatchVertexCount() + required_vertices) > MAX_BATCH_VERTEX_COUNTER_IDS)
{
// implies FlushRender()
ResetBatchVertexDepthID();
}
else if (m_batch_current_vertex_ptr)
{
if (GetBatchVertexSpace() >= required_vertices)
return;
FlushRender();
}
MapBatchVertexPointer(required_vertices);
}
/// Restarts the per-vertex depth ID counter used for mask bit emulation.
/// Any queued vertices are drawn first, then the backend rebuilds the depth buffer from the mask
/// bit, and finally the current/next IDs are rewound to the same starting values that Reset() uses.
void GPU_HW::ResetBatchVertexDepthID()
{
Log_PerfPrint("Resetting batch vertex depth ID");
// Draw whatever is pending before the IDs it was queued with are discarded.
FlushRender();
// Backend-specific (pure virtual in GPU_HW): regenerate depth contents from the mask bit.
UpdateDepthBufferFromMaskBit();
// Rewind the counters: ID 1 is current, ID 2 is the next one to hand out.
m_batch_current_vertex_depth_id = 1;
m_batch_next_vertex_depth_id = 2;
// Push the rewound ID into the batch uniform data (marks the UBO dirty if the value changed).
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
}
/// Advances the next vertex depth ID by `count`.
/// Debug builds assert that the advanced ID does not exceed MAX_BATCH_VERTEX_COUNTER_IDS.
void GPU_HW::IncrementBatchVertexID(u32 count)
{
  const u32 advanced_id = m_batch_next_vertex_depth_id + count;
  DebugAssert(advanced_id <= MAX_BATCH_VERTEX_COUNTER_IDS);
  m_batch_next_vertex_depth_id = advanced_id;
}
/// Stores the vertex depth ID `value` in the batch uniform data, adjusted for the active render API,
/// and flags the uniform buffer as dirty when the stored value actually changes.
void GPU_HW::SetBatchUBOVertexDepthID(u32 value)
{
  // In OpenGL, gl_VertexID is inclusive of the base vertex, whereas SV_VertexID in D3D isn't.
  // For non-D3D11 APIs the value is therefore expressed relative to the batch base vertex, relying
  // on unsigned wrap-around of the subtraction.
  // NOTE(review): the operand order must match how the batch vertex shader combines the vertex ID
  // with u_vertex_depth_id; the shader is not visible here - confirm before changing.
  const bool is_d3d11 = (m_render_api == HostDisplay::RenderAPI::D3D11);
  const u32 adjusted_id = is_d3d11 ? value : (m_batch_base_vertex - value);

  if (m_batch_ubo_data.u_vertex_depth_id != adjusted_id)
    m_batch_ubo_dirty = true;

  m_batch_ubo_data.u_vertex_depth_id = adjusted_id;
}
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
IncludeVRAMDityRectangle(
@ -544,12 +618,26 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT);
IncludeVRAMDityRectangle(Common::Rectangle<u32>::FromExtents(x, y, width, height));
if (m_GPUSTAT.check_mask_before_draw)
{
// set new vertex counter since we want this to take into consideration previous masked pixels
m_batch_current_vertex_depth_id = m_batch_next_vertex_depth_id++;
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
}
}
/// Common bookkeeping for a VRAM-to-VRAM copy: marks the destination rectangle (clamped to VRAM
/// bounds) as dirty and, when mask checking is enabled, starts a new vertex depth ID.
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
  const Common::Rectangle<u32> dirty_rect =
    Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
  IncludeVRAMDityRectangle(dirty_rect);

  if (!m_GPUSTAT.check_mask_before_draw)
    return;

  // set new vertex counter since we want this to take into consideration previous masked pixels
  m_batch_current_vertex_depth_id = m_batch_next_vertex_depth_id;
  m_batch_next_vertex_depth_id++;
  SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
}
void GPU_HW::DispatchRenderCommand()
@ -600,6 +688,8 @@ void GPU_HW::DispatchRenderCommand()
FlushRender();
}
EnsureVertexBufferSpaceForCurrentCommand();
// transparency mode change
if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled)
{
@ -614,7 +704,8 @@ void GPU_HW::DispatchRenderCommand()
{
m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_GPUSTAT.set_mask_while_drawing);
m_batch_ubo_data.u_check_mask_before_draw = BoolToUInt32(m_batch.check_mask_before_draw);
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing);
m_batch_ubo_dirty = true;
}
@ -657,6 +748,10 @@ void GPU_HW::FlushRender()
if (vertex_count == 0)
return;
const bool update_depth_id = BatchVertexDepthIDNeedsUpdate();
if (update_depth_id)
SetBatchUBOVertexDepthID(m_batch_next_vertex_depth_id);
if (m_drawing_area_changed)
{
m_drawing_area_changed = false;
@ -680,6 +775,9 @@ void GPU_HW::FlushRender()
m_renderer_stats.num_batches++;
DrawBatchVertices(m_batch.GetRenderMode(), m_batch_base_vertex, vertex_count);
}
if (update_depth_id)
IncrementBatchVertexID(vertex_count);
}
void GPU_HW::DrawRendererStats(bool is_idle_frame)

View file

@ -1,6 +1,7 @@
#pragma once
#include "common/heap_array.h"
#include "gpu.h"
#include "host_display.h"
#include <sstream>
#include <string>
#include <tuple>
@ -42,7 +43,10 @@ protected:
{
VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32),
VERTEX_BUFFER_SIZE = 1 * 1024 * 1024,
UNIFORM_BUFFER_SIZE = 512 * 1024
UNIFORM_BUFFER_SIZE = 512 * 1024,
MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
(((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u)
};
struct BatchVertex
@ -102,8 +106,19 @@ protected:
u32 u_texture_window_offset[2];
float u_src_alpha_factor;
float u_dst_alpha_factor;
u32 u_set_mask_while_drawing;
u32 u_interlaced_displayed_field;
u32 u_vertex_depth_id;
u32 u_check_mask_before_draw;
u32 u_set_mask_while_drawing;
};
/// Uniform block passed to the VRAM write (CPU -> VRAM upload) utility shader.
/// Member order is part of the layout shared with the shader, so it must not be rearranged.
struct VRAMWriteUBOData
{
// Destination position of the upload; the D3D11 backend fills this with the x/y passed to UpdateVRAM().
u32 u_base_coords[2];
// Extent of the upload; filled with the width/height passed to UpdateVRAM().
u32 u_size[2];
// Offset of the uploaded data within the texture stream buffer (the aligned map index).
u32 u_buffer_base_offset;
// Bits derived from GPUSTAT.set_mask_while_drawing; zero when that flag is off.
// NOTE(review): presumably OR'd into every written pixel by the shader - shader not visible here.
u32 u_mask_or_bits;
// Depth value for the write, taken from GetCurrentNormalizedBatchVertexDepthID().
float u_depth_value;
};
struct VRAMCopyUBOData
@ -115,6 +130,7 @@ protected:
u32 u_width;
u32 u_height;
u32 u_set_mask_bit;
float u_depth_value;
};
struct RendererStats
@ -133,6 +149,7 @@ protected:
}
virtual void UpdateVRAMReadTexture() = 0;
virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void SetScissorFromDrawingArea() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
@ -147,11 +164,28 @@ protected:
void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); }
void IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect);
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
void EnsureVertexBufferSpace(u32 required_vertices);
void EnsureVertexBufferSpaceForCurrentCommand();
void ResetBatchVertexDepthID();
void IncrementBatchVertexID(u32 count);
void SetBatchUBOVertexDepthID(u32 value);
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
/// Depth value to write for the current operation when emulating the mask bit.
/// Computed from the *next* vertex depth ID, normalised against 65535 and inverted, so that
/// larger IDs produce smaller depth values.
ALWAYS_INLINE float GetCurrentNormalizedBatchVertexDepthID() const
{
  const float next_id = static_cast<float>(m_batch_next_vertex_depth_id);
  return 1.0f - (next_id / 65535.0f);
}
/// Whether the vertex depth ID has to be advanced for the batch being drawn.
ALWAYS_INLINE bool BatchVertexDepthIDNeedsUpdate() const
{
  // because GL uses base vertex we're incrementing the depth id every draw whether we like it or not
  if (m_render_api != HostDisplay::RenderAPI::D3D11)
    return true;

  // On D3D11 the ID only has to move while the batch is checking the mask bit.
  return m_batch.check_mask_before_draw;
}
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
@ -182,9 +216,12 @@ protected:
BatchVertex* m_batch_end_vertex_ptr = nullptr;
BatchVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0;
u32 m_batch_current_vertex_depth_id = 0;
u32 m_batch_next_vertex_depth_id = 0;
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
HostDisplay::RenderAPI m_render_api = HostDisplay::RenderAPI::None;
bool m_true_color = true;
bool m_scaled_dithering = false;
bool m_texture_filtering = false;

View file

@ -111,8 +111,7 @@ void GPU_HW_D3D11::RestoreGraphicsAPIState()
m_context->IASetVertexBuffers(0, 1, m_vertex_stream_buffer.GetD3DBufferArray(), &stride, &offset);
m_context->IASetInputLayout(m_batch_input_layout.Get());
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get());
m_context->RSSetState(m_cull_none_rasterizer_state.Get());
SetViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
SetScissorFromDrawingArea();
@ -171,16 +170,29 @@ bool GPU_HW_D3D11::CreateFramebuffer()
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
const DXGI_FORMAT texture_format = DXGI_FORMAT_R8G8B8A8_UNORM;
const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM;
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) ||
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, true, true) ||
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, depth_format,
D3D11_BIND_DEPTH_STENCIL) ||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format,
D3D11_BIND_SHADER_RESOURCE) ||
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format,
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
!m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false))
{
return false;
}
const CD3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc(D3D11_DSV_DIMENSION_TEXTURE2D, depth_format);
HRESULT hr =
m_device->CreateDepthStencilView(m_vram_depth_texture, &depth_view_desc, m_vram_depth_view.GetAddressOf());
if (FAILED(hr))
return false;
// do we need to restore the framebuffer after a size change?
if (old_vram_texture)
{
@ -192,10 +204,12 @@ bool GPU_HW_D3D11::CreateFramebuffer()
BlitTexture(m_vram_texture.GetD3DRTV(), 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
old_vram_texture.GetD3DSRV(), 0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(),
old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter);
UpdateDepthBufferFromMaskBit();
}
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
SetFullVRAMDirtyRectangle();
RestoreGraphicsAPIState();
return true;
}
@ -203,12 +217,16 @@ void GPU_HW_D3D11::ClearFramebuffer()
{
static constexpr std::array<float, 4> color = {};
m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data());
m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, 0.0f, 0);
m_context->ClearRenderTargetView(m_display_texture, color.data());
SetFullVRAMDirtyRectangle();
}
void GPU_HW_D3D11::DestroyFramebuffer()
{
m_vram_read_texture.Destroy();
m_vram_depth_view.Reset();
m_vram_depth_texture.Destroy();
m_vram_texture.Destroy();
m_vram_encoding_texture.Destroy();
m_display_texture.Destroy();
@ -289,11 +307,28 @@ bool GPU_HW_D3D11::CreateStateObjects()
if (FAILED(hr))
return false;
ds_desc.DepthEnable = TRUE;
ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
ds_desc.DepthFunc = D3D11_COMPARISON_ALWAYS;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_always_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
bl_desc.RenderTarget[0].RenderTargetWriteMask = 0;
hr = m_device->CreateBlendState(&bl_desc, m_blend_no_color_writes_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT());
sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler_state.ReleaseAndGetAddressOf());
@ -306,12 +341,9 @@ bool GPU_HW_D3D11::CreateStateObjects()
return false;
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
if (transparency_mode == static_cast<u8>(TransparencyMode::Disabled) && !m_texture_filtering)
{
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
}
else
if (transparency_mode != static_cast<u8>(TransparencyMode::Disabled) || m_texture_filtering)
{
bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
@ -409,6 +441,11 @@ bool GPU_HW_D3D11::CompileShaders()
if (!m_vram_copy_pixel_shader)
return false;
m_vram_update_depth_pixel_shader =
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMUpdateDepthFragmentShader());
if (!m_vram_update_depth_pixel_shader)
return false;
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
@ -467,6 +504,7 @@ void GPU_HW_D3D11::BlitTexture(ID3D11RenderTargetView* dst, u32 dst_x, u32 dst_y
static_cast<float>(src_height) / static_cast<float>(src_texture_height)};
m_context->OMSetRenderTargets(1, &dst, nullptr);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, &src);
m_context->PSSetSamplers(
0, 1, linear_filter ? m_linear_sampler_state.GetAddressOf() : m_point_sampler_state.GetAddressOf());
@ -516,6 +554,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
const TransparencyMode transparency_mode =
(render_mode == BatchRenderMode::OnlyOpaque) ? TransparencyMode::Disabled : m_batch.transparency_mode;
m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu);
m_context->OMSetDepthStencilState(
m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->Draw(num_vertices, base_vertex);
}
@ -567,6 +607,7 @@ void GPU_HW_D3D11::UpdateDisplay()
else
{
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
const u32 reinterpret_field_offset = GetInterlacedField();
@ -604,6 +645,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
SetViewportAndScissor(0, 0, encoded_width, encoded_height);
DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));
@ -654,6 +696,8 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
RGBA8ToFloat(color);
uniforms.u_interlaced_displayed_field = GetInterlacedField();
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale);
DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() :
@ -682,13 +726,21 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16));
const u32 uniforms[5] = {x, y, width, height, map_result.index_aligned};
// Uniforms for the VRAM write shader: destination position and size, the offset of the uploaded
// data in the texture stream buffer, the bits to force on in every written pixel, and the depth
// value to write for mask bit emulation.
// The mask bit of a 16-bit VRAM pixel is bit 15, so "set mask while drawing" must contribute
// 0x8000; a value of 0xFF would instead touch the low colour bits and leave the mask bit clear.
// NOTE(review): assumes the write shader ORs u_mask_or_bits into the 16-bit source value - the
// shader is not visible here, confirm against the shader generator.
const VRAMWriteUBOData uniforms = {x,
                                   y,
                                   width,
                                   height,
                                   map_result.index_aligned,
                                   m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
                                   GetCurrentNormalizedBatchVertexDepthID()};
m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
// the viewport should already be set to the full vram, so just adjust the scissor
SetScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale);
DrawUtilityShader(m_vram_write_pixel_shader.Get(), uniforms, sizeof(uniforms));
DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState();
}
@ -703,19 +755,20 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds);
const VRAMCopyUBOData uniforms = {
src_x * m_resolution_scale,
const VRAMCopyUBOData uniforms = {src_x * m_resolution_scale,
src_y * m_resolution_scale,
dst_x * m_resolution_scale,
dst_y * m_resolution_scale,
width * m_resolution_scale,
height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
};
GetCurrentNormalizedBatchVertexDepthID()};
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
dst_bounds_scaled.GetHeight());
m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState();
@ -728,6 +781,9 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)))
UpdateVRAMReadTexture();
if (m_GPUSTAT.IsMaskingEnabled())
Log_WarningPrintf("Masking enabled on VRAM copy - not implemented");
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
src_x *= m_resolution_scale;
@ -749,6 +805,21 @@ void GPU_HW_D3D11::UpdateVRAMReadTexture()
&src_box);
}
/// Rebuilds the VRAM depth buffer by running the "update depth" pixel shader over the whole VRAM
/// texture, then puts the normal batch rendering state back.
void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
{
// Cover the entire (scaled) VRAM texture.
SetViewportAndScissor(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
// Depth-only pass: no colour targets bound, depth test always passes, colour writes masked off.
m_context->OMSetRenderTargets(0, nullptr, m_vram_depth_view.Get());
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
m_context->OMSetBlendState(m_blend_no_color_writes_state.Get(), nullptr, 0xFFFFFFFFu);
// The VRAM texture itself is the shader input; it is not bound as a render target during this pass.
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
DrawUtilityShader(m_vram_update_depth_pixel_shader.Get(), nullptr, 0);
// Rebind the VRAM read texture to slot 0 before the VRAM texture becomes a render target again.
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
RestoreGraphicsAPIState();
}
std::unique_ptr<GPU> GPU::CreateHardwareD3D11Renderer()
{
return std::make_unique<GPU_HW_D3D11>();

View file

@ -34,6 +34,7 @@ protected:
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
@ -77,6 +78,8 @@ private:
// downsample texture - used for readbacks at >1xIR.
D3D11::Texture m_vram_texture;
D3D11::Texture m_vram_depth_texture;
ComPtr<ID3D11DepthStencilView> m_vram_depth_view;
D3D11::Texture m_vram_read_texture;
D3D11::Texture m_vram_encoding_texture;
D3D11::Texture m_display_texture;
@ -94,8 +97,11 @@ private:
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;
ComPtr<ID3D11DepthStencilState> m_depth_test_less_state;
ComPtr<ID3D11BlendState> m_blend_disabled_state;
ComPtr<ID3D11BlendState> m_blend_no_color_writes_state;
ComPtr<ID3D11SamplerState> m_point_sampler_state;
ComPtr<ID3D11SamplerState> m_linear_sampler_state;
@ -114,5 +120,6 @@ private:
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
};

View file

@ -11,6 +11,8 @@ GPU_HW_OpenGL::GPU_HW_OpenGL() : GPU_HW() {}
GPU_HW_OpenGL::~GPU_HW_OpenGL()
{
// Destroy objects which don't have destructors to clean them up
if (m_vram_fbo_id != 0)
glDeleteFramebuffers(1, &m_vram_fbo_id);
if (m_vao_id != 0)
glDeleteVertexArrays(1, &m_vao_id);
if (m_attributeless_vao_id != 0)
@ -90,7 +92,6 @@ void GPU_HW_OpenGL::ResetGraphicsAPIState()
glEnable(GL_CULL_FACE);
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDepthMask(GL_TRUE);
if (m_resolution_scale > 1 && !m_supports_geometry_shaders)
glLineWidth(1.0f);
glBindVertexArray(0);
@ -98,13 +99,14 @@ void GPU_HW_OpenGL::ResetGraphicsAPIState()
void GPU_HW_OpenGL::RestoreGraphicsAPIState()
{
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glEnable(GL_DEPTH_TEST);
glEnable(GL_SCISSOR_TEST);
glDepthMask(GL_FALSE);
glDepthMask(GL_TRUE);
glDepthFunc(GL_ALWAYS);
if (m_resolution_scale > 1 && !m_supports_geometry_shaders)
glLineWidth(static_cast<float>(m_resolution_scale));
glBindVertexArray(m_vao_id);
@ -211,34 +213,16 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
{
// save old vram texture/fbo, in case we're changing scale
GL::Texture old_vram_texture = std::move(m_vram_texture);
GLuint old_vram_fbo = m_vram_fbo_id;
// scale vram size to internal resolution
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
if (!m_vram_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
!m_vram_texture.CreateFramebuffer())
{
return false;
}
// do we need to restore the framebuffer after a size change?
if (old_vram_texture.IsValid())
{
const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth();
Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(),
old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
linear_filter ? "linear" : "nearest");
glDisable(GL_SCISSOR_TEST);
old_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(),
m_vram_texture.GetHeight(), GL_COLOR_BUFFER_BIT, linear_filter ? GL_LINEAR : GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
old_vram_texture.Destroy();
}
if (!m_vram_read_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
!m_vram_depth_texture.Create(texture_width, texture_height, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT,
GL_UNSIGNED_SHORT, nullptr, false) ||
!m_vram_read_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
!m_vram_read_texture.CreateFramebuffer() ||
!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
!m_vram_encoding_texture.CreateFramebuffer() ||
@ -248,7 +232,32 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
return false;
}
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glGenFramebuffers(1, &m_vram_fbo_id);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_texture.GetGLId(), 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_vram_depth_texture.GetGLId(), 0);
Assert(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
// do we need to restore the framebuffer after a size change?
if (old_vram_fbo != 0)
{
const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth();
Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(),
old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
linear_filter ? "linear" : "nearest");
glDisable(GL_SCISSOR_TEST);
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_vram_fbo);
glBlitFramebuffer(0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(),
m_vram_texture.GetHeight(), GL_COLOR_BUFFER_BIT, linear_filter ? GL_LINEAR : GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
old_vram_texture.Destroy();
glDeleteFramebuffers(1, &old_vram_fbo);
UpdateDepthBufferFromMaskBit();
}
SetFullVRAMDirtyRectangle();
return true;
}
@ -257,7 +266,8 @@ void GPU_HW_OpenGL::ClearFramebuffer()
{
glDisable(GL_SCISSOR_TEST);
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glClear(GL_COLOR_BUFFER_BIT);
glClearDepth(0.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glEnable(GL_SCISSOR_TEST);
SetFullVRAMDirtyRectangle();
}
@ -470,6 +480,15 @@ bool GPU_HW_OpenGL::CompilePrograms()
}
m_vram_copy_program = std::move(*prog);
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
shadergen.GenerateVRAMUpdateDepthFragmentShader());
if (!prog)
return false;
prog->Bind();
prog->Uniform1i("samp0", 0);
m_vram_update_depth_program = std::move(*prog);
if (m_supports_texture_buffer)
{
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
@ -519,6 +538,8 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO);
}
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
static constexpr std::array<GLenum, 4> gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}};
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], m_batch_base_vertex, num_vertices);
}
@ -590,6 +611,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
{
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST);
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Bind();
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
@ -607,6 +629,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
m_batch_ubo_dirty = true;
glViewport(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
@ -615,8 +638,10 @@ void GPU_HW_OpenGL::UpdateDisplay()
scaled_display_width, -static_cast<s32>(scaled_display_height));
// restore state
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
glBindVertexArray(m_vao_id);
glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
glEnable(GL_DEPTH_TEST);
glEnable(GL_SCISSOR_TEST);
}
@ -644,6 +669,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
glViewport(0, 0, encoded_width, encoded_height);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
// Readback encoded texture.
@ -688,7 +714,8 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
const auto [r, g, b, a] = RGBA8ToFloat(color);
glClearColor(r, g, b, a);
glClear(GL_COLOR_BUFFER_BIT);
glClearDepth(a);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
SetScissorFromDrawingArea();
}
else
@ -705,6 +732,9 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
m_vram_interlaced_fill_program.Bind();
UploadUniformBuffer(&uniforms, sizeof(uniforms));
glDisable(GL_BLEND);
glDepthFunc(GL_ALWAYS);
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
RestoreGraphicsAPIState();
@ -743,13 +773,21 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
glViewport(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
m_vram_write_program.Bind();
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
const u32 uniforms[5] = {x, flipped_y, width, height, map_result.index_aligned};
UploadUniformBuffer(uniforms, sizeof(uniforms));
const VRAMWriteUBOData uniforms = {x,
flipped_y,
width,
height,
map_result.index_aligned,
m_GPUSTAT.set_mask_while_drawing ? 0xFFu : 0x00,
GetCurrentNormalizedBatchVertexDepthID()};
UploadUniformBuffer(&uniforms, sizeof(uniforms));
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
RestoreGraphicsAPIState();
@ -822,21 +860,21 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds);
VRAMCopyUBOData uniforms = {
src_x * m_resolution_scale,
VRAMCopyUBOData uniforms = {src_x * m_resolution_scale,
src_y * m_resolution_scale,
dst_x * m_resolution_scale,
dst_y * m_resolution_scale,
width * m_resolution_scale,
height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
};
GetCurrentNormalizedBatchVertexDepthID()};
uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height;
uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height;
UploadUniformBuffer(&uniforms, sizeof(uniforms));
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
glViewport(dst_bounds_scaled.left,
@ -876,7 +914,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
else
{
glDisable(GL_SCISSOR_TEST);
m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
@ -904,14 +942,31 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
else
{
m_vram_read_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
glDisable(GL_SCISSOR_TEST);
glBlitFramebuffer(x, y, x + width, y + height, x, y, x + width, y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
m_vram_texture.BindFramebuffer(GL_FRAMEBUFFER);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
}
}
// Rewrites the depth buffer from the contents of the VRAM colour texture.
//
// Draws a single attributeless 3-vertex triangle with m_vram_update_depth_program, with
// colour writes masked off and the depth comparison forced to always pass. That program's
// fragment shader (GenerateVRAMUpdateDepthFragmentShader) outputs the sampled texel's alpha
// as the fragment depth.
//
// NOTE(review): no framebuffer or viewport is set here - presumably the VRAM framebuffer is
// already bound for drawing and the viewport covers it; confirm against callers.
// NOTE(review): GL only writes depth while GL_DEPTH_TEST is enabled, and this function does
// not enable it - assumes the caller leaves it on (UpdateDisplay re-enables it on exit).
void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit()
{
// Nothing may reject or alter a fragment: no scissor, no blending.
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
// Depth-only pass: colour channels are masked so the VRAM colour data is left untouched.
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
// Overwrite whatever depth is currently stored, regardless of its value.
glDepthFunc(GL_ALWAYS);
// Source texture for the shader's samp0 (set to unit 0 in CompilePrograms).
// NOTE(review): presumably Bind() targets the active unit 0 - confirm.
m_vram_texture.Bind();
m_vram_update_depth_program.Bind();
// The draw uses no vertex attributes; positions come from the vertex shader.
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
// Restore the batch VAO, colour writes and scissor test.
// NOTE(review): blend enable, depth func and the texture binding are NOT restored here.
// DrawBatchVertices sets the depth func before each draw; verify the other two are
// re-established by later draws.
glBindVertexArray(m_vao_id);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glEnable(GL_SCISSOR_TEST);
}
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()
{
return std::make_unique<GPU_HW_OpenGL>();

View file

@ -30,6 +30,7 @@ protected:
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
@ -63,11 +64,13 @@ private:
// downsample texture - used for readbacks at >1xIR.
GL::Texture m_vram_texture;
GL::Texture m_vram_depth_texture;
GL::Texture m_vram_read_texture;
GL::Texture m_vram_encoding_texture;
GL::Texture m_display_texture;
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
GLuint m_vram_fbo_id = 0;
GLuint m_vao_id = 0;
GLuint m_attributeless_vao_id = 0;
@ -85,6 +88,7 @@ private:
GL::Program m_vram_read_program;
GL::Program m_vram_write_program;
GL::Program m_vram_copy_program;
GL::Program m_vram_update_depth_program;
u32 m_uniform_buffer_alignment = 1;
u32 m_max_texture_buffer_size = 0;

View file

@ -319,6 +319,9 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
{
ss << "void main(\n";
if (declare_vertex_id)
ss << " in uint v_id : SV_VertexID,\n";
u32 attribute_counter = 0;
for (const char* attribute : attributes)
{
@ -326,9 +329,6 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
attribute_counter++;
}
if (declare_vertex_id)
ss << " in uint v_id : SV_VertexID,\n";
for (u32 i = 0; i < num_color_outputs; i++)
ss << " out float4 v_col" << i << " : COLOR" << i << ",\n";
@ -349,7 +349,7 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
bool declare_fragcoord /* = false */, bool dual_color_output /* = false */)
bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */)
{
if (m_glsl)
{
@ -381,23 +381,18 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
if (declare_fragcoord)
ss << "#define v_pos gl_FragCoord\n";
if (depth_output)
ss << "#define o_depth gl_FragDepth\n";
if (m_use_glsl_binding_layout)
{
if (dual_color_output)
{
ss << "layout(location = 0, index = 0) out float4 o_col0;\n";
ss << "layout(location = 0, index = 1) out float4 o_col1;\n";
for (u32 i = 0; i < num_color_outputs; i++)
ss << "layout(location = 0, index = " << i << ") out float4 o_col" << i << ";\n";
}
else
{
ss << "layout(location = 0) out float4 o_col0;\n";
}
}
else
{
ss << "out float4 o_col0;\n";
if (dual_color_output)
ss << "out float4 o_col1;\n";
for (u32 i = 0; i < num_color_outputs; i++)
ss << "out float4 o_col" << i << ";\n";
}
ss << "\n";
@ -425,14 +420,23 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
if (declare_fragcoord)
ss << " in float4 v_pos : SV_Position,\n";
if (dual_color_output)
if (depth_output)
{
ss << " out float4 o_col0 : SV_Target0,\n";
ss << " out float4 o_col1 : SV_Target1)\n";
}
ss << " out float o_depth : SV_Depth";
if (num_color_outputs > 0)
ss << ",\n";
else
ss << ")\n";
}
for (u32 i = 0; i < num_color_outputs; i++)
{
ss << " out float4 o_col0 : SV_Target)";
ss << " out float4 o_col" << i << " : SV_Target" << i;
if (i == (num_color_outputs - 1))
ss << ")\n";
else
ss << ",\n";
}
}
}
@ -440,9 +444,10 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
{
DeclareUniformBuffer(ss, {"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor",
"float u_dst_alpha_factor", "bool u_set_mask_while_drawing",
"uint u_interlaced_displayed_field"});
DeclareUniformBuffer(ss,
{"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor",
"float u_dst_alpha_factor", "uint u_interlaced_displayed_field", "uint u_base_vertex_depth_id",
"bool u_check_mask_before_draw", "bool u_set_mask_while_drawing"});
}
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
@ -459,11 +464,11 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
if (textured)
{
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}});
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float v_depth"}}, true);
}
else
{
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0"}, 1, 0, {});
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0"}, 1, 0, {{"nointerpolation", "float v_depth"}}, true);
}
ss << R"(
@ -484,6 +489,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
#endif
v_pos = float4(pos_x, pos_y, 0.0, 1.0);
#if API_D3D11
v_depth = 1.0 - (float(u_base_vertex_depth_id + (u_check_mask_before_draw ? 0u : v_id)) / 65535.0);
#else
v_depth = 1.0 - (float(v_id - u_base_vertex_depth_id) / 65535.0);
#endif
v_col0 = a_col0;
#if TEXTURED
// Fudge the texture coordinates by half a pixel in screen-space.
@ -616,11 +627,12 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
if (textured)
{
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source);
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float v_depth"}},
true, use_dual_source ? 2 : 1, true);
}
else
{
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source);
DeclareFragmentEntryPoint(ss, 1, 0, {{"nointerpolation", "float v_depth"}}, true, use_dual_source ? 2 : 1, true);
}
ss << R"(
@ -736,6 +748,8 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
#else
o_col0 = float4(color, u_dst_alpha_factor / ialpha);
#endif
o_depth = oalpha * v_depth;
}
else
{
@ -752,6 +766,8 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
#else
o_col0 = float4(color, 1.0 - ialpha);
#endif
o_depth = oalpha * v_depth;
}
#else
// Non-transparency won't enable blending so we can write the mask here regardless.
@ -760,6 +776,8 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
#if USE_DUAL_SOURCE
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#endif
o_depth = oalpha * v_depth;
#endif
}
)";
@ -783,10 +801,12 @@ CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RES
ss << R"(
in VertexData {
float4 v_col0;
nointerpolation float v_depth;
} in_data[];
out VertexData {
float4 v_col0;
nointerpolation float v_depth;
} out_data;
layout(lines) in;
@ -799,21 +819,25 @@ void main() {
// top-left
out_data.v_col0 = in_data[0].v_col0;
out_data.v_depth = in_data[0].v_depth;
gl_Position = gl_in[0].gl_Position - offset;
EmitVertex();
// top-right
out_data.v_col0 = in_data[0].v_col0;
out_data.v_depth = in_data[0].v_depth;
gl_Position = gl_in[0].gl_Position + offset;
EmitVertex();
// bottom-left
out_data.v_col0 = in_data[1].v_col0;
out_data.v_depth = in_data[1].v_depth;
gl_Position = gl_in[1].gl_Position - offset;
EmitVertex();
// bottom-right
out_data.v_col0 = in_data[1].v_col0;
out_data.v_depth = in_data[1].v_depth;
gl_Position = gl_in[1].gl_Position + offset;
EmitVertex();
@ -827,6 +851,7 @@ void main() {
struct Vertex
{
float4 col0 : COLOR0;
float depth : TEXCOORD0;
float4 pos : SV_Position;
};
@ -841,21 +866,25 @@ void main(line Vertex input[2], inout TriangleStream<Vertex> output)
// top-left
v.col0 = input[0].col0;
v.depth = input[0].depth;
v.pos = input[0].pos - offset;
output.Append(v);
// top-right
v.col0 = input[0].col0;
v.depth = input[0].depth;
v.pos = input[0].pos + offset;
output.Append(v);
// bottom-left
v.col0 = input[1].col0;
v.depth = input[1].depth;
v.pos = input[1].pos - offset;
output.Append(v);
// bottom-right
v.col0 = input[1].col0;
v.depth = input[1].depth;
v.pos = input[1].pos + offset;
output.Append(v);
@ -890,11 +919,12 @@ std::string GPU_HW_ShaderGen::GenerateFillFragmentShader()
std::stringstream ss;
WriteHeader(ss);
DeclareUniformBuffer(ss, {"float4 u_fill_color"});
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, true);
ss << R"(
{
o_col0 = u_fill_color;
o_depth = u_fill_color.a;
}
)";
@ -907,7 +937,7 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"});
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
ss << R"(
{
@ -915,6 +945,7 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
discard;
o_col0 = u_fill_color;
o_depth = u_fill_color.a;
}
)";
@ -927,7 +958,7 @@ std::string GPU_HW_ShaderGen::GenerateCopyFragmentShader()
WriteHeader(ss);
DeclareUniformBuffer(ss, {"float4 u_src_rect"});
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1);
ss << R"(
{
@ -950,7 +981,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_field_offset"});
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1);
ss << R"(
{
uint2 icoords = uint2(v_pos.xy) + u_vram_offset;
@ -1013,7 +1044,7 @@ uint SampleVRAM(uint2 coords)
}
)";
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1);
ss << R"(
{
uint2 sample_coords = uint2(uint(v_pos.x) * 2u, uint(v_pos.y));
@ -1043,10 +1074,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
std::stringstream ss;
WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset"});
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits",
"float u_depth_value"});
DeclareTextureBuffer(ss, "samp0", 0, true, true);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
ss << R"(
{
uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
@ -1058,9 +1090,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
#endif
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r;
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r | u_mask_or_bits;
o_col0 = RGBA5551ToRGBA8(value);
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
})";
return ss.str();
@ -1071,10 +1104,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
std::stringstream ss;
WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_size", "bool u_set_mask_bit"});
DeclareUniformBuffer(
ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_size", "bool u_set_mask_bit", "float u_depth_value"});
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
ss << R"(
{
uint2 dst_coords = uint2(v_pos.xy);
@ -1090,7 +1124,24 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
// sample and apply mask bit
float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0);
o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a);
o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0));
})";
return ss.str();
}
// Generates the fragment shader source for the depth-refresh pass.
//
// The shader declares one texture (samp0, slot 0), receives the fragment position as v_pos,
// has no colour outputs, and writes a single depth output: the alpha component of the texel
// at the fragment's integer position.
//
// Returns the complete shader source as a string.
std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
{
  std::stringstream shader;

  WriteHeader(shader);
  DeclareTexture(shader, "samp0", 0);

  // Arguments: 0 colour inputs, 1 texcoord input, no extra inputs, fragcoord declared,
  // 0 colour outputs, depth output enabled.
  DeclareFragmentEntryPoint(shader, 0, 1, {}, true, 0, true);

  // Entry point body: depth = alpha of the texel under this fragment.
  shader << R"(
{
o_depth = LOAD_TEXTURE(samp0, int2(v_pos.xy), 0).a;
}
)";

  return shader.str();
}

View file

@ -25,6 +25,7 @@ public:
std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader();
std::string GenerateVRAMCopyFragmentShader();
std::string GenerateVRAMUpdateDepthFragmentShader();
private:
void SetGLSLVersionString();
@ -38,7 +39,7 @@ private:
bool declare_vertex_id = false);
void DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
bool declare_fragcoord = false, bool dual_color_output = false);
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false);
void WriteCommonFunctions(std::stringstream& ss);
void WriteBatchUniformBuffer(std::stringstream& ss);