mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-22 05:45:38 +00:00
GPU/HW: Mask bit handling in hardware renderers
Fixes:
- Menu effect in Ghost in the Shell
- Incorrect text colours in menu of Dragon Quest VII
- Fade effect in TwinBee RPG
- Fog in Silent Hill
- Water in Duke Nukem - Land of the Babes
- Shadows in Ultraman - Fighting Evolution
and probably others.
This commit is contained in:
parent
a5ecff0893
commit
9446587e8f
|
@ -27,14 +27,10 @@ D3D11_TEXTURE2D_DESC Texture::GetDesc() const
|
|||
return desc;
|
||||
}
|
||||
|
||||
bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, bool shader_resource,
|
||||
bool render_target, const void* initial_data, u32 initial_data_stride)
|
||||
bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, u32 bind_flags,
|
||||
const void* initial_data, u32 initial_data_stride)
|
||||
{
|
||||
CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, D3D11_USAGE_DEFAULT, 0, 1, 0, 0);
|
||||
if (shader_resource)
|
||||
desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
|
||||
if (render_target)
|
||||
desc.BindFlags |= D3D11_BIND_RENDER_TARGET;
|
||||
CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, bind_flags, D3D11_USAGE_DEFAULT, 0, 1, 0, 0);
|
||||
|
||||
D3D11_SUBRESOURCE_DATA srd;
|
||||
srd.pSysMem = initial_data;
|
||||
|
@ -50,7 +46,7 @@ bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT fo
|
|||
}
|
||||
|
||||
ComPtr<ID3D11ShaderResourceView> srv;
|
||||
if (shader_resource)
|
||||
if (bind_flags & D3D11_BIND_SHADER_RESOURCE)
|
||||
{
|
||||
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_TEXTURE2D, desc.Format, 0, desc.MipLevels, 0,
|
||||
desc.ArraySize);
|
||||
|
@ -63,7 +59,7 @@ bool Texture::Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT fo
|
|||
}
|
||||
|
||||
ComPtr<ID3D11RenderTargetView> rtv;
|
||||
if (render_target)
|
||||
if (bind_flags & D3D11_BIND_RENDER_TARGET)
|
||||
{
|
||||
const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2D, desc.Format, 0, 0, desc.ArraySize);
|
||||
const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf());
|
||||
|
|
|
@ -31,7 +31,7 @@ public:
|
|||
ALWAYS_INLINE operator ID3D11RenderTargetView*() const { return m_rtv.Get(); }
|
||||
ALWAYS_INLINE operator bool() const { return static_cast<bool>(m_texture); }
|
||||
|
||||
bool Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, bool shader_resource, bool render_target,
|
||||
bool Create(ID3D11Device* device, u32 width, u32 height, DXGI_FORMAT format, u32 bind_flags,
|
||||
const void* initial_data = nullptr, u32 initial_data_stride = 0);
|
||||
bool Adopt(ID3D11Device* device, ComPtr<ID3D11Texture2D> texture);
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display, System* system, DMA* dma, Int
|
|||
|
||||
const Settings& settings = m_system->GetSettings();
|
||||
m_resolution_scale = settings.gpu_resolution_scale;
|
||||
m_render_api = host_display->GetRenderAPI();
|
||||
m_true_color = settings.gpu_true_color;
|
||||
m_scaled_dithering = settings.gpu_scaled_dithering;
|
||||
m_texture_filtering = settings.gpu_texture_filtering;
|
||||
|
@ -46,10 +47,15 @@ void GPU_HW::Reset()
|
|||
{
|
||||
GPU::Reset();
|
||||
|
||||
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
||||
|
||||
m_vram_shadow.fill(0);
|
||||
|
||||
m_batch = {};
|
||||
m_batch_ubo_data = {};
|
||||
m_batch_current_vertex_depth_id = 1;
|
||||
m_batch_next_vertex_depth_id = 2;
|
||||
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
|
||||
m_batch_ubo_dirty = true;
|
||||
|
||||
SetFullVRAMDirtyRectangle();
|
||||
|
@ -62,7 +68,11 @@ bool GPU_HW::DoState(StateWrapper& sw)
|
|||
|
||||
// invalidate the whole VRAM read texture when loading state
|
||||
if (sw.IsReading())
|
||||
{
|
||||
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
||||
SetFullVRAMDirtyRectangle();
|
||||
ResetBatchVertexDepthID();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -177,12 +187,11 @@ void GPU_HW::LoadVertices()
|
|||
const RenderCommand rc{m_render_command.bits};
|
||||
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
|
||||
|
||||
// TODO: Move this to the GPU..
|
||||
switch (rc.primitive)
|
||||
{
|
||||
case Primitive::Polygon:
|
||||
{
|
||||
EnsureVertexBufferSpace(rc.quad_polygon ? 6 : 3);
|
||||
DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
|
||||
|
||||
const u32 first_color = rc.color_for_first_vertex;
|
||||
const bool shaded = rc.shading_enable;
|
||||
|
@ -308,9 +317,7 @@ void GPU_HW::LoadVertices()
|
|||
}
|
||||
|
||||
// we can split the rectangle up into potentially 8 quads
|
||||
const u32 required_vertices = 6 * (((rectangle_width + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
|
||||
(((rectangle_height + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u);
|
||||
EnsureVertexBufferSpace(required_vertices);
|
||||
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
|
||||
|
||||
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
|
||||
u16 tex_top = orig_tex_top;
|
||||
|
@ -361,7 +368,7 @@ void GPU_HW::LoadVertices()
|
|||
{
|
||||
if (!rc.polyline)
|
||||
{
|
||||
EnsureVertexBufferSpace(2);
|
||||
DebugAssert(GetBatchVertexSpace() >= 2);
|
||||
|
||||
u32 color0, color1;
|
||||
VertexPosition pos0, pos1;
|
||||
|
@ -410,7 +417,7 @@ void GPU_HW::LoadVertices()
|
|||
{
|
||||
// Multiply by two because we don't use line strips.
|
||||
const u32 num_vertices = GetPolyLineVertexCount();
|
||||
EnsureVertexBufferSpace(num_vertices * 2);
|
||||
DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
|
||||
|
||||
const u32 first_color = rc.color_for_first_vertex;
|
||||
const bool shaded = rc.shading_enable;
|
||||
|
@ -534,6 +541,73 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
|
|||
MapBatchVertexPointer(required_vertices);
|
||||
}
|
||||
|
||||
/// Reserves vertex buffer space for the render command currently held in m_render_command.
/// The reservation is an upper bound per primitive type: 3 or 6 vertices for a polygon,
/// MAX_VERTICES_FOR_RECTANGLE for a rectangle, and 2 (or twice the polyline vertex count) for lines.
/// On return, MapBatchVertexPointer() has been called for that count unless the currently
/// mapped buffer already had enough room.
void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
|
||||
{
|
||||
u32 required_vertices;
|
||||
switch (m_render_command.primitive)
|
||||
{
|
||||
case Primitive::Polygon:
|
||||
required_vertices = m_render_command.quad_polygon ? 6 : 3;
|
||||
break;
|
||||
case Primitive::Rectangle:
|
||||
required_vertices = MAX_VERTICES_FOR_RECTANGLE;
|
||||
break;
|
||||
case Primitive::Line:
|
||||
default:
|
||||
// lines are doubled for polylines (two vertices are reserved per polyline vertex)
|
||||
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 2u) : 2u;
|
||||
break;
|
||||
}
|
||||
|
||||
// can we fit these vertices in the current depth buffer range?
|
||||
// the check counts the next depth ID, the vertices already batched, and the new vertices
|
||||
if (BatchVertexDepthIDNeedsUpdate() &&
|
||||
(m_batch_next_vertex_depth_id + GetBatchVertexCount() + required_vertices) > MAX_BATCH_VERTEX_COUNTER_IDS)
|
||||
{
|
||||
// implies FlushRender()
|
||||
ResetBatchVertexDepthID();
|
||||
|
||||
}
|
||||
|
||||
else if (m_batch_current_vertex_ptr)
|
||||
{
|
||||
// a buffer is already mapped: keep using it if it is large enough
|
||||
if (GetBatchVertexSpace() >= required_vertices)
|
||||
return;
|
||||
|
||||
// otherwise submit what has been batched so far before mapping again
|
||||
FlushRender();
|
||||
}
|
||||
|
||||
// reached after a depth ID reset, after a flush, or when nothing was mapped yet
|
||||
MapBatchVertexPointer(required_vertices);
|
||||
}
|
||||
|
||||
/// Restarts the batch vertex depth ID counters.
/// Flushes any batched vertices, asks the renderer to rebuild the depth buffer from the mask bit
/// (UpdateDepthBufferFromMaskBit()), then resets the current/next depth IDs to 1 and 2 and
/// pushes the current ID into the batch UBO data.
void GPU_HW::ResetBatchVertexDepthID()
|
||||
{
|
||||
Log_PerfPrint("Resetting batch vertex depth ID");
|
||||
// flush first so vertices batched under the old IDs are submitted before the counters change
|
||||
FlushRender();
|
||||
UpdateDepthBufferFromMaskBit();
|
||||
|
||||
// same starting values as those assigned in GPU_HW::Reset()
|
||||
m_batch_current_vertex_depth_id = 1;
|
||||
m_batch_next_vertex_depth_id = 2;
|
||||
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
|
||||
}
|
||||
|
||||
/// Advances the next batch vertex depth ID by `count`.
/// Debug builds assert that the result does not exceed MAX_BATCH_VERTEX_COUNTER_IDS;
/// no clamping or reset is performed here.
void GPU_HW::IncrementBatchVertexID(u32 count)
|
||||
{
|
||||
DebugAssert((m_batch_next_vertex_depth_id + count) <= MAX_BATCH_VERTEX_COUNTER_IDS);
|
||||
m_batch_next_vertex_depth_id += count;
|
||||
}
|
||||
|
||||
/// Stores the depth ID `value` in the batch UBO data (u_vertex_depth_id).
/// For D3D11 the value is stored as-is; for every other render API it is stored as
/// (m_batch_base_vertex - value). The UBO is flagged dirty only if the stored value changes.
void GPU_HW::SetBatchUBOVertexDepthID(u32 value)
|
||||
{
|
||||
u32 ubo_value;
|
||||
|
||||
// In OpenGL, gl_VertexID is inclusive of the base vertex, whereas SV_VertexID in D3D isn't.
|
||||
// We rely on unsigned overflow to compute the correct value based on the base vertex.
|
||||
if (m_render_api != HostDisplay::RenderAPI::D3D11)
|
||||
ubo_value = m_batch_base_vertex - value;
|
||||
else
|
||||
ubo_value = value;
|
||||
|
||||
// OR-in the change so an already-pending dirty flag is never cleared here
|
||||
m_batch_ubo_dirty |= (m_batch_ubo_data.u_vertex_depth_id != ubo_value);
|
||||
m_batch_ubo_data.u_vertex_depth_id = ubo_value;
|
||||
}
|
||||
|
||||
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||
{
|
||||
IncludeVRAMDityRectangle(
|
||||
|
@ -544,12 +618,26 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
|||
{
|
||||
DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT);
|
||||
IncludeVRAMDityRectangle(Common::Rectangle<u32>::FromExtents(x, y, width, height));
|
||||
|
||||
if (m_GPUSTAT.check_mask_before_draw)
|
||||
{
|
||||
// set new vertex counter since we want this to take into consideration previous masked pixels
|
||||
m_batch_current_vertex_depth_id = m_batch_next_vertex_depth_id++;
|
||||
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
|
||||
}
|
||||
}
|
||||
|
||||
/// Base-class bookkeeping for a VRAM-to-VRAM copy; no pixels are copied here.
/// Adds the destination rectangle, clamped to the VRAM bounds, to the dirty rectangle.
/// When GPUSTAT.check_mask_before_draw is set, it also switches to a fresh depth ID.
/// src_x and src_y are not used by this body.
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
|
||||
{
|
||||
IncludeVRAMDityRectangle(
|
||||
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
|
||||
|
||||
if (m_GPUSTAT.check_mask_before_draw)
|
||||
{
|
||||
// set new vertex counter since we want this to take into consideration previous masked pixels
|
||||
// the old "next" ID becomes the current ID, and "next" is advanced by one
|
||||
m_batch_current_vertex_depth_id = m_batch_next_vertex_depth_id++;
|
||||
SetBatchUBOVertexDepthID(m_batch_current_vertex_depth_id);
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_HW::DispatchRenderCommand()
|
||||
|
@ -600,6 +688,8 @@ void GPU_HW::DispatchRenderCommand()
|
|||
FlushRender();
|
||||
}
|
||||
|
||||
EnsureVertexBufferSpaceForCurrentCommand();
|
||||
|
||||
// transparency mode change
|
||||
if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled)
|
||||
{
|
||||
|
@ -614,7 +704,8 @@ void GPU_HW::DispatchRenderCommand()
|
|||
{
|
||||
m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
|
||||
m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
|
||||
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_GPUSTAT.set_mask_while_drawing);
|
||||
m_batch_ubo_data.u_check_mask_before_draw = BoolToUInt32(m_batch.check_mask_before_draw);
|
||||
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing);
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
|
@ -657,6 +748,10 @@ void GPU_HW::FlushRender()
|
|||
if (vertex_count == 0)
|
||||
return;
|
||||
|
||||
const bool update_depth_id = BatchVertexDepthIDNeedsUpdate();
|
||||
if (update_depth_id)
|
||||
SetBatchUBOVertexDepthID(m_batch_next_vertex_depth_id);
|
||||
|
||||
if (m_drawing_area_changed)
|
||||
{
|
||||
m_drawing_area_changed = false;
|
||||
|
@ -680,6 +775,9 @@ void GPU_HW::FlushRender()
|
|||
m_renderer_stats.num_batches++;
|
||||
DrawBatchVertices(m_batch.GetRenderMode(), m_batch_base_vertex, vertex_count);
|
||||
}
|
||||
|
||||
if (update_depth_id)
|
||||
IncrementBatchVertexID(vertex_count);
|
||||
}
|
||||
|
||||
void GPU_HW::DrawRendererStats(bool is_idle_frame)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
#include "common/heap_array.h"
|
||||
#include "gpu.h"
|
||||
#include "host_display.h"
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
@ -42,7 +43,10 @@ protected:
|
|||
{
|
||||
VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32),
|
||||
VERTEX_BUFFER_SIZE = 1 * 1024 * 1024,
|
||||
UNIFORM_BUFFER_SIZE = 512 * 1024
|
||||
UNIFORM_BUFFER_SIZE = 512 * 1024,
|
||||
MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
|
||||
MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
|
||||
(((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u)
|
||||
};
|
||||
|
||||
struct BatchVertex
|
||||
|
@ -102,8 +106,19 @@ protected:
|
|||
u32 u_texture_window_offset[2];
|
||||
float u_src_alpha_factor;
|
||||
float u_dst_alpha_factor;
|
||||
u32 u_set_mask_while_drawing;
|
||||
u32 u_interlaced_displayed_field;
|
||||
u32 u_vertex_depth_id;
|
||||
u32 u_check_mask_before_draw;
|
||||
u32 u_set_mask_while_drawing;
|
||||
};
|
||||
|
||||
// Uniform data passed to the VRAM write shader.
// Field order matters: GPU_HW_D3D11::UpdateVRAM brace-initialises this struct positionally as
// {x, y, width, height, stream buffer index, mask bits, depth value}.
struct VRAMWriteUBOData
|
||||
{
|
||||
// destination x, y of the write
|
||||
u32 u_base_coords[2];
|
||||
// width, height of the write
|
||||
u32 u_size[2];
|
||||
// aligned index returned when mapping the texture stream buffer (map_result.index_aligned)
|
||||
u32 u_buffer_base_offset;
|
||||
// 0xFF when GPUSTAT.set_mask_while_drawing is set, otherwise 0x00
|
||||
// NOTE(review): presumably OR-ed into the written pixels by the shader - confirm against the shader source
|
||||
u32 u_mask_or_bits;
|
||||
// value from GetCurrentNormalizedBatchVertexDepthID()
|
||||
float u_depth_value;
|
||||
};
|
||||
|
||||
struct VRAMCopyUBOData
|
||||
|
@ -115,6 +130,7 @@ protected:
|
|||
u32 u_width;
|
||||
u32 u_height;
|
||||
u32 u_set_mask_bit;
|
||||
float u_depth_value;
|
||||
};
|
||||
|
||||
struct RendererStats
|
||||
|
@ -133,6 +149,7 @@ protected:
|
|||
}
|
||||
|
||||
virtual void UpdateVRAMReadTexture() = 0;
|
||||
virtual void UpdateDepthBufferFromMaskBit() = 0;
|
||||
virtual void SetScissorFromDrawingArea() = 0;
|
||||
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
|
||||
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
|
||||
|
@ -147,11 +164,28 @@ protected:
|
|||
void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); }
|
||||
void IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect);
|
||||
|
||||
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
|
||||
|
||||
u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
|
||||
u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
|
||||
void EnsureVertexBufferSpace(u32 required_vertices);
|
||||
void EnsureVertexBufferSpaceForCurrentCommand();
|
||||
void ResetBatchVertexDepthID();
|
||||
void IncrementBatchVertexID(u32 count);
|
||||
void SetBatchUBOVertexDepthID(u32 value);
|
||||
|
||||
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
|
||||
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
/// Computed from m_batch_next_vertex_depth_id: an ID of 0 maps to 1.0 and an ID of 65535 maps to 0.0,
/// so larger IDs produce smaller depth values.
|
||||
ALWAYS_INLINE float GetCurrentNormalizedBatchVertexDepthID() const
|
||||
{
|
||||
return 1.0f - (static_cast<float>(m_batch_next_vertex_depth_id) / 65535.0f);
|
||||
}
|
||||
|
||||
/// Returns true if the batch vertex depth ID needs to be updated.
/// This is the case when the current batch checks the mask bit before drawing,
/// and always when the render API is anything other than D3D11.
|
||||
ALWAYS_INLINE bool BatchVertexDepthIDNeedsUpdate() const
|
||||
{
|
||||
// because GL uses base vertex we're incrementing the depth id every draw whether we like it or not
|
||||
return m_batch.check_mask_before_draw || m_render_api != HostDisplay::RenderAPI::D3D11;
|
||||
}
|
||||
|
||||
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
|
||||
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||
|
@ -182,9 +216,12 @@ protected:
|
|||
BatchVertex* m_batch_end_vertex_ptr = nullptr;
|
||||
BatchVertex* m_batch_current_vertex_ptr = nullptr;
|
||||
u32 m_batch_base_vertex = 0;
|
||||
u32 m_batch_current_vertex_depth_id = 0;
|
||||
u32 m_batch_next_vertex_depth_id = 0;
|
||||
|
||||
u32 m_resolution_scale = 1;
|
||||
u32 m_max_resolution_scale = 1;
|
||||
HostDisplay::RenderAPI m_render_api = HostDisplay::RenderAPI::None;
|
||||
bool m_true_color = true;
|
||||
bool m_scaled_dithering = false;
|
||||
bool m_texture_filtering = false;
|
||||
|
|
|
@ -111,8 +111,7 @@ void GPU_HW_D3D11::RestoreGraphicsAPIState()
|
|||
m_context->IASetVertexBuffers(0, 1, m_vertex_stream_buffer.GetD3DBufferArray(), &stride, &offset);
|
||||
m_context->IASetInputLayout(m_batch_input_layout.Get());
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
|
||||
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
|
||||
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
|
||||
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get());
|
||||
m_context->RSSetState(m_cull_none_rasterizer_state.Get());
|
||||
SetViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
SetScissorFromDrawingArea();
|
||||
|
@ -171,16 +170,29 @@ bool GPU_HW_D3D11::CreateFramebuffer()
|
|||
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
|
||||
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
|
||||
const DXGI_FORMAT texture_format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM;
|
||||
|
||||
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
|
||||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) ||
|
||||
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) ||
|
||||
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, true, true) ||
|
||||
if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format,
|
||||
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
|
||||
!m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, depth_format,
|
||||
D3D11_BIND_DEPTH_STENCIL) ||
|
||||
!m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format,
|
||||
D3D11_BIND_SHADER_RESOURCE) ||
|
||||
!m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format,
|
||||
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
|
||||
!m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format,
|
||||
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) ||
|
||||
!m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const CD3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc(D3D11_DSV_DIMENSION_TEXTURE2D, depth_format);
|
||||
HRESULT hr =
|
||||
m_device->CreateDepthStencilView(m_vram_depth_texture, &depth_view_desc, m_vram_depth_view.GetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
// do we need to restore the framebuffer after a size change?
|
||||
if (old_vram_texture)
|
||||
{
|
||||
|
@ -192,10 +204,12 @@ bool GPU_HW_D3D11::CreateFramebuffer()
|
|||
BlitTexture(m_vram_texture.GetD3DRTV(), 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
|
||||
old_vram_texture.GetD3DSRV(), 0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(),
|
||||
old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter);
|
||||
UpdateDepthBufferFromMaskBit();
|
||||
}
|
||||
|
||||
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr);
|
||||
SetFullVRAMDirtyRectangle();
|
||||
RestoreGraphicsAPIState();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -203,12 +217,16 @@ void GPU_HW_D3D11::ClearFramebuffer()
|
|||
{
|
||||
static constexpr std::array<float, 4> color = {};
|
||||
m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data());
|
||||
m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, 0.0f, 0);
|
||||
m_context->ClearRenderTargetView(m_display_texture, color.data());
|
||||
SetFullVRAMDirtyRectangle();
|
||||
}
|
||||
|
||||
void GPU_HW_D3D11::DestroyFramebuffer()
|
||||
{
|
||||
m_vram_read_texture.Destroy();
|
||||
m_vram_depth_view.Reset();
|
||||
m_vram_depth_texture.Destroy();
|
||||
m_vram_texture.Destroy();
|
||||
m_vram_encoding_texture.Destroy();
|
||||
m_display_texture.Destroy();
|
||||
|
@ -289,11 +307,28 @@ bool GPU_HW_D3D11::CreateStateObjects()
|
|||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
ds_desc.DepthEnable = TRUE;
|
||||
ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
|
||||
ds_desc.DepthFunc = D3D11_COMPARISON_ALWAYS;
|
||||
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_always_state.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL;
|
||||
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
|
||||
hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
bl_desc.RenderTarget[0].RenderTargetWriteMask = 0;
|
||||
hr = m_device->CreateBlendState(&bl_desc, m_blend_no_color_writes_state.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT());
|
||||
sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
|
||||
hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler_state.ReleaseAndGetAddressOf());
|
||||
|
@ -307,11 +342,8 @@ bool GPU_HW_D3D11::CreateStateObjects()
|
|||
|
||||
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
|
||||
{
|
||||
if (transparency_mode == static_cast<u8>(TransparencyMode::Disabled) && !m_texture_filtering)
|
||||
{
|
||||
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
|
||||
}
|
||||
else
|
||||
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
|
||||
if (transparency_mode != static_cast<u8>(TransparencyMode::Disabled) || m_texture_filtering)
|
||||
{
|
||||
bl_desc.RenderTarget[0].BlendEnable = TRUE;
|
||||
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
|
||||
|
@ -409,6 +441,11 @@ bool GPU_HW_D3D11::CompileShaders()
|
|||
if (!m_vram_copy_pixel_shader)
|
||||
return false;
|
||||
|
||||
m_vram_update_depth_pixel_shader =
|
||||
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMUpdateDepthFragmentShader());
|
||||
if (!m_vram_update_depth_pixel_shader)
|
||||
return false;
|
||||
|
||||
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
|
||||
{
|
||||
for (u8 interlacing = 0; interlacing < 2; interlacing++)
|
||||
|
@ -467,6 +504,7 @@ void GPU_HW_D3D11::BlitTexture(ID3D11RenderTargetView* dst, u32 dst_x, u32 dst_y
|
|||
static_cast<float>(src_height) / static_cast<float>(src_texture_height)};
|
||||
|
||||
m_context->OMSetRenderTargets(1, &dst, nullptr);
|
||||
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
|
||||
m_context->PSSetShaderResources(0, 1, &src);
|
||||
m_context->PSSetSamplers(
|
||||
0, 1, linear_filter ? m_linear_sampler_state.GetAddressOf() : m_point_sampler_state.GetAddressOf());
|
||||
|
@ -516,6 +554,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
|
|||
const TransparencyMode transparency_mode =
|
||||
(render_mode == BatchRenderMode::OnlyOpaque) ? TransparencyMode::Disabled : m_batch.transparency_mode;
|
||||
m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu);
|
||||
m_context->OMSetDepthStencilState(
|
||||
m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
|
||||
|
||||
m_context->Draw(num_vertices, base_vertex);
|
||||
}
|
||||
|
@ -567,6 +607,7 @@ void GPU_HW_D3D11::UpdateDisplay()
|
|||
else
|
||||
{
|
||||
m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
|
||||
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||
|
||||
const u32 reinterpret_field_offset = GetInterlacedField();
|
||||
|
@ -604,6 +645,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
|||
// Encode the 24-bit texture as 16-bit.
|
||||
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
|
||||
m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr);
|
||||
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||
SetViewportAndScissor(0, 0, encoded_width, encoded_height);
|
||||
DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));
|
||||
|
@ -654,6 +696,8 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
|||
RGBA8ToFloat(color);
|
||||
uniforms.u_interlaced_displayed_field = GetInterlacedField();
|
||||
|
||||
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
|
||||
|
||||
SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
|
||||
height * m_resolution_scale);
|
||||
DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() :
|
||||
|
@ -682,13 +726,21 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d
|
|||
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
||||
m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16));
|
||||
|
||||
const u32 uniforms[5] = {x, y, width, height, map_result.index_aligned};
|
||||
const VRAMWriteUBOData uniforms = {x,
|
||||
y,
|
||||
width,
|
||||
height,
|
||||
map_result.index_aligned,
|
||||
m_GPUSTAT.set_mask_while_drawing ? 0xFFu : 0x00,
|
||||
GetCurrentNormalizedBatchVertexDepthID()};
|
||||
m_context->OMSetDepthStencilState(
|
||||
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
|
||||
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
|
||||
|
||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||
SetScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale);
|
||||
|
||||
DrawUtilityShader(m_vram_write_pixel_shader.Get(), uniforms, sizeof(uniforms));
|
||||
DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms));
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
}
|
||||
|
@ -703,19 +755,20 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
|
|||
UpdateVRAMReadTexture();
|
||||
IncludeVRAMDityRectangle(dst_bounds);
|
||||
|
||||
const VRAMCopyUBOData uniforms = {
|
||||
src_x * m_resolution_scale,
|
||||
src_y * m_resolution_scale,
|
||||
dst_x * m_resolution_scale,
|
||||
dst_y * m_resolution_scale,
|
||||
width * m_resolution_scale,
|
||||
height * m_resolution_scale,
|
||||
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
||||
};
|
||||
const VRAMCopyUBOData uniforms = {src_x * m_resolution_scale,
|
||||
src_y * m_resolution_scale,
|
||||
dst_x * m_resolution_scale,
|
||||
dst_y * m_resolution_scale,
|
||||
width * m_resolution_scale,
|
||||
height * m_resolution_scale,
|
||||
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
||||
GetCurrentNormalizedBatchVertexDepthID()};
|
||||
|
||||
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
|
||||
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
|
||||
dst_bounds_scaled.GetHeight());
|
||||
m_context->OMSetDepthStencilState(
|
||||
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
|
||||
DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms));
|
||||
RestoreGraphicsAPIState();
|
||||
|
@ -728,6 +781,9 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
|
|||
if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)))
|
||||
UpdateVRAMReadTexture();
|
||||
|
||||
if (m_GPUSTAT.IsMaskingEnabled())
|
||||
Log_WarningPrintf("Masking enabled on VRAM copy - not implemented");
|
||||
|
||||
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
|
||||
|
||||
src_x *= m_resolution_scale;
|
||||
|
@ -749,6 +805,21 @@ void GPU_HW_D3D11::UpdateVRAMReadTexture()
|
|||
&src_box);
|
||||
}
|
||||
|
||||
/// Redraws the whole VRAM depth buffer using the depth-update pixel shader, sampling the VRAM texture.
/// Only the depth view is bound as an output, with depth test "always" and colour writes disabled.
/// The normal rendering state is restored before returning.
void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
|
||||
{
|
||||
SetViewportAndScissor(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
|
||||
// no colour target is bound, so the VRAM texture can be sampled as a shader resource below
|
||||
m_context->OMSetRenderTargets(0, nullptr, m_vram_depth_view.Get());
|
||||
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
|
||||
m_context->OMSetBlendState(m_blend_no_color_writes_state.Get(), nullptr, 0xFFFFFFFFu);
|
||||
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||
// the shader takes no uniforms (null data, zero size)
|
||||
DrawUtilityShader(m_vram_update_depth_pixel_shader.Get(), nullptr, 0);
|
||||
|
||||
// rebind the read texture to slot 0 before the VRAM texture becomes the render target again
|
||||
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
|
||||
RestoreGraphicsAPIState();
|
||||
}
|
||||
|
||||
std::unique_ptr<GPU> GPU::CreateHardwareD3D11Renderer()
|
||||
{
|
||||
return std::make_unique<GPU_HW_D3D11>();
|
||||
|
|
|
@ -34,6 +34,7 @@ protected:
|
|||
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
||||
void UpdateVRAMReadTexture() override;
|
||||
void UpdateDepthBufferFromMaskBit() override;
|
||||
void SetScissorFromDrawingArea() override;
|
||||
void MapBatchVertexPointer(u32 required_vertices) override;
|
||||
void UnmapBatchVertexPointer(u32 used_vertices) override;
|
||||
|
@ -77,6 +78,8 @@ private:
|
|||
|
||||
// downsample texture - used for readbacks at >1xIR.
|
||||
D3D11::Texture m_vram_texture;
|
||||
D3D11::Texture m_vram_depth_texture;
|
||||
ComPtr<ID3D11DepthStencilView> m_vram_depth_view;
|
||||
D3D11::Texture m_vram_read_texture;
|
||||
D3D11::Texture m_vram_encoding_texture;
|
||||
D3D11::Texture m_display_texture;
|
||||
|
@ -94,8 +97,11 @@ private:
|
|||
ComPtr<ID3D11RasterizerState> m_cull_none_rasterizer_state;
|
||||
|
||||
ComPtr<ID3D11DepthStencilState> m_depth_disabled_state;
|
||||
ComPtr<ID3D11DepthStencilState> m_depth_test_always_state;
|
||||
ComPtr<ID3D11DepthStencilState> m_depth_test_less_state;
|
||||
|
||||
ComPtr<ID3D11BlendState> m_blend_disabled_state;
|
||||
ComPtr<ID3D11BlendState> m_blend_no_color_writes_state;
|
||||
|
||||
ComPtr<ID3D11SamplerState> m_point_sampler_state;
|
||||
ComPtr<ID3D11SamplerState> m_linear_sampler_state;
|
||||
|
@ -114,5 +120,6 @@ private:
|
|||
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
|
||||
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
|
||||
};
|
||||
|
|
|
@ -11,6 +11,8 @@ GPU_HW_OpenGL::GPU_HW_OpenGL() : GPU_HW() {}
|
|||
GPU_HW_OpenGL::~GPU_HW_OpenGL()
|
||||
{
|
||||
// Destroy objects which don't have destructors to clean them up
|
||||
if (m_vram_fbo_id != 0)
|
||||
glDeleteFramebuffers(1, &m_vram_fbo_id);
|
||||
if (m_vao_id != 0)
|
||||
glDeleteVertexArrays(1, &m_vao_id);
|
||||
if (m_attributeless_vao_id != 0)
|
||||
|
@ -90,7 +92,6 @@ void GPU_HW_OpenGL::ResetGraphicsAPIState()
|
|||
glEnable(GL_CULL_FACE);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_BLEND);
|
||||
glDepthMask(GL_TRUE);
|
||||
if (m_resolution_scale > 1 && !m_supports_geometry_shaders)
|
||||
glLineWidth(1.0f);
|
||||
glBindVertexArray(0);
|
||||
|
@ -98,13 +99,14 @@ void GPU_HW_OpenGL::ResetGraphicsAPIState()
|
|||
|
||||
void GPU_HW_OpenGL::RestoreGraphicsAPIState()
|
||||
{
|
||||
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
|
||||
glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
|
||||
glDisable(GL_CULL_FACE);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glDepthMask(GL_FALSE);
|
||||
glDepthMask(GL_TRUE);
|
||||
glDepthFunc(GL_ALWAYS);
|
||||
if (m_resolution_scale > 1 && !m_supports_geometry_shaders)
|
||||
glLineWidth(static_cast<float>(m_resolution_scale));
|
||||
glBindVertexArray(m_vao_id);
|
||||
|
@ -211,34 +213,16 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
|
|||
{
|
||||
// save old vram texture/fbo, in case we're changing scale
|
||||
GL::Texture old_vram_texture = std::move(m_vram_texture);
|
||||
GLuint old_vram_fbo = m_vram_fbo_id;
|
||||
|
||||
// scale vram size to internal resolution
|
||||
const u32 texture_width = VRAM_WIDTH * m_resolution_scale;
|
||||
const u32 texture_height = VRAM_HEIGHT * m_resolution_scale;
|
||||
|
||||
if (!m_vram_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
|
||||
!m_vram_texture.CreateFramebuffer())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// do we need to restore the framebuffer after a size change?
|
||||
if (old_vram_texture.IsValid())
|
||||
{
|
||||
const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth();
|
||||
Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(),
|
||||
old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
|
||||
linear_filter ? "linear" : "nearest");
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
old_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||
glBlitFramebuffer(0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(),
|
||||
m_vram_texture.GetHeight(), GL_COLOR_BUFFER_BIT, linear_filter ? GL_LINEAR : GL_NEAREST);
|
||||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
old_vram_texture.Destroy();
|
||||
}
|
||||
|
||||
if (!m_vram_read_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
|
||||
!m_vram_depth_texture.Create(texture_width, texture_height, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT,
|
||||
GL_UNSIGNED_SHORT, nullptr, false) ||
|
||||
!m_vram_read_texture.Create(texture_width, texture_height, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
|
||||
!m_vram_read_texture.CreateFramebuffer() ||
|
||||
!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false) ||
|
||||
!m_vram_encoding_texture.CreateFramebuffer() ||
|
||||
|
@ -248,7 +232,32 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
|
|||
return false;
|
||||
}
|
||||
|
||||
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
glGenFramebuffers(1, &m_vram_fbo_id);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_texture.GetGLId(), 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_vram_depth_texture.GetGLId(), 0);
|
||||
Assert(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||
|
||||
// do we need to restore the framebuffer after a size change?
|
||||
if (old_vram_fbo != 0)
|
||||
{
|
||||
const bool linear_filter = old_vram_texture.GetWidth() > m_vram_texture.GetWidth();
|
||||
Log_DevPrintf("Scaling %ux%u VRAM texture to %ux%u using %s filter", old_vram_texture.GetWidth(),
|
||||
old_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
|
||||
linear_filter ? "linear" : "nearest");
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_vram_fbo);
|
||||
glBlitFramebuffer(0, 0, old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), 0, 0, m_vram_texture.GetWidth(),
|
||||
m_vram_texture.GetHeight(), GL_COLOR_BUFFER_BIT, linear_filter ? GL_LINEAR : GL_NEAREST);
|
||||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
old_vram_texture.Destroy();
|
||||
glDeleteFramebuffers(1, &old_vram_fbo);
|
||||
|
||||
UpdateDepthBufferFromMaskBit();
|
||||
}
|
||||
|
||||
SetFullVRAMDirtyRectangle();
|
||||
return true;
|
||||
}
|
||||
|
@ -257,7 +266,8 @@ void GPU_HW_OpenGL::ClearFramebuffer()
|
|||
{
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glClearDepth(0.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
SetFullVRAMDirtyRectangle();
|
||||
}
|
||||
|
@ -470,6 +480,15 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
|||
}
|
||||
m_vram_copy_program = std::move(*prog);
|
||||
|
||||
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
|
||||
shadergen.GenerateVRAMUpdateDepthFragmentShader());
|
||||
if (!prog)
|
||||
return false;
|
||||
|
||||
prog->Bind();
|
||||
prog->Uniform1i("samp0", 0);
|
||||
m_vram_update_depth_program = std::move(*prog);
|
||||
|
||||
if (m_supports_texture_buffer)
|
||||
{
|
||||
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
|
||||
|
@ -519,6 +538,8 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
|
|||
glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO);
|
||||
}
|
||||
|
||||
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
||||
|
||||
static constexpr std::array<GLenum, 4> gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}};
|
||||
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], m_batch_base_vertex, num_vertices);
|
||||
}
|
||||
|
@ -590,6 +611,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
|||
{
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
|
||||
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Bind();
|
||||
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
|
@ -607,6 +629,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
|||
m_batch_ubo_dirty = true;
|
||||
|
||||
glViewport(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
|
||||
glBindVertexArray(m_attributeless_vao_id);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
|
||||
|
@ -615,8 +638,10 @@ void GPU_HW_OpenGL::UpdateDisplay()
|
|||
scaled_display_width, -static_cast<s32>(scaled_display_height));
|
||||
|
||||
// restore state
|
||||
m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
|
||||
glBindVertexArray(m_vao_id);
|
||||
glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
}
|
||||
|
||||
|
@ -644,6 +669,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
|||
glDisable(GL_BLEND);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glViewport(0, 0, encoded_width, encoded_height);
|
||||
glBindVertexArray(m_attributeless_vao_id);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
// Readback encoded texture.
|
||||
|
@ -688,7 +714,8 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
|||
{
|
||||
const auto [r, g, b, a] = RGBA8ToFloat(color);
|
||||
glClearColor(r, g, b, a);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glClearDepth(a);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
SetScissorFromDrawingArea();
|
||||
}
|
||||
else
|
||||
|
@ -705,6 +732,9 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
|||
|
||||
m_vram_interlaced_fill_program.Bind();
|
||||
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
glDisable(GL_BLEND);
|
||||
glDepthFunc(GL_ALWAYS);
|
||||
glBindVertexArray(m_attributeless_vao_id);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
|
@ -743,13 +773,21 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
|||
glViewport(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
||||
|
||||
m_vram_write_program.Bind();
|
||||
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
||||
|
||||
const u32 uniforms[5] = {x, flipped_y, width, height, map_result.index_aligned};
|
||||
UploadUniformBuffer(uniforms, sizeof(uniforms));
|
||||
const VRAMWriteUBOData uniforms = {x,
|
||||
flipped_y,
|
||||
width,
|
||||
height,
|
||||
map_result.index_aligned,
|
||||
m_GPUSTAT.set_mask_while_drawing ? 0xFFu : 0x00,
|
||||
GetCurrentNormalizedBatchVertexDepthID()};
|
||||
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
|
||||
glBindVertexArray(m_attributeless_vao_id);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
|
@ -822,21 +860,21 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
|
|||
UpdateVRAMReadTexture();
|
||||
IncludeVRAMDityRectangle(dst_bounds);
|
||||
|
||||
VRAMCopyUBOData uniforms = {
|
||||
src_x * m_resolution_scale,
|
||||
src_y * m_resolution_scale,
|
||||
dst_x * m_resolution_scale,
|
||||
dst_y * m_resolution_scale,
|
||||
width * m_resolution_scale,
|
||||
height * m_resolution_scale,
|
||||
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
||||
};
|
||||
VRAMCopyUBOData uniforms = {src_x * m_resolution_scale,
|
||||
src_y * m_resolution_scale,
|
||||
dst_x * m_resolution_scale,
|
||||
dst_y * m_resolution_scale,
|
||||
width * m_resolution_scale,
|
||||
height * m_resolution_scale,
|
||||
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
||||
GetCurrentNormalizedBatchVertexDepthID()};
|
||||
uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height;
|
||||
uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height;
|
||||
UploadUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_BLEND);
|
||||
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
||||
|
||||
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
|
||||
glViewport(dst_bounds_scaled.left,
|
||||
|
@ -876,7 +914,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
|
|||
else
|
||||
{
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
|
||||
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
|
||||
GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
|
@ -904,14 +942,31 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
|
|||
else
|
||||
{
|
||||
m_vram_read_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
|
||||
m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glBlitFramebuffer(x, y, x + width, y + height, x, y, x + width, y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
m_vram_texture.BindFramebuffer(GL_FRAMEBUFFER);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Redraws the depth attachment of the currently-bound draw framebuffer using
// m_vram_update_depth_program, sampling m_vram_texture. Per the fragment shader
// generated by GenerateVRAMUpdateDepthFragmentShader(), the depth written for
// each pixel is the alpha channel of the corresponding VRAM texel.
// NOTE(review): blending stays disabled and the depth function stays GL_ALWAYS
// on return; this function does not restore either - confirm callers reset them.
void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit()
|
||||
{
|
||||
// Cover the whole target: no scissor clipping, no blending.
glDisable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_BLEND);
|
||||
// Only depth should change, so mask off all colour channel writes.
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
|
||||
// Always pass the depth test so every pixel's depth is overwritten.
glDepthFunc(GL_ALWAYS);
|
||||
|
||||
// Source texture for the shader, then a three-vertex draw with the attributeless VAO.
m_vram_texture.Bind();
|
||||
m_vram_update_depth_program.Bind();
|
||||
glBindVertexArray(m_attributeless_vao_id);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
// Put back the batch VAO, colour writes and scissor test.
glBindVertexArray(m_vao_id);
|
||||
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
}
|
||||
|
||||
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()
|
||||
{
|
||||
return std::make_unique<GPU_HW_OpenGL>();
|
||||
|
|
|
@ -30,6 +30,7 @@ protected:
|
|||
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
||||
void UpdateVRAMReadTexture() override;
|
||||
void UpdateDepthBufferFromMaskBit() override;
|
||||
void SetScissorFromDrawingArea() override;
|
||||
void MapBatchVertexPointer(u32 required_vertices) override;
|
||||
void UnmapBatchVertexPointer(u32 used_vertices) override;
|
||||
|
@ -63,11 +64,13 @@ private:
|
|||
|
||||
// downsample texture - used for readbacks at >1xIR.
|
||||
GL::Texture m_vram_texture;
|
||||
GL::Texture m_vram_depth_texture;
|
||||
GL::Texture m_vram_read_texture;
|
||||
GL::Texture m_vram_encoding_texture;
|
||||
GL::Texture m_display_texture;
|
||||
|
||||
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
|
||||
GLuint m_vram_fbo_id = 0;
|
||||
GLuint m_vao_id = 0;
|
||||
GLuint m_attributeless_vao_id = 0;
|
||||
|
||||
|
@ -85,6 +88,7 @@ private:
|
|||
GL::Program m_vram_read_program;
|
||||
GL::Program m_vram_write_program;
|
||||
GL::Program m_vram_copy_program;
|
||||
GL::Program m_vram_update_depth_program;
|
||||
|
||||
u32 m_uniform_buffer_alignment = 1;
|
||||
u32 m_max_texture_buffer_size = 0;
|
||||
|
|
|
@ -319,6 +319,9 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
|
|||
{
|
||||
ss << "void main(\n";
|
||||
|
||||
if (declare_vertex_id)
|
||||
ss << " in uint v_id : SV_VertexID,\n";
|
||||
|
||||
u32 attribute_counter = 0;
|
||||
for (const char* attribute : attributes)
|
||||
{
|
||||
|
@ -326,9 +329,6 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
|
|||
attribute_counter++;
|
||||
}
|
||||
|
||||
if (declare_vertex_id)
|
||||
ss << " in uint v_id : SV_VertexID,\n";
|
||||
|
||||
for (u32 i = 0; i < num_color_outputs; i++)
|
||||
ss << " out float4 v_col" << i << " : COLOR" << i << ",\n";
|
||||
|
||||
|
@ -349,7 +349,7 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
|
|||
void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
|
||||
std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
|
||||
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
|
||||
bool declare_fragcoord /* = false */, bool dual_color_output /* = false */)
|
||||
bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */)
|
||||
{
|
||||
if (m_glsl)
|
||||
{
|
||||
|
@ -381,23 +381,18 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
|
|||
if (declare_fragcoord)
|
||||
ss << "#define v_pos gl_FragCoord\n";
|
||||
|
||||
if (depth_output)
|
||||
ss << "#define o_depth gl_FragDepth\n";
|
||||
|
||||
if (m_use_glsl_binding_layout)
|
||||
{
|
||||
if (dual_color_output)
|
||||
{
|
||||
ss << "layout(location = 0, index = 0) out float4 o_col0;\n";
|
||||
ss << "layout(location = 0, index = 1) out float4 o_col1;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "layout(location = 0) out float4 o_col0;\n";
|
||||
}
|
||||
for (u32 i = 0; i < num_color_outputs; i++)
|
||||
ss << "layout(location = 0, index = " << i << ") out float4 o_col" << i << ";\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "out float4 o_col0;\n";
|
||||
if (dual_color_output)
|
||||
ss << "out float4 o_col1;\n";
|
||||
for (u32 i = 0; i < num_color_outputs; i++)
|
||||
ss << "out float4 o_col" << i << ";\n";
|
||||
}
|
||||
|
||||
ss << "\n";
|
||||
|
@ -425,14 +420,23 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
|
|||
if (declare_fragcoord)
|
||||
ss << " in float4 v_pos : SV_Position,\n";
|
||||
|
||||
if (dual_color_output)
|
||||
if (depth_output)
|
||||
{
|
||||
ss << " out float4 o_col0 : SV_Target0,\n";
|
||||
ss << " out float4 o_col1 : SV_Target1)\n";
|
||||
ss << " out float o_depth : SV_Depth";
|
||||
if (num_color_outputs > 0)
|
||||
ss << ",\n";
|
||||
else
|
||||
ss << ")\n";
|
||||
}
|
||||
else
|
||||
|
||||
for (u32 i = 0; i < num_color_outputs; i++)
|
||||
{
|
||||
ss << " out float4 o_col0 : SV_Target)";
|
||||
ss << " out float4 o_col" << i << " : SV_Target" << i;
|
||||
|
||||
if (i == (num_color_outputs - 1))
|
||||
ss << ")\n";
|
||||
else
|
||||
ss << ",\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -440,9 +444,10 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
|
|||
|
||||
void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
|
||||
{
|
||||
DeclareUniformBuffer(ss, {"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor",
|
||||
"float u_dst_alpha_factor", "bool u_set_mask_while_drawing",
|
||||
"uint u_interlaced_displayed_field"});
|
||||
DeclareUniformBuffer(ss,
|
||||
{"uint2 u_texture_window_mask", "uint2 u_texture_window_offset", "float u_src_alpha_factor",
|
||||
"float u_dst_alpha_factor", "uint u_interlaced_displayed_field", "uint u_base_vertex_depth_id",
|
||||
"bool u_check_mask_before_draw", "bool u_set_mask_while_drawing"});
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
|
||||
|
@ -459,11 +464,11 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
|
|||
if (textured)
|
||||
{
|
||||
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
|
||||
{{"nointerpolation", "uint4 v_texpage"}});
|
||||
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float v_depth"}}, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0"}, 1, 0, {});
|
||||
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0"}, 1, 0, {{"nointerpolation", "float v_depth"}}, true);
|
||||
}
|
||||
|
||||
ss << R"(
|
||||
|
@ -484,6 +489,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
|
|||
#endif
|
||||
v_pos = float4(pos_x, pos_y, 0.0, 1.0);
|
||||
|
||||
#if API_D3D11
|
||||
v_depth = 1.0 - (float(u_base_vertex_depth_id + (u_check_mask_before_draw ? 0u : v_id)) / 65535.0);
|
||||
#else
|
||||
v_depth = 1.0 - (float(v_id - u_base_vertex_depth_id) / 65535.0);
|
||||
#endif
|
||||
|
||||
v_col0 = a_col0;
|
||||
#if TEXTURED
|
||||
// Fudge the texture coordinates by half a pixel in screen-space.
|
||||
|
@ -616,11 +627,12 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
|||
|
||||
if (textured)
|
||||
{
|
||||
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source);
|
||||
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float v_depth"}},
|
||||
true, use_dual_source ? 2 : 1, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source);
|
||||
DeclareFragmentEntryPoint(ss, 1, 0, {{"nointerpolation", "float v_depth"}}, true, use_dual_source ? 2 : 1, true);
|
||||
}
|
||||
|
||||
ss << R"(
|
||||
|
@ -736,6 +748,8 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
|||
#else
|
||||
o_col0 = float4(color, u_dst_alpha_factor / ialpha);
|
||||
#endif
|
||||
|
||||
o_depth = oalpha * v_depth;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -752,6 +766,8 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
|||
#else
|
||||
o_col0 = float4(color, 1.0 - ialpha);
|
||||
#endif
|
||||
|
||||
o_depth = oalpha * v_depth;
|
||||
}
|
||||
#else
|
||||
// Non-transparency won't enable blending so we can write the mask here regardless.
|
||||
|
@ -760,6 +776,8 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
|||
#if USE_DUAL_SOURCE
|
||||
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
|
||||
#endif
|
||||
|
||||
o_depth = oalpha * v_depth;
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
@ -783,10 +801,12 @@ CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RES
|
|||
ss << R"(
|
||||
in VertexData {
|
||||
float4 v_col0;
|
||||
nointerpolation float v_depth;
|
||||
} in_data[];
|
||||
|
||||
out VertexData {
|
||||
float4 v_col0;
|
||||
nointerpolation float v_depth;
|
||||
} out_data;
|
||||
|
||||
layout(lines) in;
|
||||
|
@ -799,21 +819,25 @@ void main() {
|
|||
|
||||
// top-left
|
||||
out_data.v_col0 = in_data[0].v_col0;
|
||||
out_data.v_depth = in_data[0].v_depth;
|
||||
gl_Position = gl_in[0].gl_Position - offset;
|
||||
EmitVertex();
|
||||
|
||||
// top-right
|
||||
out_data.v_col0 = in_data[0].v_col0;
|
||||
out_data.v_depth = in_data[0].v_depth;
|
||||
gl_Position = gl_in[0].gl_Position + offset;
|
||||
EmitVertex();
|
||||
|
||||
// bottom-left
|
||||
out_data.v_col0 = in_data[1].v_col0;
|
||||
out_data.v_depth = in_data[1].v_depth;
|
||||
gl_Position = gl_in[1].gl_Position - offset;
|
||||
EmitVertex();
|
||||
|
||||
// bottom-right
|
||||
out_data.v_col0 = in_data[1].v_col0;
|
||||
out_data.v_depth = in_data[1].v_depth;
|
||||
gl_Position = gl_in[1].gl_Position + offset;
|
||||
EmitVertex();
|
||||
|
||||
|
@ -827,6 +851,7 @@ void main() {
|
|||
struct Vertex
|
||||
{
|
||||
float4 col0 : COLOR0;
|
||||
float depth : TEXCOORD0;
|
||||
float4 pos : SV_Position;
|
||||
};
|
||||
|
||||
|
@ -841,21 +866,25 @@ void main(line Vertex input[2], inout TriangleStream<Vertex> output)
|
|||
|
||||
// top-left
|
||||
v.col0 = input[0].col0;
|
||||
v.depth = input[0].depth;
|
||||
v.pos = input[0].pos - offset;
|
||||
output.Append(v);
|
||||
|
||||
// top-right
|
||||
v.col0 = input[0].col0;
|
||||
v.depth = input[0].depth;
|
||||
v.pos = input[0].pos + offset;
|
||||
output.Append(v);
|
||||
|
||||
// bottom-left
|
||||
v.col0 = input[1].col0;
|
||||
v.depth = input[1].depth;
|
||||
v.pos = input[1].pos - offset;
|
||||
output.Append(v);
|
||||
|
||||
// bottom-right
|
||||
v.col0 = input[1].col0;
|
||||
v.depth = input[1].depth;
|
||||
v.pos = input[1].pos + offset;
|
||||
output.Append(v);
|
||||
|
||||
|
@ -890,11 +919,12 @@ std::string GPU_HW_ShaderGen::GenerateFillFragmentShader()
|
|||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
DeclareUniformBuffer(ss, {"float4 u_fill_color"});
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, true);
|
||||
|
||||
ss << R"(
|
||||
{
|
||||
o_col0 = u_fill_color;
|
||||
o_depth = u_fill_color.a;
|
||||
}
|
||||
)";
|
||||
|
||||
|
@ -907,7 +937,7 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
|
|||
WriteHeader(ss);
|
||||
WriteCommonFunctions(ss);
|
||||
DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"});
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
||||
|
||||
ss << R"(
|
||||
{
|
||||
|
@ -915,6 +945,7 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
|
|||
discard;
|
||||
|
||||
o_col0 = u_fill_color;
|
||||
o_depth = u_fill_color.a;
|
||||
}
|
||||
)";
|
||||
|
||||
|
@ -927,12 +958,12 @@ std::string GPU_HW_ShaderGen::GenerateCopyFragmentShader()
|
|||
WriteHeader(ss);
|
||||
DeclareUniformBuffer(ss, {"float4 u_src_rect"});
|
||||
DeclareTexture(ss, "samp0", 0);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1);
|
||||
|
||||
ss << R"(
|
||||
{
|
||||
float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw;
|
||||
o_col0 = SAMPLE_TEXTURE(samp0, coords);
|
||||
float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw;
|
||||
o_col0 = SAMPLE_TEXTURE(samp0, coords);
|
||||
}
|
||||
)";
|
||||
|
||||
|
@ -950,7 +981,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
|
|||
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_field_offset"});
|
||||
DeclareTexture(ss, "samp0", 0);
|
||||
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1);
|
||||
ss << R"(
|
||||
{
|
||||
uint2 icoords = uint2(v_pos.xy) + u_vram_offset;
|
||||
|
@ -1013,7 +1044,7 @@ uint SampleVRAM(uint2 coords)
|
|||
}
|
||||
)";
|
||||
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1);
|
||||
ss << R"(
|
||||
{
|
||||
uint2 sample_coords = uint2(uint(v_pos.x) * 2u, uint(v_pos.y));
|
||||
|
@ -1043,10 +1074,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
|||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
WriteCommonFunctions(ss);
|
||||
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset"});
|
||||
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits",
|
||||
"float u_depth_value"});
|
||||
|
||||
DeclareTextureBuffer(ss, "samp0", 0, true, true);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
||||
ss << R"(
|
||||
{
|
||||
uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||
|
@ -1058,9 +1090,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
|||
#endif
|
||||
|
||||
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
||||
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r;
|
||||
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r | u_mask_or_bits;
|
||||
|
||||
o_col0 = RGBA5551ToRGBA8(value);
|
||||
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
|
||||
})";
|
||||
|
||||
return ss.str();
|
||||
|
@ -1071,10 +1104,11 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
|
|||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
WriteCommonFunctions(ss);
|
||||
DeclareUniformBuffer(ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_size", "bool u_set_mask_bit"});
|
||||
DeclareUniformBuffer(
|
||||
ss, {"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_size", "bool u_set_mask_bit", "float u_depth_value"});
|
||||
|
||||
DeclareTexture(ss, "samp0", 0);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
||||
ss << R"(
|
||||
{
|
||||
uint2 dst_coords = uint2(v_pos.xy);
|
||||
|
@ -1090,7 +1124,24 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
|
|||
// sample and apply mask bit
|
||||
float4 color = LOAD_TEXTURE(samp0, int2(src_coords), 0);
|
||||
o_col0 = float4(color.xyz, u_set_mask_bit ? 1.0 : color.a);
|
||||
o_depth = (u_set_mask_bit ? 1.0f : ((o_col0.a == 1.0) ? u_depth_value : 0.0));
|
||||
})";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Builds the source of a fragment shader that writes no colour and sets the
// output depth to the alpha channel of the texel of samp0 at the fragment's
// integer position (mip level 0).
// Returns the complete shader source as a string.
std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
DeclareTexture(ss, "samp0", 0);
|
||||
// Arguments after ss: 0 colour inputs, 1 texcoord input, no additional inputs,
// declare the fragment coordinate (v_pos), 0 colour outputs, depth output enabled.
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 0, true);
|
||||
|
||||
// Shader body: depth comes straight from the sampled alpha value.
ss << R"(
|
||||
{
|
||||
o_depth = LOAD_TEXTURE(samp0, int2(v_pos.xy), 0).a;
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ public:
|
|||
std::string GenerateVRAMReadFragmentShader();
|
||||
std::string GenerateVRAMWriteFragmentShader();
|
||||
std::string GenerateVRAMCopyFragmentShader();
|
||||
std::string GenerateVRAMUpdateDepthFragmentShader();
|
||||
|
||||
private:
|
||||
void SetGLSLVersionString();
|
||||
|
@ -38,7 +39,7 @@ private:
|
|||
bool declare_vertex_id = false);
|
||||
void DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
|
||||
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
|
||||
bool declare_fragcoord = false, bool dual_color_output = false);
|
||||
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false);
|
||||
|
||||
void WriteCommonFunctions(std::stringstream& ss);
|
||||
void WriteBatchUniformBuffer(std::stringstream& ss);
|
||||
|
|
Loading…
Reference in a new issue