GPU: Add basic texture filtering support

Filtering still has issues around the edges of text.
This commit is contained in:
Connor McLaughlin 2019-12-07 17:03:54 +10:00
parent c284d3835f
commit 67c67bbb66
10 changed files with 105 additions and 69 deletions

View file

@ -24,6 +24,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display, System* system, DMA* dma, Int
m_resolution_scale = std::clamp<u32>(m_system->GetSettings().gpu_resolution_scale, 1, m_max_resolution_scale);
m_system->GetSettings().max_gpu_resolution_scale = m_max_resolution_scale;
m_true_color = m_system->GetSettings().gpu_true_color;
m_texture_filtering = m_system->GetSettings().gpu_texture_filtering;
return true;
}
@ -58,6 +59,7 @@ void GPU_HW::UpdateSettings()
m_resolution_scale = std::clamp<u32>(m_system->GetSettings().gpu_resolution_scale, 1, m_max_resolution_scale);
m_true_color = m_system->GetSettings().gpu_true_color;
m_texture_filtering = m_system->GetSettings().gpu_texture_filtering;
}
void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr)

View file

@ -157,6 +157,7 @@ protected:
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
bool m_texture_filtering = false;
bool m_supports_dual_source_blend = false;
BatchConfig m_batch = {};

View file

@ -119,6 +119,7 @@ void GPU_HW_D3D11::UpdateSettings()
GPU_HW::UpdateSettings();
CreateFramebuffer();
CreateStateObjects();
CompileShaders();
UpdateDisplay();
}
@ -239,7 +240,7 @@ bool GPU_HW_D3D11::CreateBatchInputLayout()
{"ATTR", 3, DXGI_FORMAT_R32_SINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
// we need a vertex shader...
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_texture_filtering,
m_supports_dual_source_blend);
ComPtr<ID3DBlob> vs_bytecode = D3D11::ShaderCompiler::CompileShader(
D3D11::ShaderCompiler::Type::Vertex, m_device->GetFeatureLevel(), shadergen.GenerateBatchVertexShader(true), false);
@ -265,49 +266,54 @@ bool GPU_HW_D3D11::CreateStateObjects()
CD3D11_RASTERIZER_DESC rs_desc = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT());
rs_desc.CullMode = D3D11_CULL_NONE;
rs_desc.ScissorEnable = TRUE;
hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.GetAddressOf());
hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
CD3D11_DEPTH_STENCIL_DESC ds_desc = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT());
ds_desc.DepthEnable = FALSE;
ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO;
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_disabled_state.GetAddressOf());
hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_disabled_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.GetAddressOf());
hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT());
sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler_state.GetAddressOf());
hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
sampler_desc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT;
hr = m_device->CreateSamplerState(&sampler_desc, m_linear_sampler_state.GetAddressOf());
hr = m_device->CreateSamplerState(&sampler_desc, m_linear_sampler_state.ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
m_batch_blend_states[static_cast<u8>(TransparencyMode::Disabled)] = m_blend_disabled_state;
for (u8 transparency_mode = 0; transparency_mode < 4; transparency_mode++)
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA;
bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bl_desc.RenderTarget[0].BlendOp =
(transparency_mode == static_cast<u8>(TransparencyMode::BackgroundMinusForeground)) ?
D3D11_BLEND_OP_REV_SUBTRACT :
D3D11_BLEND_OP_ADD;
bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
hr = m_device->CreateBlendState(&bl_desc, m_batch_blend_states[transparency_mode].GetAddressOf());
if (transparency_mode == static_cast<u8>(TransparencyMode::Disabled) && !m_texture_filtering)
{
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
}
else
{
bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA;
bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bl_desc.RenderTarget[0].BlendOp =
(transparency_mode == static_cast<u8>(TransparencyMode::BackgroundMinusForeground)) ?
D3D11_BLEND_OP_REV_SUBTRACT :
D3D11_BLEND_OP_ADD;
bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
}
hr = m_device->CreateBlendState(&bl_desc, m_batch_blend_states[transparency_mode].ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
}
@ -317,8 +323,8 @@ bool GPU_HW_D3D11::CreateStateObjects()
bool GPU_HW_D3D11::CompileShaders()
{
const bool debug = true;
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
const bool debug = false;
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_texture_filtering,
m_supports_dual_source_blend);
m_screen_quad_vertex_shader = D3D11::ShaderCompiler::CompileAndCreateVertexShader(

View file

@ -265,7 +265,7 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
bool GPU_HW_OpenGL::CompilePrograms()
{
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_texture_filtering,
m_supports_dual_source_blend);
for (u32 render_mode = 0; render_mode < 4; render_mode++)

View file

@ -189,7 +189,7 @@ void GPU_HW_OpenGL_ES::DestroyFramebuffer()
bool GPU_HW_OpenGL_ES::CompilePrograms()
{
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_texture_filtering,
m_supports_dual_source_blend);
for (u32 render_mode = 0; render_mode < 4; render_mode++)

View file

@ -6,10 +6,10 @@
Log_SetChannel(GPU_HW_ShaderGen);
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color,
bool supports_dual_source_blend)
bool texture_filtering, bool supports_dual_source_blend)
: m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color),
m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_glsl_es(render_api == HostDisplay::RenderAPI::OpenGLES),
m_supports_dual_source_blend(supports_dual_source_blend)
m_texture_filering(texture_filtering), m_glsl(render_api != HostDisplay::RenderAPI::D3D11),
m_glsl_es(render_api == HostDisplay::RenderAPI::OpenGLES), m_supports_dual_source_blend(supports_dual_source_blend)
{
if (m_glsl)
SetGLSLVersionString();
@ -98,8 +98,12 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define uint3 uvec3\n";
ss << "#define uint4 uvec4\n";
ss << "#define nointerpolation flat\n";
ss << "#define frac fract\n";
ss << "#define lerp mix\n";
ss << "#define CONSTANT const\n";
ss << "#define VECTOR_EQ(a, b) ((a) == (b))\n";
ss << "#define VECTOR_NEQ(a, b) ((a) != (b))\n";
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";
@ -109,6 +113,8 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
{
ss << "#define HLSL 1\n";
ss << "#define CONSTANT static const\n";
ss << "#define VECTOR_EQ(a, b) (all((a) == (b)))\n";
ss << "#define VECTOR_NEQ(a, b) (any((a) != (b)))\n";
ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n";
@ -369,7 +375,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
v_col0 = a_col0;
#if TEXTURED
v_tex0 = float2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / float2(255.0, 255.0);
v_tex0 = float2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16));
// base_x,base_y,palette_x,palette_y
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
@ -389,8 +395,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit;
const bool textured = (texture_mode != GPU::TextureMode::Disabled);
const bool use_dual_source =
m_supports_dual_source_blend && transparency != GPU_HW::BatchRenderMode::TransparencyDisabled;
const bool use_dual_source = m_supports_dual_source_blend &&
(transparency != GPU_HW::BatchRenderMode::TransparencyDisabled || m_texture_filering);
std::stringstream ss;
WriteHeader(ss);
@ -406,6 +412,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
DefineMacro(ss, "DITHERING", dithering);
DefineMacro(ss, "TRUE_COLOR", m_true_color);
DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filering);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
WriteCommonFunctions(ss);
@ -442,27 +449,17 @@ int3 TruncateTo15Bit(int3 icol)
}
#if TEXTURED
int2 ApplyNativeTextureWindow(int2 coords)
CONSTANT float4 TRANSPARENT_PIXEL_COLOR = float4(0.0, 0.0, 0.0, 0.0);
int2 ApplyTextureWindow(int2 coords)
{
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
return int2(int(x), int(y));
}
int2 ApplyTextureWindow(int2 coords)
{
if (RESOLUTION_SCALE == 1)
return ApplyNativeTextureWindow(coords);
int2 downscaled_coords = coords / int2(RESOLUTION_SCALE, RESOLUTION_SCALE);
int2 coords_offset = coords % int2(RESOLUTION_SCALE, RESOLUTION_SCALE);
return (ApplyNativeTextureWindow(downscaled_coords) * int2(RESOLUTION_SCALE, RESOLUTION_SCALE)) + coords_offset;
}
int4 SampleFromVRAM(int4 texpage, float2 coord)
float4 SampleFromVRAM(int4 texpage, int2 icoord)
{
// from 0..1 to 0..255
int2 icoord = int2(coord * float2(float(255 * RESOLUTION_SCALE), float(255 * RESOLUTION_SCALE)));
icoord = ApplyTextureWindow(icoord);
// adjust for tightly packed palette formats
@ -474,7 +471,7 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
#endif
// fixup coords
int2 vicoord = int2(texpage.x + index_coord.x, fixYCoord(texpage.y + index_coord.y));
int2 vicoord = int2(texpage.x + index_coord.x * RESOLUTION_SCALE, fixYCoord(texpage.y + index_coord.y * RESOLUTION_SCALE));
// load colour/palette
float4 color = LOAD_TEXTURE(samp0, vicoord, 0);
@ -482,11 +479,11 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
// apply palette
#if PALETTE
#if PALETTE_4_BIT
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
int subpixel = int(icoord.x) & 3;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
#elif PALETTE_8_BIT
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
int subpixel = int(icoord.x) & 1;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
#endif
@ -494,7 +491,7 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
color = LOAD_TEXTURE(samp0, palette_icoord, 0);
#endif
return int4(color * float4(255.0, 255.0, 255.0, 255.0));
return color;
}
#endif
)";
@ -513,31 +510,55 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
int3 vertcol = int3(v_col0.rgb * float3(255.0, 255.0, 255.0));
bool semitransparent;
bool new_mask_bit;
int3 icolor;
float ialpha;
#if TEXTURED
int4 texcol = SampleFromVRAM(v_texpage, v_tex0);
#if GLSL
bool transparent = (texcol == int4(0.0, 0.0, 0.0, 0.0));
#if TEXTURE_FILTERING
int2 icoord = int2(v_tex0);
float2 pcoord = frac(v_tex0) - float2(0.5, 0.5);
float2 poffs = sign(pcoord);
pcoord = abs(pcoord);
// TODO: Clamp to page
float4 tl = SampleFromVRAM(v_texpage, int2(v_tex0));
float4 tr = SampleFromVRAM(v_texpage, int2(min(v_tex0.x + poffs.x, 255.0), v_tex0.y));
float4 bl = SampleFromVRAM(v_texpage, int2(v_tex0.x, min(v_tex0.y + poffs.y, 255.0)));
float4 br = SampleFromVRAM(v_texpage, int2(min(v_tex0.x + poffs.x, 255.0), min(v_tex0.y + poffs.y, 255.0)));
// Compute alpha from how many texels aren't pixel color 0000h.
float tl_a = float(VECTOR_NEQ(tl, TRANSPARENT_PIXEL_COLOR));
float tr_a = float(VECTOR_NEQ(tr, TRANSPARENT_PIXEL_COLOR));
float bl_a = float(VECTOR_NEQ(bl, TRANSPARENT_PIXEL_COLOR));
float br_a = float(VECTOR_NEQ(br, TRANSPARENT_PIXEL_COLOR));
// Bilinearly interpolate.
float4 texcol = lerp(lerp(tl, tr, pcoord.x), lerp(bl, br, pcoord.x), pcoord.y);
ialpha = lerp(lerp(tl_a, tr_a, pcoord.x), lerp(bl_a, br_a, pcoord.x), pcoord.y);
if (ialpha == 0.0)
discard;
texcol.rgb /= float3(ialpha, ialpha, ialpha);
semitransparent = (texcol.a != 0.0);
#else
bool transparent = (all(texcol == int4(0.0, 0.0, 0.0, 0.0)));
#endif
if (transparent)
discard;
float4 texcol = SampleFromVRAM(v_texpage, int2(v_tex0));
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
discard;
// Grab semitransparent bit from the texture color.
semitransparent = (texcol.a != 0);
semitransparent = (texcol.a != 0.0);
ialpha = 1.0;
#endif
#if RAW_TEXTURE
icolor = texcol.rgb;
icolor = int3(texcol.rgb * float3(255.0, 255.0, 255.0));
#else
icolor = (vertcol * texcol.rgb) >> 7;
icolor = (vertcol * int3(texcol.rgb * float3(255.0, 255.0, 255.0))) >> 7;
#endif
#else
// All pixels are semitransparent for untextured polygons.
semitransparent = true;
icolor = vertcol;
ialpha = 1.0;
#endif
// Apply dithering
@ -565,10 +586,10 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
#endif
#if USE_DUAL_SOURCE
o_col0 = float4(color * u_src_alpha_factor, output_alpha);
o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor);
o_col0 = float4(color * (u_src_alpha_factor * ialpha), output_alpha);
o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha);
#else
o_col0 = float4(color * u_src_alpha_factor, u_dst_alpha_factor);
o_col0 = float4(color * (u_src_alpha_factor * ialpha), u_dst_alpha_factor / ialpha);
#endif
}
else
@ -578,18 +599,18 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
#endif
#if USE_DUAL_SOURCE
o_col0 = float4(color, output_alpha);
o_col0 = float4(color * ialpha, output_alpha);
o_col1 = float4(0.0, 0.0, 0.0, 0.0);
#else
o_col0 = float4(color, 0.0);
o_col0 = float4(color * ialpha, 1.0 - ialpha);
#endif
}
#else
// Non-transparency won't enable blending so we can write the mask here regardless.
o_col0 = float4(color, output_alpha);
o_col0 = float4(color * ialpha, output_alpha);
#if USE_DUAL_SOURCE
o_col1 = float4(0.0, 0.0, 0.0, 0.0);
o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha);
#endif
#endif
}

View file

@ -8,7 +8,7 @@ class GPU_HW_ShaderGen
{
public:
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color,
bool supports_dual_source_belnd);
bool texture_filtering, bool supports_dual_source_belnd);
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
@ -25,6 +25,7 @@ public:
HostDisplay::RenderAPI m_render_api;
u32 m_resolution_scale;
bool m_true_color;
bool m_texture_filering;
bool m_glsl;
bool m_glsl_es;
bool m_supports_dual_source_blend;

View file

@ -57,6 +57,7 @@ void Settings::Load(const char* filename)
gpu_renderer = ParseRendererName(ini.GetValue("GPU", "Renderer", "OpenGL")).value_or(GPURenderer::HardwareOpenGL);
gpu_resolution_scale = static_cast<u32>(ini.GetLongValue("GPU", "ResolutionScale", 1));
gpu_true_color = ini.GetBoolValue("GPU", "TrueColor", false);
gpu_texture_filtering = ini.GetBoolValue("GPU", "TextureFiltering", false);
display_linear_filtering = ini.GetBoolValue("Display", "LinearFiltering", true);
@ -89,6 +90,7 @@ bool Settings::Save(const char* filename) const
ini.SetLongValue("GPU", "ResolutionScale", static_cast<long>(gpu_resolution_scale));
ini.SetBoolValue("GPU", "VSync", video_sync_enabled);
ini.SetBoolValue("GPU", "TrueColor", gpu_true_color);
ini.SetBoolValue("GPU", "TextureFiltering", gpu_texture_filtering);
ini.SetBoolValue("Display", "LinearFiltering", display_linear_filtering);

View file

@ -20,6 +20,7 @@ struct Settings
u32 gpu_resolution_scale = 1;
mutable u32 max_gpu_resolution_scale = 1;
bool gpu_true_color = false;
bool gpu_texture_filtering = false;
bool display_linear_filtering = true;
bool display_fullscreen = false;

View file

@ -850,6 +850,7 @@ void SDLHostInterface::DrawQuickSettingsMenu()
}
gpu_settings_changed |= ImGui::MenuItem("True (24-Bit) Color", nullptr, &m_settings.gpu_true_color);
gpu_settings_changed |= ImGui::MenuItem("Texture Filtering", nullptr, &m_settings.gpu_texture_filtering);
if (ImGui::MenuItem("Display Linear Filtering", nullptr, &m_settings.display_linear_filtering))
{
m_display->SetDisplayLinearFiltering(m_settings.display_linear_filtering);
@ -1180,6 +1181,7 @@ void SDLHostInterface::DrawSettingsWindow()
}
ImGui::Checkbox("True 24-bit Color (disables dithering)", &m_settings.gpu_true_color);
ImGui::Checkbox("Texture Filtering", &m_settings.gpu_texture_filtering);
}
ImGui::EndTabItem();