mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-19 06:45:39 +00:00
GPU: More accurate texture blending in non-true-colour-mode
Fixes Silent Hill loading animation, Advanced V.G. intro fade-out, and probably others.
This commit is contained in:
parent
c114873ed8
commit
bf3c83658a
|
@ -95,7 +95,8 @@ public:
|
||||||
MAX_PRIMITIVE_HEIGHT = 512,
|
MAX_PRIMITIVE_HEIGHT = 512,
|
||||||
DOT_TIMER_INDEX = 0,
|
DOT_TIMER_INDEX = 0,
|
||||||
HBLANK_TIMER_INDEX = 1,
|
HBLANK_TIMER_INDEX = 1,
|
||||||
MAX_RESOLUTION_SCALE = 16
|
MAX_RESOLUTION_SCALE = 16,
|
||||||
|
DITHER_MATRIX_SIZE = 4
|
||||||
};
|
};
|
||||||
|
|
||||||
enum : u16
|
enum : u16
|
||||||
|
@ -107,7 +108,7 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
// 4x4 dither matrix.
|
// 4x4 dither matrix.
|
||||||
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
|
static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1}, // row 0
|
||||||
{+2, -2, +3, -1}, // row 1
|
{+2, -2, +3, -1}, // row 1
|
||||||
{-3, +1, -4, +0}, // row 2
|
{-3, +1, -4, +0}, // row 2
|
||||||
{+4, -1, +2, -2}}; // row 3
|
{+4, -1, +2, -2}}; // row 3
|
||||||
|
|
|
@ -581,17 +581,20 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
|
||||||
ss << "};\n";
|
ss << "};\n";
|
||||||
|
|
||||||
ss << R"(
|
ss << R"(
|
||||||
int3 ApplyDithering(uint2 coord, int3 icol)
|
uint3 ApplyDithering(uint2 coord, uint3 icol)
|
||||||
{
|
{
|
||||||
|
#if DITHERING_SCALED
|
||||||
uint2 fc = coord & uint2(3u, 3u);
|
uint2 fc = coord & uint2(3u, 3u);
|
||||||
|
#else
|
||||||
|
uint2 fc = (coord / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & uint2(3u, 3u);
|
||||||
|
#endif
|
||||||
int offset = s_dither_values[fc.y * 4u + fc.x];
|
int offset = s_dither_values[fc.y * 4u + fc.x];
|
||||||
return icol + int3(offset, offset, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
int3 TruncateTo15Bit(int3 icol)
|
#if !TRUE_COLOR
|
||||||
{
|
return uint3(clamp((int3(icol) + int3(offset, offset, offset)) >> 3, 0, 31));
|
||||||
icol = clamp(icol, int3(0, 0, 0), int3(255, 255, 255));
|
#else
|
||||||
return (icol & int3(~7, ~7, ~7)) | ((icol >> 3) & int3(7, 7, 7));
|
return uint3(clamp(int3(icol) + int3(offset, offset, offset), 0, 255));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if TEXTURED
|
#if TEXTURED
|
||||||
|
@ -654,10 +657,10 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
||||||
|
|
||||||
ss << R"(
|
ss << R"(
|
||||||
{
|
{
|
||||||
int3 vertcol = int3(v_col0.rgb * float3(255.0, 255.0, 255.0));
|
uint3 vertcol = uint3(v_col0.rgb * float3(255.0, 255.0, 255.0));
|
||||||
|
|
||||||
bool semitransparent;
|
bool semitransparent;
|
||||||
int3 icolor;
|
uint3 icolor;
|
||||||
float ialpha;
|
float ialpha;
|
||||||
float oalpha;
|
float oalpha;
|
||||||
|
|
||||||
|
@ -707,10 +710,27 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
||||||
ialpha = 1.0;
|
ialpha = 1.0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if RAW_TEXTURE
|
// If not using true color, truncate the framebuffer colors to 5-bit.
|
||||||
icolor = int3(texcol.rgb * float3(255.0, 255.0, 255.0));
|
#if !TRUE_COLOR
|
||||||
|
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3;
|
||||||
|
#if !RAW_TEXTURE
|
||||||
|
icolor = (icolor * vertcol) >> 4;
|
||||||
|
#if DITHERING
|
||||||
|
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
||||||
#else
|
#else
|
||||||
icolor = (vertcol * int3(texcol.rgb * float3(255.0, 255.0, 255.0))) >> 7;
|
icolor = min(icolor >> 3, uint3(31u, 31u, 31u));
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0));
|
||||||
|
#if !RAW_TEXTURE
|
||||||
|
icolor = (icolor * vertcol) >> 7;
|
||||||
|
#if DITHERING
|
||||||
|
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
||||||
|
#else
|
||||||
|
icolor = min(icolor, uint3(255u, 255u, 255u));
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Compute output alpha (mask bit)
|
// Compute output alpha (mask bit)
|
||||||
|
@ -721,19 +741,18 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
||||||
icolor = vertcol;
|
icolor = vertcol;
|
||||||
ialpha = 1.0;
|
ialpha = 1.0;
|
||||||
|
|
||||||
// However, the mask bit is cleared if set mask bit is false.
|
|
||||||
oalpha = float(u_set_mask_while_drawing);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Apply dithering
|
|
||||||
#if DITHERING
|
#if DITHERING
|
||||||
#if DITHERING_SCALED
|
|
||||||
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
icolor = ApplyDithering(uint2(v_pos.xy), icolor);
|
||||||
#else
|
#else
|
||||||
icolor = ApplyDithering(uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE), icolor);
|
#if !TRUE_COLOR
|
||||||
|
icolor >>= 3;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// However, the mask bit is cleared if set mask bit is false.
|
||||||
|
oalpha = float(u_set_mask_while_drawing);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Premultiply alpha so we don't need to use a colour output for it.
|
// Premultiply alpha so we don't need to use a colour output for it.
|
||||||
float premultiply_alpha = ialpha;
|
float premultiply_alpha = ialpha;
|
||||||
#if TRANSPARENCY
|
#if TRANSPARENCY
|
||||||
|
@ -744,11 +763,10 @@ float4 SampleFromVRAM(uint4 texpage, uint2 icoord)
|
||||||
#if !TRUE_COLOR
|
#if !TRUE_COLOR
|
||||||
// We want to apply the alpha before the truncation to 16-bit, otherwise we'll be passing a 32-bit precision color
|
// We want to apply the alpha before the truncation to 16-bit, otherwise we'll be passing a 32-bit precision color
|
||||||
// into the blend unit, which can cause a small amount of error to accumulate.
|
// into the blend unit, which can cause a small amount of error to accumulate.
|
||||||
icolor = int3(((float3(icolor) / float3(255.0, 255.0, 255.0)) * premultiply_alpha) * float3(255.0, 255.0, 255.0));
|
color = floor(float3(icolor) * premultiply_alpha) / float3(31.0, 31.0, 31.0);
|
||||||
color = (float3(icolor >> 3) / float3(31.0, 31.0, 31.0));
|
|
||||||
#else
|
#else
|
||||||
// True color is actually simpler here since we want to preserve the precision.
|
// True color is actually simpler here since we want to preserve the precision.
|
||||||
color = (float3(icolor) / float3(255.0, 255.0, 255.0)) * premultiply_alpha;
|
color = (float3(icolor) * premultiply_alpha) / float3(255.0, 255.0, 255.0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if TRANSPARENCY
|
#if TRANSPARENCY
|
||||||
|
|
|
@ -551,6 +551,25 @@ void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the dithering lookup table. For every dither matrix cell (i, j) and
// every input value in [0, DITHER_LUT_SIZE), the entry holds
// (value + DITHER_MATRIX[i][j]) >> 3, clamped to the range [0, 31].
constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
|
||||||
|
{
|
||||||
|
DitherLUT lut = {};
|
||||||
|
for (u32 i = 0; i < DITHER_MATRIX_SIZE; i++)
|
||||||
|
{
|
||||||
|
for (u32 j = 0; j < DITHER_MATRIX_SIZE; j++)
|
||||||
|
{
|
||||||
|
for (s32 value = 0; value < DITHER_LUT_SIZE; value++)
|
||||||
|
{
|
||||||
|
// The matrix holds negative offsets, so the sum can go below zero for small values.
const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3;
|
||||||
|
// Clamp to [0, 31] before narrowing to u8.
lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lut;
|
||||||
|
}
|
||||||
|
|
||||||
|
// File-local table initialised from ComputeDitherLUT(); being constexpr, the initialiser must be a constant expression.
static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT();
|
||||||
|
|
||||||
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
|
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
|
||||||
void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
|
void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
|
||||||
{
|
{
|
||||||
|
@ -612,23 +631,25 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const u8 r = Truncate8(std::min<u16>((ZeroExtend16(texture_color.GetR8()) * ZeroExtend16(color_r)) >> 7, 0xFF));
|
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
|
||||||
const u8 g = Truncate8(std::min<u16>((ZeroExtend16(texture_color.GetG8()) * ZeroExtend16(color_g)) >> 7, 0xFF));
|
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
|
||||||
const u8 b = Truncate8(std::min<u16>((ZeroExtend16(texture_color.GetB8()) * ZeroExtend16(color_b)) >> 7, 0xFF));
|
|
||||||
if constexpr (dithering_enable)
|
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) |
|
||||||
color.SetRGB24Dithered(x, y, r, g, b, texture_color.c);
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) |
|
||||||
else
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) |
|
||||||
color.SetRGB24(r, g, b, texture_color.c);
|
(texture_color.bits & 0x8000u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
transparent = true;
|
transparent = true;
|
||||||
|
|
||||||
if constexpr (dithering_enable)
|
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
|
||||||
color.SetRGB24Dithered(x, y, color_r, color_g, color_b);
|
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
|
||||||
else
|
|
||||||
color.SetRGB24(color_r, color_g, color_b);
|
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) |
|
||||||
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) |
|
||||||
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))};
|
const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))};
|
||||||
|
|
|
@ -23,6 +23,11 @@ public:
|
||||||
u16* GetPixelPtr(u32 x, u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
|
u16* GetPixelPtr(u32 x, u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
|
||||||
void SetPixel(u32 x, u32 y, u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
|
void SetPixel(u32 x, u32 y, u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
|
||||||
|
|
||||||
|
// the largest index is actually ((31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
|
||||||
|
static constexpr u32 DITHER_LUT_SIZE = 512;
|
||||||
|
using DitherLUT = std::array<std::array<std::array<u8, 512>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>;
|
||||||
|
static constexpr DitherLUT ComputeDitherLUT();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
struct SWVertex
|
struct SWVertex
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue