GPU/SW: Improve software blending precision

This commit is contained in:
Connor McLaughlin 2021-07-05 21:18:24 +10:00
parent 0c4629bda2
commit acb108f3ea

View file

@ -84,11 +84,9 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman
u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
{ {
VRAMPixel color; VRAMPixel color;
bool transparent;
if constexpr (texture_enable) if constexpr (texture_enable)
{ {
// Apply texture window // Apply texture window
// TODO: Precompute the second half
texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x; texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x;
texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y; texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y;
@ -129,8 +127,6 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman
if (texture_color.bits == 0) if (texture_color.bits == 0)
return; return;
transparent = texture_color.c;
if constexpr (raw_texture_enable) if constexpr (raw_texture_enable)
{ {
color.bits = texture_color.bits; color.bits = texture_color.bits;
@ -148,60 +144,68 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman
} }
else else
{ {
transparent = true;
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) | color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) |
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) | (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) |
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10); (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10) | 0x8000u;
} }
const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))}; const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))};
if constexpr (transparency_enable) if constexpr (transparency_enable)
{ {
if (transparent) if (color.bits & 0x8000u)
{ {
#define BLEND_AVERAGE(bg, fg) Truncate8(std::min<u32>((ZeroExtend32(bg) / 2) + (ZeroExtend32(fg) / 2), 0x1F)) // Based on blargg's efficient 15bpp pixel math.
#define BLEND_ADD(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg), 0x1F)) u32 bg_bits = ZeroExtend32(bg_color.bits);
#define BLEND_SUBTRACT(bg, fg) Truncate8((bg > fg) ? ((bg) - (fg)) : 0) u32 fg_bits = ZeroExtend32(color.bits);
#define BLEND_QUARTER(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg / 4), 0x1F))
#define BLEND_RGB(func) \
color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \
func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue())
switch (cmd->draw_mode.transparency_mode) switch (cmd->draw_mode.transparency_mode)
{ {
case GPUTransparencyMode::HalfBackgroundPlusHalfForeground: case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
BLEND_RGB(BLEND_AVERAGE); {
break; bg_bits |= 0x8000u;
color.bits = Truncate16(((fg_bits + bg_bits) - ((fg_bits ^ bg_bits) & 0x0421u)) >> 1);
}
break;
case GPUTransparencyMode::BackgroundPlusForeground: case GPUTransparencyMode::BackgroundPlusForeground:
BLEND_RGB(BLEND_ADD); {
break; bg_bits &= ~0x8000u;
const u32 sum = fg_bits + bg_bits;
const u32 carry = (sum - ((fg_bits ^ bg_bits) & 0x8421u)) & 0x8420u;
color.bits = Truncate16((sum - carry) | (carry - (carry >> 5)));
}
break;
case GPUTransparencyMode::BackgroundMinusForeground: case GPUTransparencyMode::BackgroundMinusForeground:
BLEND_RGB(BLEND_SUBTRACT); {
break; bg_bits |= 0x8000u;
fg_bits &= ~0x8000u;
const u32 diff = bg_bits - fg_bits + 0x108420u;
const u32 borrow = (diff - ((bg_bits ^ fg_bits) & 0x108420u)) & 0x108420u;
color.bits = Truncate16((diff - borrow) & (borrow - (borrow >> 5)));
}
break;
case GPUTransparencyMode::BackgroundPlusQuarterForeground: case GPUTransparencyMode::BackgroundPlusQuarterForeground:
BLEND_RGB(BLEND_QUARTER); {
break; bg_bits &= ~0x8000u;
default: fg_bits = ((fg_bits >> 2) & 0x1CE7u) | 0x8000u;
break;
const u32 sum = fg_bits + bg_bits;
const u32 carry = (sum - ((fg_bits ^ bg_bits) & 0x8421u)) & 0x8420u;
color.bits = Truncate16((sum - carry) | (carry - (carry >> 5)));
}
break;
} }
#undef BLEND_RGB
#undef BLEND_QUARTER
#undef BLEND_SUBTRACT
#undef BLEND_ADD
#undef BLEND_AVERAGE
} }
} }
else
{
UNREFERENCED_VARIABLE(transparent);
}
const u16 mask_and = cmd->params.GetMaskAND(); const u16 mask_and = cmd->params.GetMaskAND();
if ((bg_color.bits & mask_and) != 0) if ((bg_color.bits & mask_and) != 0)