GPU/SW: Split out rasterizer, add dynamic selection

This commit is contained in:
Stenzek 2023-12-23 20:38:41 +10:00
parent 00634f68cc
commit 25bc8a6480
No known key found for this signature in database
13 changed files with 1493 additions and 881 deletions

View file

@ -57,6 +57,8 @@ add_library(core
gpu_sw.h gpu_sw.h
gpu_sw_backend.cpp gpu_sw_backend.cpp
gpu_sw_backend.h gpu_sw_backend.h
gpu_sw_rasterizer.cpp
gpu_sw_rasterizer.h
gpu_types.h gpu_types.h
guncon.cpp guncon.cpp
guncon.h guncon.h

View file

@ -50,6 +50,13 @@
<ClCompile Include="gpu_shadergen.cpp" /> <ClCompile Include="gpu_shadergen.cpp" />
<ClCompile Include="gpu_sw.cpp" /> <ClCompile Include="gpu_sw.cpp" />
<ClCompile Include="gpu_sw_backend.cpp" /> <ClCompile Include="gpu_sw_backend.cpp" />
<ClCompile Include="gpu_sw_rasterizer.cpp" />
<ClCompile Include="gpu_sw_rasterizer_avx2.cpp">
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<AdditionalOptions Condition="$(Configuration.Contains(Clang))">%(AdditionalOptions) -mavx2</AdditionalOptions>
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="gte.cpp" /> <ClCompile Include="gte.cpp" />
<ClCompile Include="dma.cpp" /> <ClCompile Include="dma.cpp" />
<ClCompile Include="gpu.cpp" /> <ClCompile Include="gpu.cpp" />
@ -127,6 +134,7 @@
<ClInclude Include="gpu_shadergen.h" /> <ClInclude Include="gpu_shadergen.h" />
<ClInclude Include="gpu_sw.h" /> <ClInclude Include="gpu_sw.h" />
<ClInclude Include="gpu_sw_backend.h" /> <ClInclude Include="gpu_sw_backend.h" />
<ClInclude Include="gpu_sw_rasterizer.h" />
<ClInclude Include="gpu_types.h" /> <ClInclude Include="gpu_types.h" />
<ClInclude Include="gte.h" /> <ClInclude Include="gte.h" />
<ClInclude Include="cpu_types.h" /> <ClInclude Include="cpu_types.h" />
@ -195,6 +203,9 @@
<Project>{57f6206d-f264-4b07-baf8-11b9bbe1f455}</Project> <Project>{57f6206d-f264-4b07-baf8-11b9bbe1f455}</Project>
</ProjectReference> </ProjectReference>
</ItemGroup> </ItemGroup>
<ItemGroup>
<None Include="gpu_sw_rasterizer.inl" />
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{868B98C8-65A1-494B-8346-250A73A48C0A}</ProjectGuid> <ProjectGuid>{868B98C8-65A1-494B-8346-250A73A48C0A}</ProjectGuid>
</PropertyGroup> </PropertyGroup>

View file

@ -67,6 +67,8 @@
<ClCompile Include="justifier.cpp" /> <ClCompile Include="justifier.cpp" />
<ClCompile Include="pine_server.cpp" /> <ClCompile Include="pine_server.cpp" />
<ClCompile Include="gdb_server.cpp" /> <ClCompile Include="gdb_server.cpp" />
<ClCompile Include="gpu_sw_rasterizer.cpp" />
<ClCompile Include="gpu_sw_rasterizer_avx2.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="types.h" /> <ClInclude Include="types.h" />
@ -140,5 +142,9 @@
<ClInclude Include="justifier.h" /> <ClInclude Include="justifier.h" />
<ClInclude Include="pine_server.h" /> <ClInclude Include="pine_server.h" />
<ClInclude Include="gdb_server.h" /> <ClInclude Include="gdb_server.h" />
<ClInclude Include="gpu_sw_rasterizer.h" />
</ItemGroup>
<ItemGroup>
<None Include="gpu_sw_rasterizer.inl" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View file

@ -26,7 +26,7 @@ bool GPUBackend::Initialize(bool force_thread)
void GPUBackend::Reset() void GPUBackend::Reset()
{ {
Sync(true); Sync(true);
m_drawing_area = {}; DrawingAreaChanged(GPUDrawingArea{0, 0, 0, 0}, GSVector4i::zero());
} }
void GPUBackend::UpdateSettings() void GPUBackend::UpdateSettings()
@ -310,8 +310,8 @@ void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
case GPUBackendCommandType::SetDrawingArea: case GPUBackendCommandType::SetDrawingArea:
{ {
FlushRender(); FlushRender();
m_drawing_area = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd)->new_area; const GPUBackendSetDrawingAreaCommand* ccmd = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd);
DrawingAreaChanged(); DrawingAreaChanged(ccmd->new_area, GSVector4i::load<false>(ccmd->new_clamped_area));
} }
break; break;

View file

@ -62,13 +62,11 @@ protected:
virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0; virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0;
virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
virtual void FlushRender() = 0; virtual void FlushRender() = 0;
virtual void DrawingAreaChanged() = 0; virtual void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) = 0;
virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0;
void HandleCommand(const GPUBackendCommand* cmd); void HandleCommand(const GPUBackendCommand* cmd);
GPUDrawingArea m_drawing_area = {};
Threading::KernelSemaphore m_sync_semaphore; Threading::KernelSemaphore m_sync_semaphore;
std::atomic_bool m_gpu_thread_sleeping{false}; std::atomic_bool m_gpu_thread_sleeping{false};
std::atomic_bool m_gpu_loop_done{false}; std::atomic_bool m_gpu_loop_done{false};

View file

@ -501,6 +501,7 @@ void GPU_SW::DispatchRenderCommand()
{ {
GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand(); GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand();
cmd->new_area = m_drawing_area; cmd->new_area = m_drawing_area;
GSVector4i::store<false>(cmd->new_clamped_area, m_clamped_drawing_area);
m_backend.PushCommand(cmd); m_backend.PushCommand(cmd);
m_drawing_area_changed = false; m_drawing_area_changed = false;
} }

View file

@ -3,6 +3,7 @@
#include "gpu_sw_backend.h" #include "gpu_sw_backend.h"
#include "gpu.h" #include "gpu.h"
#include "gpu_sw_rasterizer.h"
#include "system.h" #include "system.h"
#include "util/gpu_device.h" #include "util/gpu_device.h"
@ -15,6 +16,8 @@ GPU_SW_Backend::~GPU_SW_Backend() = default;
bool GPU_SW_Backend::Initialize(bool force_thread) bool GPU_SW_Backend::Initialize(bool force_thread)
{ {
GPU_SW_Rasterizer::SelectImplementation();
return GPUBackend::Initialize(force_thread); return GPUBackend::Initialize(force_thread);
} }
@ -28,688 +31,31 @@ void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
const GPURenderCommand rc{cmd->rc.bits}; const GPURenderCommand rc{cmd->rc.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction(
rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
(this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]);
if (rc.quad_polygon) if (rc.quad_polygon)
(this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); DrawFunction(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]);
} }
void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
{ {
const GPURenderCommand rc{cmd->rc.bits}; const GPURenderCommand rc{cmd->rc.bits};
const DrawRectangleFunction DrawFunction = const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction =
GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); GPU_SW_Rasterizer::GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
(this->*DrawFunction)(cmd); DrawFunction(cmd);
} }
void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd)
{ {
const DrawLineFunction DrawFunction = const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = GPU_SW_Rasterizer::GetDrawLineFunction(
GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled());
for (u16 i = 1; i < cmd->num_vertices; i++) for (u16 i = 1; i < cmd->num_vertices; i++)
(this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); DrawFunction(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]);
}
// Builds the dither lookup table at compile time. For every dither matrix cell
// and every possible input value, the entry is (value + DITHER_MATRIX[cell]) >> 3,
// clamped to the 5-bit range [0, 31].
constexpr GPU_SW_Backend::DitherLUT GPU_SW_Backend::ComputeDitherLUT()
{
  DitherLUT table = {};
  for (u32 row = 0; row < DITHER_MATRIX_SIZE; row++)
  {
    for (u32 col = 0; col < DITHER_MATRIX_SIZE; col++)
    {
      for (u32 input = 0; input < DITHER_LUT_SIZE; input++)
      {
        // Apply the matrix offset, then drop the low three bits (8-bit -> 5-bit).
        s32 level = (static_cast<s32>(input) + DITHER_MATRIX[row][col]) >> 3;
        if (level < 0)
          level = 0;
        else if (level > 31)
          level = 31;

        table[row][col][input] = static_cast<u8>(level);
      }
    }
  }

  return table;
}

// File-local instance of the table, evaluated at compile time.
static constexpr GPU_SW_Backend::DitherLUT s_dither_lut = GPU_SW_Backend::ComputeDitherLUT();
// Shades one pixel at VRAM position (x, y) and writes it back unless it is rejected.
//
// Steps, in order:
//   1. If texture_enable, sample a texel at (texcoord_x, texcoord_y) and, unless
//      raw_texture_enable, modulate it with the colour (color_r/g/b) through s_dither_lut.
//      Otherwise the colour itself is run through s_dither_lut.
//   2. If transparency_enable, blend with the current VRAM pixel according to
//      cmd->draw_mode.transparency_mode.
//   3. Skip the write when the current VRAM pixel has a bit set in cmd->params.GetMaskAND();
//      otherwise store the result OR'd with cmd->params.GetMaskOR().
//
// texcoord_x / texcoord_y are only read when texture_enable is true.
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r,
u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
{
VRAMPixel color;
if constexpr (texture_enable)
{
// Apply texture window
texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x;
texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y;
VRAMPixel texture_color;
switch (cmd->draw_mode.texture_mode)
{
case GPUTextureMode::Palette4Bit:
{
// Four 4-bit palette indices per 16-bit VRAM word: x / 4 selects the word,
// (x % 4) * 4 selects the nibble inside it.
const u16 palette_value =
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const size_t palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
texture_color.bits = g_gpu_clut[palette_index];
}
break;
case GPUTextureMode::Palette8Bit:
{
// Two 8-bit palette indices per 16-bit VRAM word: x / 2 selects the word,
// (x % 2) * 8 selects the byte inside it.
const u16 palette_value =
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const size_t palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
texture_color.bits = g_gpu_clut[palette_index];
}
break;
default:
{
// Any other texture mode: the VRAM word is used directly as the texel colour.
texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
}
break;
}
// A texel whose 16 bits are all zero is skipped: nothing is written for this pixel.
if (texture_color.bits == 0)
return;
if constexpr (raw_texture_enable)
{
// Raw texture: the texel is used unmodified (no colour modulation, no dithering).
color.bits = texture_color.bits;
}
else
{
// With dithering disabled a fixed LUT cell ([2][3]) is used.
// NOTE(review): presumably that DITHER_MATRIX entry has a zero offset - confirm.
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
// Each channel: (texel channel * colour channel) >> 4 indexes the LUT; the texel's
// bit 15 is carried through unchanged.
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) |
(ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) |
(ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) |
(texture_color.bits & 0x8000u);
}
}
else
{
// Same fixed-cell convention as the textured path when dithering is disabled.
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
// Non-textured transparent polygons don't set bit 15, but are treated as transparent.
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) |
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) |
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10) | (transparency_enable ? 0x8000u : 0);
}
// Current VRAM content at the destination; used for blending and for the mask test.
const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))};
if constexpr (transparency_enable)
{
// Textured pixels blend only when bit 15 of the colour is set; untextured pixels always blend.
if (color.bits & 0x8000u || !texture_enable)
{
// Based on blargg's efficient 15bpp pixel math.
u32 bg_bits = ZeroExtend32(bg_color.bits);
u32 fg_bits = ZeroExtend32(color.bits);
switch (cmd->draw_mode.transparency_mode)
{
case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
{
bg_bits |= 0x8000u;
color.bits = Truncate16(((fg_bits + bg_bits) - ((fg_bits ^ bg_bits) & 0x0421u)) >> 1);
}
break;
case GPUTransparencyMode::BackgroundPlusForeground:
{
bg_bits &= ~0x8000u;
const u32 sum = fg_bits + bg_bits;
const u32 carry = (sum - ((fg_bits ^ bg_bits) & 0x8421u)) & 0x8420u;
color.bits = Truncate16((sum - carry) | (carry - (carry >> 5)));
}
break;
case GPUTransparencyMode::BackgroundMinusForeground:
{
bg_bits |= 0x8000u;
fg_bits &= ~0x8000u;
const u32 diff = bg_bits - fg_bits + 0x108420u;
const u32 borrow = (diff - ((bg_bits ^ fg_bits) & 0x108420u)) & 0x108420u;
color.bits = Truncate16((diff - borrow) & (borrow - (borrow >> 5)));
}
break;
case GPUTransparencyMode::BackgroundPlusQuarterForeground:
{
bg_bits &= ~0x8000u;
// Quarter the foreground per channel (shift by 2, keep 3 bits of each 5-bit field).
fg_bits = ((fg_bits >> 2) & 0x1CE7u) | 0x8000u;
const u32 sum = fg_bits + bg_bits;
const u32 carry = (sum - ((fg_bits ^ bg_bits) & 0x8421u)) & 0x8420u;
color.bits = Truncate16((sum - carry) | (carry - (carry >> 5)));
}
break;
default:
break;
}
// See above.
if constexpr (!texture_enable)
color.bits &= ~0x8000u;
}
}
// Mask test: leave the destination untouched if it has any bit of mask_and set.
const u16 mask_and = cmd->params.GetMaskAND();
if ((bg_color.bits & mask_and) != 0)
return;
DebugAssert(static_cast<u32>(x) < VRAM_WIDTH && static_cast<u32>(y) < VRAM_HEIGHT);
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | cmd->params.GetMaskOR());
}
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
{
const s32 origin_x = cmd->x;
const s32 origin_y = cmd->y;
const auto [r, g, b] = UnpackColorRGB24(cmd->color);
const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord);
for (u32 offset_y = 0; offset_y < cmd->height; offset_y++)
{
const s32 y = origin_y + static_cast<s32>(offset_y);
if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom) ||
(cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u)))
{
continue;
}
const u32 draw_y = static_cast<u32>(y) & VRAM_HEIGHT_MASK;
const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
for (u32 offset_x = 0; offset_x < cmd->width; offset_x++)
{
const s32 x = origin_x + static_cast<s32>(offset_x);
if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right))
continue;
const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(cmd, static_cast<u32>(x), draw_y, r, g,
b, texcoord_x, texcoord_y);
}
}
}
//////////////////////////////////////////////////////////////////////////
// Polygon and line rasterization ported from Mednafen
//////////////////////////////////////////////////////////////////////////
// Shift applied when converting an integer attribute to fixed point (see COORD_MF_INT).
#define COORD_FBS 12
// Converts an integer to fixed point by shifting it left COORD_FBS bits.
#define COORD_MF_INT(n) ((n) << COORD_FBS)
// Additional left shift applied on top of COORD_FBS to interpolants and their deltas;
// values are shifted right by (COORD_FBS + COORD_POST_PADDING) to recover the integer part.
#define COORD_POST_PADDING 12
// Converts an integer x coordinate to the 32.32 fixed-point form used for polygon
// edges: x in the upper 32 bits, plus a constant bias of (2^32 - 2^11).
static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x)
{
  const u64 integer_part = static_cast<u64>(x) << 32;
  const u64 bias = (1ULL << 32) - (1 << 11);
  return integer_part + bias;
}
// Computes the per-scanline x step of an edge as a 32.32 fixed-point value: dx / dy,
// with the magnitude of the numerator increased by (dy - 1) before the truncating
// division. The caller must ensure dy is non-zero.
static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy)
{
  s64 scaled = static_cast<s64>(static_cast<u64>(dx) << 32);

  // A non-zero scaled value has magnitude >= 2^32, so the adjustment (at most a 32-bit
  // quantity) can never flip its sign; only one of the two branches can apply.
  if (scaled < 0)
    scaled -= dy - 1;
  else if (scaled > 0)
    scaled += dy - 1;

  return scaled / dy;
}
// Extracts the integer part (upper 32 bits) of a 32.32 fixed-point edge coordinate.
static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp)
{
  const s64 integer_part = xfp >> 32;
  return static_cast<s32>(integer_part);
}
// Computes the per-pixel attribute gradients (d/dx and d/dy) for triangle A, B, C and
// stores them in idl. Colour gradients are only written when shading_enable is set,
// texture coordinate gradients only when texture_enable is set.
//
// Returns false (leaving idl untouched) when the x/y cross term of the three vertices
// is zero, in which case no gradients can be derived; returns true otherwise.
template<bool shading_enable, bool texture_enable>
bool ALWAYS_INLINE_RELEASE GPU_SW_Backend::CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A,
const GPUBackendDrawPolygonCommand::Vertex* B,
const GPUBackendDrawPolygonCommand::Vertex* C)
{
// Cross term of the edge vectors (A->B) and (B->C) over the vertex members x and y.
#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y)))
s32 denom = CALCIS(x, y);
if (!denom)
return false;
// Each gradient is (cross term * 2^COORD_FBS) / denom, then shifted left by
// COORD_POST_PADDING; the cast to u32 happens before that final shift.
if constexpr (shading_enable)
{
idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
}
if constexpr (texture_enable)
{
idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
}
return true;
// The helper macro is local to this function.
#undef CALCIS
}
// Advances the interpolants in ig by `count` steps along x, using the d/dx gradients in
// idl. Colour members are only touched when shading_enable is set, u/v only when
// texture_enable is set. `count` is unsigned; DrawTriangle passes a negated coordinate,
// so stepping backwards relies on unsigned wraparound.
template<bool shading_enable, bool texture_enable>
void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
{
if constexpr (shading_enable)
{
ig.r += idl.dr_dx * count;
ig.g += idl.dg_dx * count;
ig.b += idl.db_dx * count;
}
if constexpr (texture_enable)
{
ig.u += idl.du_dx * count;
ig.v += idl.dv_dx * count;
}
}
// Advances the interpolants in ig by `count` steps along y, using the d/dy gradients in
// idl. Colour members are only touched when shading_enable is set, u/v only when
// texture_enable is set. `count` is unsigned; DrawTriangle passes a negated coordinate,
// so stepping backwards relies on unsigned wraparound.
template<bool shading_enable, bool texture_enable>
void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
{
if constexpr (shading_enable)
{
ig.r += idl.dr_dy * count;
ig.g += idl.dg_dy * count;
ig.b += idl.db_dy * count;
}
if constexpr (texture_enable)
{
ig.u += idl.du_dy * count;
ig.v += idl.dv_dy * count;
}
}
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable>
void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig,
const i_deltas& idl)
{
if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u))
return;
s32 x_ig_adjust = x_start;
s32 w = x_bound - x_start;
s32 x = TruncateGPUVertexPosition(x_start);
if (x < static_cast<s32>(m_drawing_area.left))
{
s32 delta = static_cast<s32>(m_drawing_area.left) - x;
x_ig_adjust += delta;
x += delta;
w -= delta;
}
if ((x + w) > (static_cast<s32>(m_drawing_area.right) + 1))
w = static_cast<s32>(m_drawing_area.right) + 1 - x;
if (w <= 0)
return;
AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, x_ig_adjust);
AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, y);
do
{
const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING);
const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING);
const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING);
const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING);
const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING);
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
cmd, static_cast<u32>(x), static_cast<u32>(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u),
Truncate8(v));
x++;
AddIDeltas_DX<shading_enable, texture_enable>(ig, idl);
} while (--w > 0);
}
// Rasterizes the triangle (v0, v1, v2). The vertices are sorted by y, the triangle is
// split at the middle vertex into two halves, and each half is walked scanline by
// scanline, emitting one DrawSpan call per visible line.
//
// Nothing is drawn when the triangle has zero height, exceeds MAX_PRIMITIVE_WIDTH /
// MAX_PRIMITIVE_HEIGHT, or when CalcIDeltas reports that no gradients can be derived.
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable>
void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd,
const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2)
{
// Index (after y-sorting) of the "core" vertex: the one with the smallest x among the
// inputs. It is the anchor from which the interpolants are seeded.
u32 core_vertex;
{
// One-hot marker for the core vertex: bit 0 = v0, bit 1 = v1, bit 2 = v2.
u32 cvtemp = 0;
if (v1->x <= v0->x)
{
if (v2->x <= v1->x)
cvtemp = (1 << 2);
else
cvtemp = (1 << 1);
}
else if (v2->x < v0->x)
cvtemp = (1 << 2);
else
cvtemp = (1 << 0);
// Sort the vertices by ascending y with three compare/swaps. Each swap also exchanges
// the corresponding two bits of cvtemp so the marker follows its vertex.
if (v2->y < v1->y)
{
std::swap(v2, v1);
cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
}
if (v1->y < v0->y)
{
std::swap(v1, v0);
cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4);
}
if (v2->y < v1->y)
{
std::swap(v2, v1);
cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
}
// One-hot 1/2/4 -> index 0/1/2.
core_vertex = cvtemp >> 1;
}
// Zero height after sorting: nothing to draw.
if (v0->y == v2->y)
return;
// Reject primitives that are too wide or too tall.
if (static_cast<u32>(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
static_cast<u32>(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH ||
static_cast<u32>(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
static_cast<u32>(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT)
{
return;
}
// The long edge runs from v0 to v2 and spans both halves.
s64 base_coord = MakePolyXFP(v0->x);
s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y));
// Per-line x steps of the two short edges: v0->v1 (upper) and v1->v2 (lower).
s64 bound_coord_us;
s64 bound_coord_ls;
// True when the short edges lie to the right of the long edge.
bool right_facing;
if (v1->y == v0->y)
{
// Flat top: there is no upper short edge to step along.
bound_coord_us = 0;
right_facing = (bool)(v1->x > v0->x);
}
else
{
bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y));
right_facing = (bool)(bound_coord_us > base_step);
}
if (v2->y == v1->y)
bound_coord_ls = 0;
else
bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y));
i_deltas idl;
if (!CalcIDeltas<shading_enable, texture_enable>(idl, v0, v1, v2))
return;
const GPUBackendDrawPolygonCommand::Vertex* vertices[3] = {v0, v1, v2};
// Seed the interpolants from the core vertex: fixed-point value plus a half-unit bias.
// NOTE(review): ig.u / ig.v are only assigned when texture_enable is set; confirm that
// i_group has no default member initializers that callers could be relying on.
i_group ig;
if constexpr (texture_enable)
{
ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
}
ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
// Step back by the core vertex position so ig holds the value at (0, 0); DrawSpan adds
// x * d/dx and y * d/dy back for each span. The negated counts wrap as unsigned.
AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->x);
AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->y);
// One entry per triangle half. Index 0 of x_coord / x_step is the left edge, index 1
// the right edge. dec_mode selects walking the half with decreasing y.
struct TriangleHalf
{
u64 x_coord[2];
u64 x_step[2];
s32 y_coord;
s32 y_bound;
bool dec_mode;
} tripart[2];
// vo is 1 unless the core vertex is the topmost one; vp is 3 only when the core vertex
// is the bottommost one. Both select the slot and walking direction of the halves below.
u32 vo = 0;
u32 vp = 0;
if (core_vertex != 0)
vo = 1;
if (core_vertex == 2)
vp = 3;
{
// Upper half: between vertices[0] and vertices[1], short edge v0->v1.
TriangleHalf* tp = &tripart[vo];
tp->y_coord = vertices[0 ^ vo]->y;
tp->y_bound = vertices[1 ^ vo]->y;
tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x);
tp->x_step[right_facing] = bound_coord_us;
tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step);
tp->x_step[!right_facing] = base_step;
tp->dec_mode = vo;
}
{
// Lower half: between vertices[1] and vertices[2], short edge v1->v2.
TriangleHalf* tp = &tripart[vo ^ 1];
tp->y_coord = vertices[1 ^ vp]->y;
tp->y_bound = vertices[2 ^ vp]->y;
tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x);
tp->x_step[right_facing] = bound_coord_ls;
tp->x_coord[!right_facing] =
base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) *
base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step);
tp->x_step[!right_facing] = base_step;
tp->dec_mode = vp;
}
for (u32 i = 0; i < 2; i++)
{
s32 yi = tripart[i].y_coord;
s32 yb = tripart[i].y_bound;
u64 lc = tripart[i].x_coord[0];
u64 ls = tripart[i].x_step[0];
u64 rc = tripart[i].x_coord[1];
u64 rs = tripart[i].x_step[1];
if (tripart[i].dec_mode)
{
// Walk with decreasing y: step first, then draw. Lines above the drawing area end
// the walk; lines below it are skipped.
while (yi > yb)
{
yi--;
lc -= ls;
rc -= rs;
s32 y = TruncateGPUVertexPosition(yi);
if (y < static_cast<s32>(m_drawing_area.top))
break;
if (y > static_cast<s32>(m_drawing_area.bottom))
continue;
DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
cmd, y & VRAM_HEIGHT_MASK, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
}
}
else
{
// Walk with increasing y: draw first, then step. Lines below the drawing area end
// the walk; lines above it are skipped.
while (yi < yb)
{
s32 y = TruncateGPUVertexPosition(yi);
if (y > static_cast<s32>(m_drawing_area.bottom))
break;
if (y >= static_cast<s32>(m_drawing_area.top))
{
DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
cmd, y & VRAM_HEIGHT_MASK, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
}
yi++;
lc += ls;
rc += rs;
}
}
}
}
// Number of fractional bits in the fixed-point x/y position of a line.
enum
{
Line_XY_FractBits = 32
};
// Number of fractional bits in the fixed-point colour channels of a line.
enum
{
Line_RGB_FractBits = 12
};
// Current point while stepping along a line: position with Line_XY_FractBits fractional
// bits, colour with Line_RGB_FractBits fractional bits.
struct line_fxp_coord
{
u64 x, y;
u32 r, g, b;
};
// Increment added to a line_fxp_coord for each step along the line.
struct line_fxp_step
{
s64 dx_dk, dy_dk;
s32 dr_dk, dg_dk, db_dk;
};
// Returns delta / dk as a fixed-point value with Line_XY_FractBits fractional bits.
// The magnitude of the scaled numerator is increased by (dk - 1) before the truncating
// division. The caller must ensure dk is non-zero.
static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk)
{
  s64 scaled = static_cast<s64>(static_cast<u64>(delta) << Line_XY_FractBits);

  // A non-zero scaled value has magnitude >= 2^32, so the adjustment (at most a 32-bit
  // quantity) can never flip its sign; only one of the two branches can apply.
  if (scaled < 0)
    scaled -= dk - 1;
  else if (scaled > 0)
    scaled += dk - 1;

  return scaled / dk;
}
// Draws a line from p0 to p1 by stepping k + 1 points in fixed point, where k is the
// larger of |dx| and |dy|. Each point that lies inside m_drawing_area, and is not on a
// line of the active interlaced field, is passed to ShadePixel (untextured).
// Lines whose extent reaches MAX_PRIMITIVE_WIDTH / MAX_PRIMITIVE_HEIGHT are not drawn.
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1)
{
const s32 i_dx = std::abs(p1->x - p0->x);
const s32 i_dy = std::abs(p1->y - p0->y);
// Number of steps: the length along the major axis.
const s32 k = (i_dx > i_dy) ? i_dx : i_dy;
if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT)
return;
// Always step from the endpoint with the smaller x (a single point is left as-is).
if (p0->x >= p1->x && k > 0)
std::swap(p0, p1);
line_fxp_step step;
if (k == 0)
{
// Single point: no movement. The colour steps are only set when shading is enabled,
// which is also the only case in which they are read below.
step.dx_dk = 0;
step.dy_dk = 0;
if constexpr (shading_enable)
{
step.dr_dk = 0;
step.dg_dk = 0;
step.db_dk = 0;
}
}
else
{
step.dx_dk = LineDivide(p1->x - p0->x, k);
step.dy_dk = LineDivide(p1->y - p0->y, k);
if constexpr (shading_enable)
{
step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k;
step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k;
step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k;
}
}
// Start at p0 with the half-unit bit set in the fractional part of each coordinate.
line_fxp_coord cur_point;
cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
// Small constant bias: always applied to x, applied to y only when y is decreasing.
cur_point.x -= 1024;
if (step.dy_dk < 0)
cur_point.y -= 1024;
if constexpr (shading_enable)
{
cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
}
// k + 1 points: both endpoints are included.
for (s32 i = 0; i <= k; i++)
{
// Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain.
const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047;
const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047;
if ((!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast<u32>(y)) & 1u)) &&
x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) &&
y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom))
{
// Unshaded lines use p0's colour for every point (p0 after the swap above).
const u8 r = shading_enable ? static_cast<u8>(cur_point.r >> Line_RGB_FractBits) : p0->r;
const u8 g = shading_enable ? static_cast<u8>(cur_point.g >> Line_RGB_FractBits) : p0->g;
const u8 b = shading_enable ? static_cast<u8>(cur_point.b >> Line_RGB_FractBits) : p0->b;
ShadePixel<false, false, transparency_enable, dithering_enable>(
cmd, static_cast<u32>(x), static_cast<u32>(y) & VRAM_HEIGHT_MASK, r, g, b, 0, 0);
}
cur_point.x += step.dx_dk;
cur_point.y += step.dy_dk;
if constexpr (shading_enable)
{
cur_point.r += step.dr_dk;
cur_point.g += step.dg_dk;
cur_point.b += step.db_dk;
}
}
}
} }
void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
@ -896,82 +242,16 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
} }
} }
// Intentionally empty: this backend performs no work when asked to flush.
void GPU_SW_Backend::FlushRender()
{
}
// Intentionally empty: this backend performs no work when the drawing area changes.
void GPU_SW_Backend::DrawingAreaChanged()
{
}
void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{ {
GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit); GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit);
} }
GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shading_enable, bool transparency_enable, void GPU_SW_Backend::DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area)
bool dithering_enable)
{ {
static constexpr DrawLineFunction funcs[2][2][2] = { GPU_SW_Rasterizer::g_drawing_area = new_drawing_area;
{{&GPU_SW_Backend::DrawLine<false, false, false>, &GPU_SW_Backend::DrawLine<false, false, true>},
{&GPU_SW_Backend::DrawLine<false, true, false>, &GPU_SW_Backend::DrawLine<false, true, true>}},
{{&GPU_SW_Backend::DrawLine<true, false, false>, &GPU_SW_Backend::DrawLine<true, false, true>},
{&GPU_SW_Backend::DrawLine<true, true, false>, &GPU_SW_Backend::DrawLine<true, true, true>}}};
return funcs[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)];
} }
GPU_SW_Backend::DrawRectangleFunction void GPU_SW_Backend::FlushRender()
GPU_SW_Backend::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable)
{ {
static constexpr DrawRectangleFunction funcs[2][2][2] = {
{{&GPU_SW_Backend::DrawRectangle<false, false, false>, &GPU_SW_Backend::DrawRectangle<false, false, true>},
{&GPU_SW_Backend::DrawRectangle<false, false, false>, &GPU_SW_Backend::DrawRectangle<false, false, true>}},
{{&GPU_SW_Backend::DrawRectangle<true, false, false>, &GPU_SW_Backend::DrawRectangle<true, false, true>},
{&GPU_SW_Backend::DrawRectangle<true, true, false>, &GPU_SW_Backend::DrawRectangle<true, true, true>}}};
return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)];
}
GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
bool raw_texture_enable,
bool transparency_enable,
bool dithering_enable)
{
static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = {
{{{{&GPU_SW_Backend::DrawTriangle<false, false, false, false, false>,
&GPU_SW_Backend::DrawTriangle<false, false, false, false, true>},
{&GPU_SW_Backend::DrawTriangle<false, false, false, true, false>,
&GPU_SW_Backend::DrawTriangle<false, false, false, true, true>}},
{{&GPU_SW_Backend::DrawTriangle<false, false, false, false, false>,
&GPU_SW_Backend::DrawTriangle<false, false, false, false, false>},
{&GPU_SW_Backend::DrawTriangle<false, false, false, true, false>,
&GPU_SW_Backend::DrawTriangle<false, false, false, true, false>}}},
{{{&GPU_SW_Backend::DrawTriangle<false, true, false, false, false>,
&GPU_SW_Backend::DrawTriangle<false, true, false, false, true>},
{&GPU_SW_Backend::DrawTriangle<false, true, false, true, false>,
&GPU_SW_Backend::DrawTriangle<false, true, false, true, true>}},
{{&GPU_SW_Backend::DrawTriangle<false, true, true, false, false>,
&GPU_SW_Backend::DrawTriangle<false, true, true, false, false>},
{&GPU_SW_Backend::DrawTriangle<false, true, true, true, false>,
&GPU_SW_Backend::DrawTriangle<false, true, true, true, false>}}}},
{{{{&GPU_SW_Backend::DrawTriangle<true, false, false, false, false>,
&GPU_SW_Backend::DrawTriangle<true, false, false, false, true>},
{&GPU_SW_Backend::DrawTriangle<true, false, false, true, false>,
&GPU_SW_Backend::DrawTriangle<true, false, false, true, true>}},
{{&GPU_SW_Backend::DrawTriangle<true, false, false, false, false>,
&GPU_SW_Backend::DrawTriangle<true, false, false, false, false>},
{&GPU_SW_Backend::DrawTriangle<true, false, false, true, false>,
&GPU_SW_Backend::DrawTriangle<true, false, false, true, false>}}},
{{{&GPU_SW_Backend::DrawTriangle<true, true, false, false, false>,
&GPU_SW_Backend::DrawTriangle<true, true, false, false, true>},
{&GPU_SW_Backend::DrawTriangle<true, true, false, true, false>,
&GPU_SW_Backend::DrawTriangle<true, true, false, true, true>}},
{{&GPU_SW_Backend::DrawTriangle<true, true, true, false, false>,
&GPU_SW_Backend::DrawTriangle<true, true, true, false, false>},
{&GPU_SW_Backend::DrawTriangle<true, true, true, true, false>,
&GPU_SW_Backend::DrawTriangle<true, true, true, true, false>}}}}};
return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]
[u8(dithering_enable)];
} }

View file

@ -17,77 +17,7 @@ public:
bool Initialize(bool force_thread) override; bool Initialize(bool force_thread) override;
void Reset() override; void Reset() override;
// Returns the 16-bit VRAM value at (x, y); VRAM is addressed row-major with VRAM_WIDTH pixels per row.
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const
{
  const auto offset = VRAM_WIDTH * y + x;
  return g_vram[offset];
}
// Returns a read-only pointer to the VRAM value at (x, y).
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const
{
  const auto offset = VRAM_WIDTH * y + x;
  return &g_vram[offset];
}
// Returns a writable pointer to the VRAM value at (x, y).
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y)
{
  const auto offset = VRAM_WIDTH * y + x;
  return &g_vram[offset];
}
// Stores `value` into VRAM at (x, y).
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value)
{
  const auto offset = VRAM_WIDTH * y + x;
  g_vram[offset] = value;
}
// this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
static constexpr u32 DITHER_LUT_SIZE = 512;
using DitherLUT = std::array<std::array<std::array<u8, 512>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>;
static constexpr DitherLUT ComputeDitherLUT();
protected: protected:
union VRAMPixel
{
u16 bits;
BitField<u16, u8, 0, 5> r;
BitField<u16, u8, 5, 5> g;
BitField<u16, u8, 10, 5> b;
BitField<u16, bool, 15, 1> c;
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
}
void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
}
void SetRGB24(u32 rgb24, bool c_ = false)
{
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
(static_cast<u16>(c_) << 15);
}
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
{
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
(static_cast<u16>(c_) << 15);
}
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
{
const s32 offset = DITHER_MATRIX[y & 3][x & 3];
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
SetRGB24(r8, g8, b8, c_);
}
u32 ToRGB24() const
{
const u32 r_ = ZeroExtend32(r.GetValue());
const u32 g_ = ZeroExtend32(g.GetValue());
const u32 b_ = ZeroExtend32(b.GetValue());
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
}
};
// Splits a packed 16-bit texcoord into its (low byte, high byte) pair.
static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
{
  const u8 low_byte = static_cast<u8>(texcoord);
  const u8 high_byte = static_cast<u8>(texcoord >> 8);
  return std::tuple<u8, u8>(low_byte, high_byte);
}
// Splits a packed 24-bit colour into its three bytes, lowest byte first (bits 0-7, 8-15, 16-23).
static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
{
  const u8 byte0 = static_cast<u8>(rgb24);
  const u8 byte1 = static_cast<u8>(rgb24 >> 8);
  const u8 byte2 = static_cast<u8>(rgb24 >> 16);
  return std::tuple<u8, u8, u8>(byte0, byte1, byte2);
}
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
@ -96,75 +26,7 @@ protected:
void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override; void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
void FlushRender() override; void DrawingAreaChanged(const GPUDrawingArea& new_drawing_area, const GSVector4i clamped_drawing_area) override;
void DrawingAreaChanged() override;
void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override;
void FlushRender() override;
//////////////////////////////////////////////////////////////////////////
// Rasterization
//////////////////////////////////////////////////////////////////////////
// Pixel shading entry point; the boolean template parameters choose the variant at compile time.
// NOTE(review): the definition is not visible from this declaration - presumably it writes one pixel at
// (x, y) from the given colour and texcoord; confirm against the implementation.
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x,
u8 texcoord_y);
// Templated overload of DrawRectangle(), one instantiation per combination of the three flags.
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd);
// Pointer-to-member type with the same signature as the DrawRectangle<> instantiations.
using DrawRectangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawRectangleCommand* cmd);
// Presumably maps the runtime flags to the matching DrawRectangle<> instantiation - confirm against the definition.
DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
bool transparency_enable);
//////////////////////////////////////////////////////////////////////////
// Polygon and line rasterization ported from Mednafen
//////////////////////////////////////////////////////////////////////////
// Interpolation steps for the texture coordinates (u, v) and colour (r, g, b).
// Presumably the *_dx members are the per-column steps and the *_dy members the per-row steps - confirm
// against CalcIDeltas().
struct i_deltas
{
  u32 du_dx;
  u32 dv_dx;
  u32 dr_dx;
  u32 dg_dx;
  u32 db_dx;

  u32 du_dy;
  u32 dv_dy;
  u32 dr_dy;
  u32 dg_dy;
  u32 db_dy;
};
// Current interpolated texture coordinates (u, v) and colour (r, g, b) that the i_deltas steps are applied to.
struct i_group
{
  u32 u;
  u32 v;
  u32 r;
  u32 g;
  u32 b;
};
// Fills idl from the three vertices A, B and C. The bool result presumably reports whether the deltas
// could be computed (e.g. non-degenerate triangle) - confirm against the definition.
template<bool shading_enable, bool texture_enable>
bool CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A,
const GPUBackendDrawPolygonCommand::Vertex* B, const GPUBackendDrawPolygonCommand::Vertex* C);
// Advances ig using the deltas in idl; `count` defaults to a single step.
// NOTE(review): presumably _DX applies the *_dx members and _DY the *_dy members - confirm.
template<bool shading_enable, bool texture_enable>
void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1);
template<bool shading_enable, bool texture_enable>
void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1);
// Span drawing routine for row y between x_start and x_bound. ig is taken by value, so the caller's
// interpolator state is not modified.
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable>
void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig,
const i_deltas& idl);
// Triangle rasterizer for the vertices v0, v1 and v2; one instantiation per combination of the five flags.
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable>
void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2);
// Pointer-to-member type with the same signature as the DrawTriangle<> instantiations.
using DrawTriangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawPolygonCommand* cmd,
const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2);
// Returns an entry from a table of DrawTriangle<> instantiations, indexed by the five runtime flags in
// the order shading, texture, raw texture, transparency, dithering.
DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
bool transparency_enable, bool dithering_enable);
// Templated overload of DrawLine() for a single segment between p0 and p1; one instantiation per
// combination of the three flags.
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
// Pointer-to-member type with the same signature as the DrawLine<> instantiations.
using DrawLineFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawLineCommand* cmd,
const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
// Presumably maps the runtime flags to the matching DrawLine<> instantiation - confirm against the definition.
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
}; };

View file

@ -0,0 +1,100 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: PolyForm-Strict-1.0.0
#include "gpu_sw_rasterizer.h"
#include "gpu.h"
#include "cpuinfo.h"
#include "common/log.h"
#include "common/string_util.h"
Log_SetChannel(GPU_SW_Rasterizer);
namespace GPU_SW_Rasterizer {
// Default implementation, compatible with all ISAs.
extern const DrawRectangleFunctionTable DrawRectangleFunctions;
extern const DrawTriangleFunctionTable DrawTriangleFunctions;
extern const DrawLineFunctionTable DrawLineFunctions;
// Dither lookup table, built during constant initialisation.
// For every dither matrix cell [row][col] and every input value in [0, DITHER_LUT_SIZE), the entry holds
// (value + DITHER_MATRIX[row][col]) >> 3, clamped to the range [0, 31].
constinit const DitherLUT g_dither_lut = []() constexpr {
  DitherLUT table = {};
  for (u32 row = 0; row < DITHER_MATRIX_SIZE; row++)
  {
    for (u32 col = 0; col < DITHER_MATRIX_SIZE; col++)
    {
      auto& entries = table[row][col];
      for (u32 value = 0; value < DITHER_LUT_SIZE; value++)
      {
        s32 quantized = (static_cast<s32>(value) + DITHER_MATRIX[row][col]) >> 3;
        if (quantized < 0)
          quantized = 0;
        else if (quantized > 31)
          quantized = 31;

        entries[value] = static_cast<u8>(quantized);
      }
    }
  }
  return table;
}();
GPUDrawingArea g_drawing_area = {};
} // namespace GPU_SW_Rasterizer
// Default implementation definitions.
namespace GPU_SW_Rasterizer {
#include "gpu_sw_rasterizer.inl"
}
// Default vector implementation definitions.
#if defined(CPU_ARCH_SSE) || defined(CPU_ARCH_NEON)
namespace GPU_SW_Rasterizer::SIMD {
#include "gpu_sw_rasterizer.inl"
}
#endif
// Initialize with default implementation.
namespace GPU_SW_Rasterizer {
const DrawRectangleFunctionTable* SelectedDrawRectangleFunctions = &DrawRectangleFunctions;
const DrawTriangleFunctionTable* SelectedDrawTriangleFunctions = &DrawTriangleFunctions;
const DrawLineFunctionTable* SelectedDrawLineFunctions = &DrawLineFunctions;
} // namespace GPU_SW_Rasterizer
// Chooses which set of rasterizer function tables the Selected*Functions pointers refer to.
// Only the first call does any work; later calls return immediately. The SW_USE_ISA environment
// variable ("AVX2" or "SIMD", compared case-insensitively) opts in to an alternative implementation
// where one is compiled in; otherwise the default (scalar) tables stay selected.
void GPU_SW_Rasterizer::SelectImplementation()
{
  static bool s_selection_done = false;
  if (s_selection_done)
    return;

  s_selection_done = true;

#define SELECT_ALTERNATIVE_RASTERIZER(isa)                                                                             \
  do                                                                                                                   \
  {                                                                                                                    \
    INFO_LOG("Using " #isa " software rasterizer implementation.");                                                    \
    SelectedDrawRectangleFunctions = &isa::DrawRectangleFunctions;                                                     \
    SelectedDrawTriangleFunctions = &isa::DrawTriangleFunctions;                                                       \
    SelectedDrawLineFunctions = &isa::DrawLineFunctions;                                                               \
  } while (0)

#if defined(CPU_ARCH_SSE) || defined(CPU_ARCH_NEON)
  // Default to scalar for now, until vector is finished.
  const char* const env_isa = std::getenv("SW_USE_ISA");
  const char* const use_isa = env_isa ? env_isa : "Scalar";

  // A null use_isa would mean "no preference"; with the scalar default above it is never null today.
  const auto isa_requested = [use_isa](const char* name) {
    return !use_isa || StringUtil::Strcasecmp(use_isa, name) == 0;
  };

#if defined(CPU_ARCH_SSE) && defined(_MSC_VER)
  // The AVX2 tables are only used when the host CPU reports AVX2 support.
  if (cpuinfo_has_x86_avx2() && isa_requested("AVX2"))
  {
    SELECT_ALTERNATIVE_RASTERIZER(AVX2);
    return;
  }
#endif

  if (isa_requested("SIMD"))
  {
    SELECT_ALTERNATIVE_RASTERIZER(SIMD);
    return;
  }
#endif

  INFO_LOG("Using scalar software rasterizer implementation.");

#undef SELECT_ALTERNATIVE_RASTERIZER
}

View file

@ -0,0 +1,89 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: PolyForm-Strict-1.0.0
#pragma once
#include "gpu.h"
#include "gpu_types.h"
#include "common/intrin.h"
#include "common/types.h"
#include <algorithm>
#include <array>
namespace GPU_SW_Rasterizer {
// this is actually ((31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
static constexpr u32 DITHER_LUT_SIZE = 512;
using DitherLUT = std::array<std::array<std::array<u8, DITHER_LUT_SIZE>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>;
extern const DitherLUT g_dither_lut;
extern GPUDrawingArea g_drawing_area;
using DrawRectangleFunction = void (*)(const GPUBackendDrawRectangleCommand* cmd);
typedef const DrawRectangleFunction DrawRectangleFunctionTable[2][2][2];
using DrawTriangleFunction = void (*)(const GPUBackendDrawPolygonCommand* cmd,
const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2);
typedef const DrawTriangleFunction DrawTriangleFunctionTable[2][2][2][2][2];
using DrawLineFunction = void (*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
typedef const DrawLineFunction DrawLineFunctionTable[2][2][2];
// Default implementation, compatible with all ISAs.
extern const DrawRectangleFunctionTable DrawRectangleFunctions;
extern const DrawTriangleFunctionTable DrawTriangleFunctions;
extern const DrawLineFunctionTable DrawLineFunctions;
// Current implementation, selected at runtime.
extern const DrawRectangleFunctionTable* SelectedDrawRectangleFunctions;
extern const DrawTriangleFunctionTable* SelectedDrawTriangleFunctions;
extern const DrawLineFunctionTable* SelectedDrawLineFunctions;
extern void SelectImplementation();
// Looks up the line rasterizer for the given flags in the currently selected function table.
ALWAYS_INLINE static DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable,
                                                          bool dithering_enable)
{
  const auto& table = *SelectedDrawLineFunctions;
  const u8 shading_idx = static_cast<u8>(shading_enable);
  const u8 transparency_idx = static_cast<u8>(transparency_enable);
  const u8 dithering_idx = static_cast<u8>(dithering_enable);
  return table[shading_idx][transparency_idx][dithering_idx];
}
// Looks up the rectangle rasterizer for the given flags in the currently selected function table.
ALWAYS_INLINE static DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
                                                                    bool transparency_enable)
{
  const auto& table = *SelectedDrawRectangleFunctions;
  const u8 texture_idx = static_cast<u8>(texture_enable);
  const u8 raw_texture_idx = static_cast<u8>(raw_texture_enable);
  const u8 transparency_idx = static_cast<u8>(transparency_enable);
  return table[texture_idx][raw_texture_idx][transparency_idx];
}
// Looks up the triangle rasterizer for the given flags in the currently selected function table.
ALWAYS_INLINE static DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
                                                                  bool raw_texture_enable, bool transparency_enable,
                                                                  bool dithering_enable)
{
  const auto& table = *SelectedDrawTriangleFunctions;
  const u8 shading_idx = static_cast<u8>(shading_enable);
  const u8 texture_idx = static_cast<u8>(texture_enable);
  const u8 raw_texture_idx = static_cast<u8>(raw_texture_enable);
  const u8 transparency_idx = static_cast<u8>(transparency_enable);
  const u8 dithering_idx = static_cast<u8>(dithering_enable);
  return table[shading_idx][texture_idx][raw_texture_idx][transparency_idx][dithering_idx];
}
#define DECLARE_ALTERNATIVE_RASTERIZER(isa) \
namespace isa { \
extern const DrawRectangleFunctionTable DrawRectangleFunctions; \
extern const DrawTriangleFunctionTable DrawTriangleFunctions; \
extern const DrawLineFunctionTable DrawLineFunctions; \
}
// Have to define the symbols globally, because clang won't include them otherwise.
#if defined(CPU_ARCH_SSE) && defined(_MSC_VER)
#define ALTERNATIVE_RASTERIZER_LIST() DECLARE_ALTERNATIVE_RASTERIZER(AVX2)
#else
#define ALTERNATIVE_RASTERIZER_LIST()
#endif
ALTERNATIVE_RASTERIZER_LIST()
#undef DECLARE_ALTERNATIVE_RASTERIZER
} // namespace GPU_SW_Rasterizer
// static u32 s_bad_counter = 0;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,12 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "gpu_sw_rasterizer.h"
#include "common/assert.h"
#include "common/gsvector.h"
namespace GPU_SW_Rasterizer::AVX2 {
#define USE_VECTOR 1
#include "gpu_sw_rasterizer.inl"
}

View file

@ -333,6 +333,7 @@ struct GPUBackendCopyVRAMCommand : public GPUBackendCommand
struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand
{ {
GPUDrawingArea new_area; GPUDrawingArea new_area;
s32 new_clamped_area[4];
}; };
struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand struct GPUBackendUpdateCLUTCommand : public GPUBackendCommand