diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 6bd04803e..69cf2d905 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1635,7 +1635,7 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he void GPU::SetClampedDrawingArea() { - if (!IsDrawingAreaIsValid()) [[unlikely]] + if (m_drawing_area.left > m_drawing_area.right || m_drawing_area.top > m_drawing_area.bottom) [[unlikely]] { m_clamped_drawing_area = GSVector4i::zero(); return; diff --git a/src/core/gpu.h b/src/core/gpu.h index 3b8759aee..e280af903 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -297,12 +297,6 @@ protected: BoolToUInt8(m_render_command.shading_enable); } - /// Returns true if the drawing area is valid (i.e. left <= right, top <= bottom). - ALWAYS_INLINE bool IsDrawingAreaIsValid() const - { - return (m_drawing_area.left <= m_drawing_area.right && m_drawing_area.top <= m_drawing_area.bottom); - } - void AddCommandTicks(TickCount ticks); void WriteGP1(u32 value); @@ -349,18 +343,11 @@ protected: AddCommandTicks(pixels); } - ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(s32 x, s32 y, u32 width, u32 height, bool textured, + ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i clamped_rect, bool textured, bool semitransparent) { - // We do -1 on the inside of the clamp, in case the rectangle is entirely clipped. 
- u32 drawn_width = static_cast( - std::clamp(x + static_cast(width), static_cast(m_drawing_area.left), - static_cast(m_drawing_area.right) + 1) - - std::clamp(x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right) + 1)); - u32 drawn_height = static_cast( - std::clamp(y + static_cast(height), static_cast(m_drawing_area.top), - static_cast(m_drawing_area.bottom) + 1) - - std::clamp(y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom) + 1)); + u32 drawn_width = clamped_rect.width(); + u32 drawn_height = clamped_rect.height(); u32 ticks_per_row = drawn_width; if (textured) @@ -372,16 +359,10 @@ protected: AddCommandTicks(ticks_per_row * drawn_height); } - ALWAYS_INLINE_RELEASE void AddDrawLineTicks(s32 min_x, s32 min_y, s32 max_x, s32 max_y, bool shaded) + ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i clamped_rect, bool shaded) { - // We do -1 on the inside of the clamp, in case the rectangle is entirely clipped. - // Lines are inclusive? - u32 drawn_width = static_cast( - std::clamp(max_x + 1, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right) + 1) - - std::clamp(min_x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right) + 1)); - u32 drawn_height = static_cast( - std::clamp(max_y + 1, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom) + 1) - - std::clamp(min_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom) + 1)); + u32 drawn_width = clamped_rect.width(); + u32 drawn_height = clamped_rect.height(); if (m_GPUSTAT.SkipDrawingToActiveField()) drawn_height = std::max(drawn_height / 2, 1u); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 56630fb68..fdad424fb 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -42,15 +42,6 @@ static constexpr GPUTexture::Format VRAM_DS_EXTRACT_FORMAT = GPUTexture::Format: static u32 s_draw_number = 0; #endif -template -ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) -{ - if (v1 > v2) - 
return std::tie(v2, v1); - else - return std::tie(v1, v2); -} - /// Returns the distance between two rectangles. ALWAYS_INLINE static float RectDistance(const GSVector4i lhs, const GSVector4i rhs) { @@ -668,7 +659,7 @@ void GPU_HW::AddDrawnRectangle(const GSVector4i rect) // Normally, we would check for overlap here. But the GPU's texture cache won't actually reload until the page // changes, or it samples a larger region, so we can get away without doing so. This reduces copies considerably in // games like Mega Man Legends 2. - m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect.rintersect(m_clamped_drawing_area)); + m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); } void GPU_HW::AddUnclampedDrawnRectangle(const GSVector4i rect) @@ -2047,10 +2038,15 @@ void GPU_HW::SetBatchSpriteMode(bool enabled) m_batch.sprite_mode = enabled; } -void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth) +void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) { DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); + const float x0 = bounds.x; + const float y0 = bounds.y; + const float x1 = bounds.z; + const float y1 = bounds.w; + const float dx = x1 - x0; const float dy = y1 - y0; if (dx == 0.0f && dy == 0.0f) @@ -2211,9 +2207,6 @@ void GPU_HW::LoadVertices() else if (m_allow_sprite_mode) SetBatchSpriteMode((pgxp && !is_3d) || IsPossibleSpritePolygon(vertices.data())); - if (!IsDrawingAreaIsValid()) [[unlikely]] - return; - if (m_sw_renderer) { GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); @@ -2240,9 +2233,9 @@ void GPU_HW::LoadVertices() const GSVector4 max_pos_12 = v1f.max(v2f); const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min(v0f).upld(max_pos_12.max(v0f))).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const bool first_tri_culled = - (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > 
MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_012)); + const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); + const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || + draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || clamped_draw_rect_012.rempty()); if (first_tri_culled) { GL_INS_FMT("Culling off-screen/too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0].x, @@ -2257,7 +2250,7 @@ void GPU_HW::LoadVertices() if (textured && m_compute_uv_range) ComputePolygonUVLimits(vertices.data(), num_vertices); - AddDrawnRectangle(draw_rect_012); + AddDrawnRectangle(clamped_draw_rect_012); AddDrawTriangleTicks(GSVector4i(native_vertex_positions[0]), GSVector4i(native_vertex_positions[1]), GSVector4i(native_vertex_positions[2]), rc.shading_enable, rc.texture_enable, rc.transparency_enable); @@ -2284,11 +2277,12 @@ void GPU_HW::LoadVertices() const GSVector4 v3f = GSVector4::loadl(&vertices[3].x); const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min(v3f).upld(max_pos_12.max(v3f))).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); // Cull polygons which are too large. 
const bool second_tri_culled = (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - !m_clamped_drawing_area.rintersects(draw_rect_123)); + clamped_draw_rect_123.rempty()); if (second_tri_culled) { GL_INS_FMT("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", @@ -2303,7 +2297,7 @@ void GPU_HW::LoadVertices() if (first_tri_culled && textured && m_compute_uv_range) ComputePolygonUVLimits(vertices.data(), num_vertices); - AddDrawnRectangle(draw_rect_123); + AddDrawnRectangle(clamped_draw_rect_123); AddDrawTriangleTicks(GSVector4i(native_vertex_positions[2]), GSVector4i(native_vertex_positions[1]), GSVector4i(native_vertex_positions[3]), rc.shading_enable, rc.texture_enable, rc.transparency_enable); @@ -2372,8 +2366,14 @@ void GPU_HW::LoadVertices() break; } - if (!IsDrawingAreaIsValid()) [[unlikely]] + const GSVector4i rect = + GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) [[unlikely]] + { + GL_INS_FMT("Culling off-screen rectangle {}", rect); return; + } // we can split the rectangle up into potentially 8 quads SetBatchDepthBuffer(false); @@ -2438,8 +2438,8 @@ void GPU_HW::LoadVertices() tex_top = 0; } - AddDrawnRectangle(GSVector4i(pos_x, pos_y, pos_x + rectangle_width, pos_y + rectangle_height)); - AddDrawRectangleTicks(pos_x, pos_y, rectangle_width, rectangle_height, rc.texture_enable, rc.transparency_enable); + AddDrawnRectangle(clamped_rect); + AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); if (m_sw_renderer) { @@ -2480,34 +2480,33 @@ void GPU_HW::LoadVertices() end_pos.bits = FifoPop(); } - if (!IsDrawingAreaIsValid()) [[unlikely]] - return; + const GSVector4i vstart_pos = GSVector4i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y); + const GSVector4i vend_pos = 
GSVector4i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y); + const GSVector4i bounds = vstart_pos.xyxy(vend_pos); + const GSVector4i rect = + vstart_pos.min_i32(vend_pos).xyxy(vstart_pos.max_i32(vend_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - s32 start_x = start_pos.x + m_drawing_offset.x; - s32 start_y = start_pos.y + m_drawing_offset.y; - s32 end_x = end_pos.x + m_drawing_offset.x; - s32 end_y = end_pos.y + m_drawing_offset.y; - const auto [min_x, max_x] = MinMax(start_x, end_x); - const auto [min_y, max_y] = MinMax(start_y, end_y); - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) { - DEBUG_LOG("Culling too-large line: {},{} - {},{}", start_x, start_y, end_x, end_y); + GL_INS_FMT("Culling too-large/off-screen line: {},{} - {},{}", bounds.x, bounds.y, bounds.z, bounds.w); return; } - AddDrawnRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); - AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); + AddDrawnRectangle(clamped_rect); + AddDrawLineTicks(clamped_rect, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
- DrawLine(static_cast(start_x), static_cast(start_y), start_color, static_cast(end_x), - static_cast(end_y), end_color, depth); + DrawLine(GSVector4(bounds), start_color, end_color, depth); if (m_sw_renderer) { GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2); FillDrawCommand(cmd, rc); - cmd->vertices[0].Set(start_x, start_y, start_color); - cmd->vertices[1].Set(end_x, end_y, end_color); + GSVector4i::storel(&cmd->vertices[0], bounds); + cmd->vertices[0].color = start_color; + GSVector4i::storeh(&cmd->vertices[1], bounds); + cmd->vertices[1].color = end_color; m_sw_renderer->PushCommand(cmd); } } @@ -2517,15 +2516,11 @@ void GPU_HW::LoadVertices() const u32 num_vertices = GetPolyLineVertexCount(); DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); - if (!IsDrawingAreaIsValid()) [[unlikely]] - return; - const bool shaded = rc.shading_enable; u32 buffer_pos = 0; const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - s32 start_x = start_vp.x + m_drawing_offset.x; - s32 start_y = start_vp.y + m_drawing_offset.y; + GSVector4i start_pos = GSVector4i(start_vp.x + m_drawing_offset.x, start_vp.y + m_drawing_offset.y); u32 start_color = rc.color_for_first_vertex; GPUBackendDrawLineCommand* cmd; @@ -2533,7 +2528,8 @@ void GPU_HW::LoadVertices() { cmd = m_sw_renderer->NewDrawLineCommand(num_vertices); FillDrawCommand(cmd, rc); - cmd->vertices[0].Set(start_x, start_y, start_color); + GSVector4i::storel(&cmd->vertices[0].x, start_pos); + cmd->vertices[0].color = start_color; } else { @@ -2544,31 +2540,32 @@ void GPU_HW::LoadVertices() { const u32 end_color = shaded ? 
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; - const s32 end_x = m_drawing_offset.x + vp.x; - const s32 end_y = m_drawing_offset.y + vp.y; - - const auto [min_x, max_x] = MinMax(start_x, end_x); - const auto [min_y, max_y] = MinMax(start_y, end_y); - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + const GSVector4i end_pos = GSVector4i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); + const GSVector4i bounds = start_pos.xyxy(end_pos); + const GSVector4i rect = + start_pos.min_i32(end_pos).xyxy(start_pos.max_i32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) { - DEBUG_LOG("Culling too-large line: {},{} - {},{}", start_x, start_y, end_x, end_y); + GL_INS_FMT("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); } else { - AddDrawnRectangle(GSVector4i(min_x, min_y, max_x + 1, max_y + 1)); - AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); + AddDrawnRectangle(clamped_rect); + AddDrawLineTicks(clamped_rect, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
- DrawLine(static_cast(start_x), static_cast(start_y), start_color, static_cast(end_x), - static_cast(end_y), end_color, depth); + DrawLine(GSVector4(bounds), start_color, end_color, depth); } - start_x = end_x; - start_y = end_y; + start_pos = end_pos; start_color = end_color; if (cmd) - cmd->vertices[i].Set(end_x, end_y, end_color); + { + GSVector4i::storel(&cmd->vertices[i], end_pos); + cmd->vertices[i].color = end_color; + } } if (cmd) @@ -3183,7 +3180,7 @@ void GPU_HW::DispatchRenderCommand() { m_draw_mode.ClearTexturePageChangedFlag(); -#if 1 +#if 0 if (!m_vram_dirty_draw_rect.eq(INVALID_RECT) || !m_vram_dirty_write_rect.eq(INVALID_RECT)) { GL_INS_FMT("VRAM DIRTY: {} {}", m_vram_dirty_draw_rect, m_vram_dirty_write_rect); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index a8a678c67..fade25027 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -217,7 +217,7 @@ private: u32 height); /// Expands a line into two triangles. - void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth); + void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth); /// Handles quads with flipped texture coordinate directions. void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 7de702c54..075156f72 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -9,21 +9,13 @@ #include "common/align.h" #include "common/assert.h" #include "common/gsvector.h" +#include "common/gsvector_formatter.h" #include "common/log.h" #include Log_SetChannel(GPU_SW); -template -ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) -{ - if (v1 > v2) - return std::tie(v2, v1); - else - return std::tie(v1, v2); -} - GPU_SW::GPU_SW() = default; GPU_SW::~GPU_SW() @@ -539,9 +531,6 @@ void GPU_SW::DispatchRenderCommand() positions[i] = GSVector4i::loadl(&vert->x); } - if (!IsDrawingAreaIsValid()) - return; - // Cull polygons which are too large. 
const GSVector4i min_pos_12 = positions[1].min_i32(positions[2]); const GSVector4i max_pos_12 = positions[1].max_i32(positions[2]); @@ -638,10 +627,15 @@ void GPU_SW::DispatchRenderCommand() break; } - if (!IsDrawingAreaIsValid()) + const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) [[unlikely]] + { + DEBUG_LOG("Culling off-screen rectangle {}", rect); return; + } - AddDrawRectangleTicks(cmd->x, cmd->y, cmd->width, cmd->height, rc.texture_enable, rc.transparency_enable); + AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); m_backend.PushCommand(cmd); } @@ -681,19 +675,19 @@ void GPU_SW::DispatchRenderCommand() cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; } - if (!IsDrawingAreaIsValid()) - return; + const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); + const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); + const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - const auto [min_x, max_x] = MinMax(cmd->vertices[0].x, cmd->vertices[1].x); - const auto [min_y, max_y] = MinMax(cmd->vertices[0].y, cmd->vertices[1].y); - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) { - DEBUG_LOG("Culling too-large line: {},{} - {},{}", cmd->vertices[0].y, cmd->vertices[0].y, cmd->vertices[1].x, - cmd->vertices[1].y); + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); return; } - AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); + AddDrawLineTicks(clamped_rect, rc.shading_enable); m_backend.PushCommand(cmd); } @@ 
-719,16 +713,20 @@ void GPU_SW::DispatchRenderCommand() cmd->vertices[i].x = m_drawing_offset.x + vp.x; cmd->vertices[i].y = m_drawing_offset.y + vp.y; - const auto [min_x, max_x] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].x); - const auto [min_y, max_y] = MinMax(cmd->vertices[i - 1].y, cmd->vertices[i].y); - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[i - 1].x); + const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[i].x); + const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); + + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) { - DEBUG_LOG("Culling too-large line: {},{} - {},{}", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y, - cmd->vertices[i].x, cmd->vertices[i].y); + DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[i - 1].x, + cmd->vertices[i - 1].y, cmd->vertices[i].x, cmd->vertices[i].y); + continue; } else { - AddDrawLineTicks(min_x, min_y, max_x, max_y, rc.shading_enable); + AddDrawLineTicks(clamped_rect, rc.shading_enable); } }