GPU: Fix reading of write-combined GPU buffers

+100fps in recompiler modes
This commit is contained in:
Connor McLaughlin 2020-01-15 17:35:36 +10:00
parent 0861a1e579
commit 504510c242
2 changed files with 19 additions and 15 deletions

View file

@ -80,7 +80,6 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable; const bool textured = rc.texture_enable;
BatchVertex* old_vertex_ptr = m_batch_current_vertex_ptr;
s32 min_x = std::numeric_limits<s32>::max(); s32 min_x = std::numeric_limits<s32>::max();
s32 max_x = std::numeric_limits<s32>::min(); s32 max_x = std::numeric_limits<s32>::min();
s32 min_y = std::numeric_limits<s32>::max(); s32 min_y = std::numeric_limits<s32>::max();
@ -100,7 +99,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
min_y = std::min(min_y, y); min_y = std::min(min_y, y);
max_y = std::max(max_y, y); max_y = std::max(max_y, y);
(m_batch_current_vertex_ptr++)->Set(x, y, color, texpage, packed_texcoord); AddVertex(x, y, color, texpage, packed_texcoord);
if (restart_strip) if (restart_strip)
{ {
@ -113,7 +112,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
if (static_cast<u32>(max_x - min_x) > MAX_PRIMITIVE_WIDTH || if (static_cast<u32>(max_x - min_x) > MAX_PRIMITIVE_WIDTH ||
static_cast<u32>(max_y - min_y) > MAX_PRIMITIVE_HEIGHT) static_cast<u32>(max_y - min_y) > MAX_PRIMITIVE_HEIGHT)
{ {
m_batch_current_vertex_ptr = old_vertex_ptr; m_batch_current_vertex_ptr -= 2;
AddDuplicateVertex();
AddDuplicateVertex();
} }
} }
break; break;
@ -165,13 +166,13 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
const u16 tex_right = tex_left + static_cast<u16>(rectangle_width); const u16 tex_right = tex_left + static_cast<u16>(rectangle_width);
const u16 tex_bottom = tex_top + static_cast<u16>(rectangle_height); const u16 tex_bottom = tex_top + static_cast<u16>(rectangle_height);
(m_batch_current_vertex_ptr++)->Set(pos_left, pos_top, color, texpage, tex_left, tex_top); AddVertex(pos_left, pos_top, color, texpage, tex_left, tex_top);
if (restart_strip) if (restart_strip)
AddDuplicateVertex(); AddDuplicateVertex();
(m_batch_current_vertex_ptr++)->Set(pos_right, pos_top, color, texpage, tex_right, tex_top); AddVertex(pos_right, pos_top, color, texpage, tex_right, tex_top);
(m_batch_current_vertex_ptr++)->Set(pos_left, pos_bottom, color, texpage, tex_left, tex_bottom); AddVertex(pos_left, pos_bottom, color, texpage, tex_left, tex_bottom);
(m_batch_current_vertex_ptr++)->Set(pos_right, pos_bottom, color, texpage, tex_right, tex_bottom); AddVertex(pos_right, pos_bottom, color, texpage, tex_right, tex_bottom);
} }
break; break;
@ -180,7 +181,6 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
const u32 first_color = rc.color_for_first_vertex; const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
BatchVertex* old_vertex_ptr = m_batch_current_vertex_ptr;
s32 min_x = std::numeric_limits<s32>::max(); s32 min_x = std::numeric_limits<s32>::max();
s32 max_x = std::numeric_limits<s32>::min(); s32 max_x = std::numeric_limits<s32>::min();
s32 min_y = std::numeric_limits<s32>::max(); s32 min_y = std::numeric_limits<s32>::max();
@ -201,12 +201,6 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
(m_batch_current_vertex_ptr++)->Set(x, y, color, 0, 0); (m_batch_current_vertex_ptr++)->Set(x, y, color, 0, 0);
} }
if (static_cast<u32>(max_x - min_x) > MAX_PRIMITIVE_WIDTH ||
static_cast<u32>(max_y - min_y) > MAX_PRIMITIVE_HEIGHT)
{
m_batch_current_vertex_ptr = old_vertex_ptr;
}
} }
break; break;
@ -218,7 +212,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
void GPU_HW::AddDuplicateVertex() void GPU_HW::AddDuplicateVertex()
{ {
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(BatchVertex)); std::memcpy(m_batch_current_vertex_ptr, &m_batch_last_vertex, sizeof(BatchVertex));
m_batch_current_vertex_ptr++; m_batch_current_vertex_ptr++;
} }

View file

@ -4,6 +4,7 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <utility>
#include <vector> #include <vector>
class GPU_HW : public GPU class GPU_HW : public GPU
@ -152,6 +153,7 @@ protected:
BatchVertex* m_batch_start_vertex_ptr = nullptr; BatchVertex* m_batch_start_vertex_ptr = nullptr;
BatchVertex* m_batch_end_vertex_ptr = nullptr; BatchVertex* m_batch_end_vertex_ptr = nullptr;
BatchVertex* m_batch_current_vertex_ptr = nullptr; BatchVertex* m_batch_current_vertex_ptr = nullptr;
BatchVertex m_batch_last_vertex = {};
u32 m_batch_base_vertex = 0; u32 m_batch_base_vertex = 0;
u32 m_resolution_scale = 1; u32 m_resolution_scale = 1;
@ -184,4 +186,12 @@ private:
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void AddDuplicateVertex(); void AddDuplicateVertex();
// Appends one vertex to the current batch.
//
// The arguments are forwarded unchanged to BatchVertex::Set(), which fills the
// CPU-side member m_batch_last_vertex. That copy is then memcpy'd into the
// slot at m_batch_current_vertex_ptr and the pointer is advanced by one.
//
// Keeping m_batch_last_vertex up to date lets AddDuplicateVertex() copy from
// it rather than reading back from (m_batch_current_vertex_ptr - 1).
// NOTE(review): per the commit message the destination can be a
// write-combined GPU buffer, so presumably the intent is that this path only
// ever writes to it and never reads from it -- confirm.
//
// No check against m_batch_end_vertex_ptr is made here.
// NOTE(review): presumably the caller guarantees there is room -- confirm.
template<typename... Args>
ALWAYS_INLINE void AddVertex(Args&&... args)
{
// Build the vertex in ordinary memory first, not in the mapped buffer.
m_batch_last_vertex.Set(std::forward<Args>(args)...);
// Single write of the finished vertex into the batch buffer slot.
std::memcpy(m_batch_current_vertex_ptr, &m_batch_last_vertex, sizeof(BatchVertex));
m_batch_current_vertex_ptr++;
}
}; };