GPU: Handle VRAM wrap-around behavior on scanout

This commit is contained in:
Connor McLaughlin 2020-04-11 16:09:03 +10:00
parent fc3efebb38
commit 9e024b7a51
6 changed files with 131 additions and 74 deletions

View file

@ -487,22 +487,19 @@ void GPU::UpdateCRTCDisplayParameters()
if (horizontal_display_end <= horizontal_visible_end_tick)
{
cs.display_vram_width = std::min<u16>(
cs.display_vram_width =
std::max<u16>((((horizontal_display_end - std::max(horizontal_display_start, horizontal_visible_start_tick)) +
(cs.dot_clock_divider - 1)) /
cs.dot_clock_divider),
1u),
VRAM_WIDTH - cs.display_vram_left);
1u);
}
else
{
cs.display_vram_width = std::min<u16>(
std::max<u16>(
(((horizontal_visible_end_tick - std::max(horizontal_display_start, horizontal_visible_start_tick)) +
(cs.dot_clock_divider - 1)) /
cs.dot_clock_divider),
1u),
VRAM_WIDTH - cs.display_vram_left);
cs.display_vram_width = std::max<u16>(
(((horizontal_visible_end_tick - std::max(horizontal_display_start, horizontal_visible_start_tick)) +
(cs.dot_clock_divider - 1)) /
cs.dot_clock_divider),
1u);
}
if (vertical_display_start >= vertical_visible_start_line)
@ -513,21 +510,19 @@ void GPU::UpdateCRTCDisplayParameters()
else
{
cs.display_origin_top = 0;
cs.display_vram_top = std::min<u16>(
m_crtc_state.regs.Y + ((vertical_visible_start_line - vertical_display_start) << height_shift), VRAM_HEIGHT - 1);
cs.display_vram_top =
m_crtc_state.regs.Y + ((vertical_visible_start_line - vertical_display_start) << height_shift);
}
if (vertical_display_end <= vertical_visible_end_line)
{
cs.display_vram_height = std::min<u16>(
(vertical_display_end - std::max(vertical_display_start, vertical_visible_start_line)) << height_shift,
VRAM_HEIGHT - cs.display_vram_top);
cs.display_vram_height = (vertical_display_end - std::max(vertical_display_start, vertical_visible_start_line))
<< height_shift;
}
else
{
cs.display_vram_height = std::min<u16>(
(vertical_visible_end_line - std::max(vertical_display_start, vertical_visible_start_line)) << height_shift,
VRAM_HEIGHT - cs.display_vram_top);
cs.display_vram_height = (vertical_visible_end_line - std::max(vertical_display_start, vertical_visible_start_line))
<< height_shift;
}
}

View file

@ -553,7 +553,9 @@ void GPU_HW_D3D11::UpdateDisplay()
{
m_host_display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced)
// Present straight from the VRAM texture only when the whole scanout rectangle lies inside it. Both extents must
// be checked (x + width, y + height); otherwise the scanout wraps around VRAM and has to go through the
// reinterpret/wrapping shader path below.
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(),
m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
@ -567,15 +569,15 @@ void GPU_HW_D3D11::UpdateDisplay()
const u32 reinterpret_field_offset = GetInterlacedField();
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, reinterpret_field_offset};
ID3D11PixelShader* display_pixel_shader =
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get();
SetViewportAndScissor(reinterpret_start_x, scaled_vram_offset_y, reinterpret_width, scaled_display_height);
SetViewportAndScissor(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
m_display_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
m_display_texture.GetHeight(), scaled_vram_offset_x - reinterpret_start_x, 0,
scaled_display_width, scaled_display_height);
RestoreGraphicsAPIState();

View file

@ -506,7 +506,9 @@ void GPU_HW_OpenGL::UpdateDisplay()
{
m_host_display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced)
// Present straight from the VRAM texture only when the whole scanout rectangle lies inside it. Both extents must
// be checked (x + width, y + height); otherwise the scanout wraps around VRAM and has to go through the
// reinterpret/wrapping shader path below.
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
@ -525,20 +527,19 @@ void GPU_HW_OpenGL::UpdateDisplay()
const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
const u32 scaled_flipped_vram_offset_y =
m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
const u32 reinterpret_field_offset = GetInterlacedField();
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_field_offset};
UploadUniformBlock(uniforms, sizeof(uniforms));
m_batch_ubo_dirty = true;
glViewport(reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_width, scaled_display_height);
glViewport(0, reinterpret_field_offset, reinterpret_width, scaled_display_height);
glDrawArrays(GL_TRIANGLES, 0, 3);
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
m_display_texture.GetWidth(), m_display_texture.GetHeight(),
scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y,
scaled_vram_offset_x - reinterpret_start_x, scaled_display_height,
scaled_display_width, -static_cast<s32>(scaled_display_height));
// restore state

View file

@ -770,37 +770,39 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
DefineMacro(ss, "INTERLACED", interlaced);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"int u_field_offset", "int u_vram_start_x"});
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_field_offset"});
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
ss << R"(
{
int2 icoords = int2(v_pos.xy);
uint2 icoords = uint2(v_pos.xy) + u_vram_offset;
#if INTERLACED
if (((fixYCoord(icoords.y) / RESOLUTION_SCALE) & 1) != u_field_offset)
if (((icoords.y / uint(RESOLUTION_SCALE)) & 1u) != u_field_offset)
discard;
#endif
//icoords.y = uint(fixYCoord(int(icoords.y)));
#if DEPTH_24BIT
// relative to start of scanout
int relative_x = (icoords.x - u_vram_start_x) / RESOLUTION_SCALE;
icoords.x = u_vram_start_x + ((relative_x * 3) / 2) * RESOLUTION_SCALE;
uint relative_x = (icoords.x - u_vram_offset.x) / uint(RESOLUTION_SCALE);
icoords.x = u_vram_offset.x + ((relative_x * 3u) / 2u) * uint(RESOLUTION_SCALE);
// load adjacent 16-bit texels
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords, 0));
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords + int2(RESOLUTION_SCALE, 0), 0));
uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(icoords % uint2(VRAM_SIZE)), 0));
uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2((icoords + uint2(uint(RESOLUTION_SCALE), 0)) % uint2(VRAM_SIZE)), 0));
// select which part of the combined 16-bit texels we are currently shading
uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1) * 8);
uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1u) * 8u);
// extract components and normalize
o_col0 = float4(float(s1s0 & 0xFFu) / 255.0, float((s1s0 >> 8u) & 0xFFu) / 255.0,
float((s1s0 >> 16u) & 0xFFu) / 255.0, 1.0);
#else
// load and return
o_col0 = LOAD_TEXTURE(samp0, icoords, 0);
o_col0 = LOAD_TEXTURE(samp0, int2(icoords % uint2(VRAM_SIZE)), 0);
#endif
}
)";

View file

@ -41,38 +41,99 @@ void GPU_SW::Reset()
m_vram.fill(0);
}
void GPU_SW::CopyOut15Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height)
void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced)
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = src_ptr;
u32* dst_row_ptr = dst_ptr;
for (u32 col = 0; col < width; col++)
*(dst_row_ptr++) = RGBA5551ToRGBA8888(*(src_row_ptr++));
const u8 interlaced_shift = BoolToUInt8(interlaced);
src_ptr += src_stride;
dst_ptr += dst_stride;
// Fast path when not wrapping around.
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
{
dst_stride <<= interlaced_shift;
height >>= interlaced_shift;
const u16* src_ptr = &m_vram[src_y * VRAM_WIDTH + src_x];
const u32 src_stride = VRAM_WIDTH << interlaced_shift;
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = src_ptr;
u32* dst_row_ptr = dst_ptr;
for (u32 col = 0; col < width; col++)
*(dst_row_ptr++) = RGBA5551ToRGBA8888(*(src_row_ptr++));
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
else
{
dst_stride <<= interlaced_shift;
height >>= interlaced_shift;
const u32 end_x = src_x + width;
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
u32* dst_row_ptr = dst_ptr;
for (u32 col = src_x; col < end_x; col++)
*(dst_row_ptr++) = RGBA5551ToRGBA8888(src_row_ptr[col % VRAM_WIDTH]);
src_y += (1 << interlaced_shift);
dst_ptr += dst_stride;
}
}
}
void GPU_SW::CopyOut24Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height)
void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced)
{
for (u32 row = 0; row < height; row++)
const u8 interlaced_shift = BoolToUInt8(interlaced);
// Fast path when not wrapping around. In 24-bit mode each output pixel consumes 3 bytes, so `width` pixels span
// ceil(width * 3 / 2) 16-bit VRAM cells per row rather than `width`. Testing the cell extent keeps the linear
// byte copy from running into the next row (or past the end of m_vram on the last row); those cases fall through
// to the modulo-wrapping path instead.
if ((src_x + ((width * 3u + 1u) / 2u)) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
{
const u8* src_row_ptr = reinterpret_cast<const u8*>(src_ptr);
u32* dst_row_ptr = dst_ptr;
dst_stride <<= interlaced_shift;
height >>= interlaced_shift;
// Beware unaligned accesses.
for (u32 col = 0; col < width; col++)
const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram[src_y * VRAM_WIDTH + src_x]);
const u32 src_stride = (VRAM_WIDTH << interlaced_shift) * sizeof(u16);
for (u32 row = 0; row < height; row++)
{
// This will fill the alpha channel with junk, but that's okay since we don't use it
std::memcpy(dst_row_ptr, src_row_ptr, sizeof(u32));
src_row_ptr += 3;
dst_row_ptr++;
}
const u8* src_row_ptr = src_ptr;
u8* dst_row_ptr = reinterpret_cast<u8*>(dst_ptr);
for (u32 col = 0; col < width; col++)
{
*(dst_row_ptr++) = *(src_row_ptr++);
*(dst_row_ptr++) = *(src_row_ptr++);
*(dst_row_ptr++) = *(src_row_ptr++);
*(dst_row_ptr++) = 0xFF;
}
src_ptr += src_stride;
dst_ptr += dst_stride;
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
else
{
dst_stride <<= interlaced_shift;
height >>= interlaced_shift;
const u32 end_x = src_x + width;
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
u32* dst_row_ptr = dst_ptr;
for (u32 col = 0; col < width; col++)
{
const u32 offset = (src_x + ((col * 3) / 2));
const u16 s0 = src_row_ptr[offset % VRAM_WIDTH];
const u16 s1 = src_row_ptr[(offset + 1) % VRAM_WIDTH];
const u8 shift = static_cast<u8>(col & 1u) * 8;
*(dst_row_ptr++) = (((ZeroExtend32(s1) << 16) | ZeroExtend32(s0)) >> shift) | 0xFF000000u;
}
src_y += (1 << interlaced_shift);
dst_ptr += dst_stride;
}
}
}
@ -98,34 +159,32 @@ void GPU_SW::UpdateDisplay()
const u32 field = GetInterlacedField();
if (m_GPUSTAT.display_area_color_depth_24)
{
CopyOut24Bit(m_vram.data() + (vram_offset_y + field) * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH * 2,
m_display_texture_buffer.data() + field * display_width, display_width * 2, display_width,
display_height / 2);
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width, display_height, true);
}
else
{
CopyOut15Bit(m_vram.data() + (vram_offset_y + field) * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH * 2,
m_display_texture_buffer.data() + field * display_width, display_width * 2, display_width,
display_height / 2);
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width, display_height, true);
}
}
else
{
if (m_GPUSTAT.display_area_color_depth_24)
{
CopyOut24Bit(m_vram.data() + vram_offset_y * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH,
m_display_texture_buffer.data(), display_width, display_width, display_height);
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width,
display_height, false);
}
else
{
CopyOut15Bit(m_vram.data() + vram_offset_y * VRAM_WIDTH + m_crtc_state.regs.X, VRAM_WIDTH,
m_display_texture_buffer.data(), display_width, display_width, display_height);
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width,
display_height, false);
}
}
const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X;
m_host_display->UpdateTexture(m_display_texture.get(), texture_offset_x, 0, display_width, display_height,
m_display_texture_buffer.data(), display_width * sizeof(u32));
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0,
display_width, display_height);
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
@ -135,7 +194,7 @@ void GPU_SW::UpdateDisplay()
}
else
{
CopyOut15Bit(m_vram.data(), VRAM_WIDTH, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT);
CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false);
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH,

View file

@ -43,10 +43,8 @@ protected:
//////////////////////////////////////////////////////////////////////////
// Scanout
//////////////////////////////////////////////////////////////////////////
static void CopyOut15Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height);
static void CopyOut24Bit(const u16* src_ptr, u32 src_stride, u32* dst_ptr, u32 dst_stride, u32 width, u32 height);
void CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced);
void CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced);
void UpdateDisplay() override;
//////////////////////////////////////////////////////////////////////////