#include "gpu.h"
#include "common/file_system.h"
#include "common/heap_array.h"
#include "common/log.h"
#include "common/string_util.h"
#include "dma.h"
#include "host.h"
#include "host_display.h"
#include "imgui.h"
#include "interrupt_controller.h"
#include "settings.h"
#include "stb_image_write.h"
#include "system.h"
#include "timers.h"
#include "util/state_wrapper.h"
#include <cmath>
Log_SetChannel(GPU);
std::unique_ptr<GPU> g_gpu;
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
GPU::GPU() = default;
GPU::~GPU() = default;
bool GPU::Initialize()
{
m_force_progressive_scan = g_settings.gpu_disable_interlacing;
m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings;
m_crtc_tick_event = TimingEvents::CreateTimingEvent(
"GPU CRTC Tick", 1, 1,
[](void* param, TickCount ticks, TickCount ticks_late) { static_cast<GPU*>(param)->CRTCTickEvent(ticks); }, this,
true);
m_command_tick_event = TimingEvents::CreateTimingEvent(
"GPU Command Tick", 1, 1,
[](void* param, TickCount ticks, TickCount ticks_late) { static_cast<GPU*>(param)->CommandTickEvent(ticks); }, this,
true);
m_fifo_size = g_settings.gpu_fifo_size;
m_max_run_ahead = g_settings.gpu_max_run_ahead;
m_console_is_pal = System::IsPALRegion();
UpdateCRTCConfig();
g_host_display->SetDisplayLinearFiltering(g_settings.display_linear_filtering);
g_host_display->SetDisplayIntegerScaling(g_settings.display_integer_scaling);
g_host_display->SetDisplayStretch(g_settings.display_stretch);
if (g_settings.display_post_processing &&
!g_host_display->SetPostProcessingChain(g_settings.display_post_process_chain))
{
Host::AddOSDMessage(Host::TranslateStdString("OSDMessage", "Failed to load post processing shader chain."), 20.0f);
}
return true;
}
void GPU::UpdateSettings()
{
m_force_progressive_scan = g_settings.gpu_disable_interlacing;
m_fifo_size = g_settings.gpu_fifo_size;
m_max_run_ahead = g_settings.gpu_max_run_ahead;
if (m_force_ntsc_timings != g_settings.gpu_force_ntsc_timings || m_console_is_pal != System::IsPALRegion())
{
m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings;
m_console_is_pal = System::IsPALRegion();
UpdateCRTCConfig();
}
// Crop mode changes call this, so recalculate the display area
UpdateCRTCDisplayParameters();
g_host_display->SetDisplayLinearFiltering(g_settings.display_linear_filtering);
g_host_display->SetDisplayIntegerScaling(g_settings.display_integer_scaling);
g_host_display->SetDisplayStretch(g_settings.display_stretch);
}
bool GPU::IsHardwareRenderer()
{
const GPURenderer renderer = GetRendererType();
return (renderer != GPURenderer::Software);
}
void GPU::CPUClockChanged()
{
UpdateCRTCConfig();
}
void GPU::UpdateResolutionScale() {}
std::tuple<u32, u32> GPU::GetEffectiveDisplayResolution(bool scaled /* = true */)
{
return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height);
}
std::tuple<u32, u32> GPU::GetFullDisplayResolution(bool scaled /* = true */)
{
return std::tie(m_crtc_state.display_width, m_crtc_state.display_height);
}
void GPU::Reset(bool clear_vram)
{
m_GPUSTAT.bits = 0x14802000;
m_set_texture_disable_mask = false;
m_GPUREAD_latch = 0;
m_crtc_state.fractional_ticks = 0;
m_crtc_state.fractional_dot_ticks = 0;
m_crtc_state.current_tick_in_scanline = 0;
m_crtc_state.current_scanline = 0;
m_crtc_state.in_hblank = false;
m_crtc_state.in_vblank = false;
m_crtc_state.interlaced_field = 0;
m_crtc_state.interlaced_display_field = 0;
SoftReset();
UpdateDisplay();
}
void GPU::SoftReset()
{
FlushRender();
if (m_blitter_state == BlitterState::WritingVRAM)
FinishVRAMWrite();
m_GPUSTAT.texture_page_x_base = 0;
m_GPUSTAT.texture_page_y_base = 0;
m_GPUSTAT.semi_transparency_mode = GPUTransparencyMode::HalfBackgroundPlusHalfForeground;
m_GPUSTAT.texture_color_mode = GPUTextureMode::Palette4Bit;
m_GPUSTAT.dither_enable = false;
m_GPUSTAT.draw_to_displayed_field = false;
m_GPUSTAT.set_mask_while_drawing = false;
m_GPUSTAT.check_mask_before_draw = false;
m_GPUSTAT.reverse_flag = false;
m_GPUSTAT.texture_disable = false;
m_GPUSTAT.horizontal_resolution_2 = 0;
m_GPUSTAT.horizontal_resolution_1 = 0;
m_GPUSTAT.vertical_resolution = false;
m_GPUSTAT.pal_mode = System::IsPALRegion();
m_GPUSTAT.display_area_color_depth_24 = false;
m_GPUSTAT.vertical_interlace = false;
m_GPUSTAT.display_disable = true;
m_GPUSTAT.dma_direction = DMADirection::Off;
m_drawing_area.Set(0, 0, 0, 0);
m_drawing_area_changed = true;
m_drawing_offset = {};
std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs));
m_crtc_state.regs.horizontal_display_range = 0xC60260;
m_crtc_state.regs.vertical_display_range = 0x3FC10;
m_blitter_state = BlitterState::Idle;
m_pending_command_ticks = 0;
m_command_total_words = 0;
m_vram_transfer = {};
m_fifo.Clear();
m_blit_buffer.clear();
m_blit_remaining_words = 0;
m_draw_mode.texture_window_value = 0xFFFFFFFFu;
SetDrawMode(0);
SetTexturePalette(0);
SetTextureWindow(0);
UpdateDMARequest();
UpdateCRTCConfig();
UpdateCRTCTickEvent();
UpdateCommandTickEvent();
UpdateGPUIdle();
}
bool GPU::DoState(StateWrapper& sw, HostDisplayTexture** host_texture, bool update_display)
{
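// StateWrapper streams in both directions: sw.Do() reads into the variable when loading a state
// and writes it out when saving, so this one function describes the entire save-state layout.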
if (sw.IsReading())
{
// perform a reset to discard all pending draws/fb state
Reset(host_texture == nullptr);
}
sw.Do(&m_GPUSTAT.bits);
sw.Do(&m_draw_mode.mode_reg.bits);
sw.Do(&m_draw_mode.palette_reg);
sw.Do(&m_draw_mode.texture_window_value);
sw.Do(&m_draw_mode.texture_page_x);
sw.Do(&m_draw_mode.texture_page_y);
sw.Do(&m_draw_mode.texture_palette_x);
sw.Do(&m_draw_mode.texture_palette_y);
sw.Do(&m_draw_mode.texture_window.and_x);
sw.Do(&m_draw_mode.texture_window.and_y);
sw.Do(&m_draw_mode.texture_window.or_x);
sw.Do(&m_draw_mode.texture_window.or_y);
sw.Do(&m_draw_mode.texture_x_flip);
sw.Do(&m_draw_mode.texture_y_flip);
sw.Do(&m_drawing_area.left);
sw.Do(&m_drawing_area.top);
sw.Do(&m_drawing_area.right);
sw.Do(&m_drawing_area.bottom);
sw.Do(&m_drawing_offset.x);
sw.Do(&m_drawing_offset.y);
sw.Do(&m_drawing_offset.x);
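// NOTE: x is serialized a second time here; presumably redundant, but it is part of the
// existing save-state layout.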
sw.Do(&m_console_is_pal);
sw.Do(&m_set_texture_disable_mask);
sw.Do(&m_crtc_state.regs.display_address_start);
sw.Do(&m_crtc_state.regs.horizontal_display_range);
sw.Do(&m_crtc_state.regs.vertical_display_range);
sw.Do(&m_crtc_state.dot_clock_divider);
sw.Do(&m_crtc_state.display_width);
sw.Do(&m_crtc_state.display_height);
sw.Do(&m_crtc_state.display_origin_left);
sw.Do(&m_crtc_state.display_origin_top);
sw.Do(&m_crtc_state.display_vram_left);
sw.Do(&m_crtc_state.display_vram_top);
sw.Do(&m_crtc_state.display_vram_width);
sw.Do(&m_crtc_state.display_vram_height);
sw.Do(&m_crtc_state.horizontal_total);
sw.Do(&m_crtc_state.horizontal_visible_start);
sw.Do(&m_crtc_state.horizontal_visible_end);
sw.Do(&m_crtc_state.horizontal_display_start);
sw.Do(&m_crtc_state.horizontal_display_end);
sw.Do(&m_crtc_state.vertical_total);
sw.Do(&m_crtc_state.vertical_visible_start);
sw.Do(&m_crtc_state.vertical_visible_end);
sw.Do(&m_crtc_state.vertical_display_start);
sw.Do(&m_crtc_state.vertical_display_end);
sw.Do(&m_crtc_state.fractional_ticks);
sw.Do(&m_crtc_state.current_tick_in_scanline);
sw.Do(&m_crtc_state.current_scanline);
sw.DoEx(&m_crtc_state.fractional_dot_ticks, 46, 0);
sw.Do(&m_crtc_state.in_hblank);
sw.Do(&m_crtc_state.in_vblank);
sw.Do(&m_crtc_state.interlaced_field);
sw.Do(&m_crtc_state.interlaced_display_field);
sw.Do(&m_crtc_state.active_line_lsb);
sw.Do(&m_blitter_state);
sw.Do(&m_pending_command_ticks);
sw.Do(&m_command_total_words);
sw.Do(&m_GPUREAD_latch);
sw.Do(&m_vram_transfer.x);
sw.Do(&m_vram_transfer.y);
sw.Do(&m_vram_transfer.width);
sw.Do(&m_vram_transfer.height);
sw.Do(&m_vram_transfer.col);
sw.Do(&m_vram_transfer.row);
sw.Do(&m_fifo);
sw.Do(&m_blit_buffer);
sw.Do(&m_blit_remaining_words);
sw.Do(&m_render_command.bits);
sw.Do(&m_max_run_ahead);
sw.Do(&m_fifo_size);
if (sw.IsReading())
{
m_draw_mode.texture_page_changed = true;
m_draw_mode.texture_window_changed = true;
m_drawing_area_changed = true;
UpdateDMARequest();
}
if (!host_texture)
{
if (!sw.DoMarker("GPU-VRAM"))
return false;
if (sw.IsReading())
{
// Still need a temporary here.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data(), false, false);
}
else
{
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
}
if (sw.IsReading())
{
UpdateCRTCConfig();
if (update_display)
UpdateDisplay();
UpdateCRTCTickEvent();
UpdateCommandTickEvent();
}
return !sw.HasError();
}
void GPU::ResetGraphicsAPIState() {}
void GPU::RestoreGraphicsAPIState() {}
void GPU::UpdateDMARequest()
{
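// Per the usual PSX GPU documentation (e.g. nocash specs): GPUSTAT.27 is "ready to send VRAM to
// CPU", GPUSTAT.28 is "ready to receive DMA block", and GPUSTAT.25 (dma_data_request) mirrors one
// of these, or the FIFO state, depending on the direction selected via GP1(04h).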
switch (m_blitter_state)
{
case BlitterState::Idle:
m_GPUSTAT.ready_to_send_vram = false;
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.IsEmpty() || m_fifo.GetSize() < m_command_total_words);
break;
case BlitterState::WritingVRAM:
m_GPUSTAT.ready_to_send_vram = false;
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
break;
case BlitterState::ReadingVRAM:
m_GPUSTAT.ready_to_send_vram = true;
m_GPUSTAT.ready_to_recieve_dma = false;
break;
}
bool dma_request;
switch (m_GPUSTAT.dma_direction)
{
case DMADirection::Off:
dma_request = false;
break;
case DMADirection::FIFO:
dma_request = m_GPUSTAT.ready_to_recieve_dma;
break;
case DMADirection::CPUtoGP0:
dma_request = m_GPUSTAT.ready_to_recieve_dma;
break;
case DMADirection::GPUREADtoCPU:
dma_request = m_GPUSTAT.ready_to_send_vram;
break;
default:
dma_request = false;
break;
}
m_GPUSTAT.dma_data_request = dma_request;
g_dma.SetRequest(DMA::Channel::GPU, dma_request);
}
void GPU::UpdateGPUIdle()
{
switch (m_blitter_state)
{
case BlitterState::Idle:
m_GPUSTAT.gpu_idle = (m_pending_command_ticks <= 0 && m_fifo.IsEmpty());
break;
case BlitterState::WritingVRAM:
m_GPUSTAT.gpu_idle = false;
break;
case BlitterState::ReadingVRAM:
m_GPUSTAT.gpu_idle = false;
break;
}
}
u32 GPU::ReadRegister(u32 offset)
{
switch (offset)
{
case 0x00:
return ReadGPUREAD();
case 0x04:
{
// code can depend on the odd/even line bit, so update the GPU state when reading.
// we can mitigate this slightly by only updating when the raster is actually hitting a new line.
if (IsCRTCScanlinePending())
SynchronizeCRTC();
if (IsCommandCompletionPending())
m_command_tick_event->InvokeEarly();
return m_GPUSTAT.bits;
}
default:
Log_ErrorPrintf("Unhandled register read: %02X", offset);
return UINT32_C(0xFFFFFFFF);
}
}
void GPU::WriteRegister(u32 offset, u32 value)
{
switch (offset)
{
case 0x00:
m_fifo.Push(value);
ExecuteCommands();
UpdateCommandTickEvent();
return;
case 0x04:
WriteGP1(value);
return;
default:
Log_ErrorPrintf("Unhandled register write: %02X <- %08X", offset, value);
return;
}
}
void GPU::DMARead(u32* words, u32 word_count)
{
if (m_GPUSTAT.dma_direction != DMADirection::GPUREADtoCPU)
{
Log_ErrorPrintf("Invalid DMA direction from GPU DMA read");
std::fill_n(words, word_count, UINT32_C(0xFFFFFFFF));
return;
}
for (u32 i = 0; i < word_count; i++)
words[i] = ReadGPUREAD();
}
void GPU::EndDMAWrite()
{
m_fifo_pushed = true;
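// If the FIFO is already being drained further up the stack (m_syncing, presumably set while
// ExecuteCommands() is running), avoid re-entering command execution and just refresh the DMA
// request; the outer call will pick up the newly-pushed words.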
if (!m_syncing)
{
ExecuteCommands();
UpdateCommandTickEvent();
}
else
{
UpdateDMARequest();
}
}
/**
* NTSC GPU clock 53.693175 MHz
* PAL GPU clock 53.203425 MHz
* courtesy of @ggrtk
*
* NTSC - sysclk * 715909 / 451584
* PAL - sysclk * 709379 / 451584
*/
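// Worked example: sysclk = 33,868,800 Hz (44100 * 768), and 33,868,800 / 451,584 = 75 exactly,
// so NTSC = 75 * 715,909 = 53,693,175 Hz and PAL = 75 * 709,379 = 53,203,425 Hz.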
TickCount GPU::GetCRTCFrequency() const
{
return m_console_is_pal ? 53203425 : 53693175;
}
TickCount GPU::CRTCTicksToSystemTicks(TickCount gpu_ticks, TickCount fractional_ticks) const
{
// convert to master clock, rounding up as we want to overshoot not undershoot
if (!m_console_is_pal)
return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(715908)) / u64(715909));
else
return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(709378)) / u64(709379));
}
TickCount GPU::SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const
{
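// The division remainder is carried in *fractional_ticks, so repeated conversions accumulate
// sub-tick precision instead of dropping it.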
u64 mul = u64(sysclk_ticks);
mul *= !m_console_is_pal ? u64(715909) : u64(709379);
mul += u64(*fractional_ticks);
const TickCount ticks = static_cast<TickCount>(mul / u64(451584));
*fractional_ticks = static_cast<TickCount>(mul % u64(451584));
return ticks;
}
void GPU::AddCommandTicks(TickCount ticks)
{
m_pending_command_ticks += ticks;
}
void GPU::SynchronizeCRTC()
{
m_crtc_tick_event->InvokeEarly();
}
float GPU::ComputeHorizontalFrequency() const
{
const CRTCState& cs = m_crtc_state;
TickCount fractional_ticks = 0;
return static_cast<float>(
static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) /
static_cast<double>(cs.horizontal_total));
}
float GPU::ComputeVerticalFrequency() const
{
const CRTCState& cs = m_crtc_state;
const TickCount ticks_per_frame = cs.horizontal_total * cs.vertical_total;
TickCount fractional_ticks = 0;
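// Example (assuming the usual NTSC constants of 3413 ticks/line and 263 total lines):
// 53,693,175 / (3413 * 263) is roughly 59.82 Hz, the progressive-scan NTSC frame rate.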
return static_cast<float>(
static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) /
static_cast<double>(ticks_per_frame));
}
float GPU::GetDisplayAspectRatio() const
{
if (g_settings.display_force_4_3_for_24bit && m_GPUSTAT.display_area_color_depth_24)
{
return 4.0f / 3.0f;
}
else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Auto)
{
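// Auto mode scales 4:3 by the fraction of the standard active video area that the visible
// region covers, so the image keeps the proportions a real TV would display.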
const CRTCState& cs = m_crtc_state;
float relative_width = static_cast<float>(cs.horizontal_visible_end - cs.horizontal_visible_start);
float relative_height = static_cast<float>(cs.vertical_visible_end - cs.vertical_visible_start);
if (relative_width <= 0 || relative_height <= 0)
return 4.0f / 3.0f;
if (m_GPUSTAT.pal_mode)
{
relative_width /= static_cast<float>(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START);
relative_height /= static_cast<float>(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START);
}
else
{
relative_width /= static_cast<float>(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START);
relative_height /= static_cast<float>(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START);
}
return (relative_width / relative_height) * (4.0f / 3.0f);
}
else if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1)
{
if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0)
return 4.0f / 3.0f;
return static_cast<float>(m_crtc_state.display_width) / static_cast<float>(m_crtc_state.display_height);
}
else
{
return g_settings.GetDisplayAspectRatioValue();
}
}
void GPU::UpdateCRTCConfig()
{
static constexpr std::array<u16, 8> dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}};
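// Indexed by horizontal_resolution_1 | (horizontal_resolution_2 << 2): dividers 10/8/5/4 select
// the 256/320/512/640-pixel-wide modes, and any index with the hres2 bit set selects the
// 368-pixel-wide mode (divider 7).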
CRTCState& cs = m_crtc_state;
if (m_GPUSTAT.pal_mode)
{
cs.vertical_total = PAL_TOTAL_LINES;
cs.current_scanline %= PAL_TOTAL_LINES;
cs.horizontal_total = PAL_TICKS_PER_LINE;
cs.horizontal_sync_start = PAL_HSYNC_TICKS;
cs.current_tick_in_scanline %= System::ScaleTicksToOverclock(PAL_TICKS_PER_LINE);
}
else
{
cs.vertical_total = NTSC_TOTAL_LINES;
cs.current_scanline %= NTSC_TOTAL_LINES;
cs.horizontal_total = NTSC_TICKS_PER_LINE;
cs.horizontal_sync_start = NTSC_HSYNC_TICKS;
cs.current_tick_in_scanline %= System::ScaleTicksToOverclock(NTSC_TICKS_PER_LINE);
}
cs.in_hblank = (cs.current_tick_in_scanline >= cs.horizontal_sync_start);
const u8 horizontal_resolution_index = m_GPUSTAT.horizontal_resolution_1 | (m_GPUSTAT.horizontal_resolution_2 << 2);
cs.dot_clock_divider = dot_clock_dividers[horizontal_resolution_index];
cs.horizontal_display_start =
(std::min<u16>(cs.regs.X1, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
cs.horizontal_display_end =
(std::min<u16>(cs.regs.X2, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
cs.vertical_display_start = std::min<u16>(cs.regs.Y1, cs.vertical_total);
cs.vertical_display_end = std::min<u16>(cs.regs.Y2, cs.vertical_total);
if (m_GPUSTAT.pal_mode && m_force_ntsc_timings)
{
// scale to NTSC parameters
cs.horizontal_display_start =
static_cast<u16>((static_cast<u32>(cs.horizontal_display_start) * NTSC_TICKS_PER_LINE) / PAL_TICKS_PER_LINE);
cs.horizontal_display_end = static_cast<u16>(
((static_cast<u32>(cs.horizontal_display_end) * NTSC_TICKS_PER_LINE) + (PAL_TICKS_PER_LINE - 1)) /
PAL_TICKS_PER_LINE);
cs.vertical_display_start =
static_cast<u16>((static_cast<u32>(cs.vertical_display_start) * NTSC_TOTAL_LINES) / PAL_TOTAL_LINES);
cs.vertical_display_end = static_cast<u16>(
((static_cast<u32>(cs.vertical_display_end) * NTSC_TOTAL_LINES) + (PAL_TOTAL_LINES - 1)) / PAL_TOTAL_LINES);
cs.vertical_total = NTSC_TOTAL_LINES;
cs.current_scanline %= NTSC_TOTAL_LINES;
cs.horizontal_total = NTSC_TICKS_PER_LINE;
cs.current_tick_in_scanline %= NTSC_TICKS_PER_LINE;
}
cs.horizontal_display_start =
static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_start)));
cs.horizontal_display_end =
static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_end)));
cs.horizontal_total = static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_total)));
System::SetThrottleFrequency(ComputeVerticalFrequency());
UpdateCRTCDisplayParameters();
UpdateCRTCTickEvent();
}
void GPU::UpdateCRTCDisplayParameters()
{
CRTCState& cs = m_crtc_state;
const DisplayCropMode crop_mode = g_settings.display_crop_mode;
const u16 horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE;
const u16 vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES;
const u16 horizontal_display_start =
(std::min<u16>(cs.regs.X1, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
const u16 horizontal_display_end =
(std::min<u16>(cs.regs.X2, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
const u16 vertical_display_start = std::min<u16>(cs.regs.Y1, vertical_total);
const u16 vertical_display_end = std::min<u16>(cs.regs.Y2, vertical_total);
if (m_GPUSTAT.pal_mode)
{
// TODO: Verify PAL numbers.
switch (crop_mode)
{
case DisplayCropMode::None:
cs.horizontal_visible_start = PAL_HORIZONTAL_ACTIVE_START;
cs.horizontal_visible_end = PAL_HORIZONTAL_ACTIVE_END;
cs.vertical_visible_start = PAL_VERTICAL_ACTIVE_START;
cs.vertical_visible_end = PAL_VERTICAL_ACTIVE_END;
break;
case DisplayCropMode::Overscan:
cs.horizontal_visible_start = static_cast<u16>(std::max<int>(0, 628 + g_settings.display_active_start_offset));
cs.horizontal_visible_end =
static_cast<u16>(std::max<int>(cs.horizontal_visible_start, 3188 + g_settings.display_active_end_offset));
cs.vertical_visible_start = static_cast<u16>(std::max<int>(0, 30 + g_settings.display_line_start_offset));
cs.vertical_visible_end =
static_cast<u16>(std::max<int>(cs.vertical_visible_start, 298 + g_settings.display_line_end_offset));
break;
case DisplayCropMode::Borders:
default:
cs.horizontal_visible_start = horizontal_display_start;
cs.horizontal_visible_end = horizontal_display_end;
cs.vertical_visible_start = vertical_display_start;
cs.vertical_visible_end = vertical_display_end;
break;
}
cs.horizontal_visible_start =
std::clamp<u16>(cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_START, PAL_HORIZONTAL_ACTIVE_END);
cs.horizontal_visible_end =
std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_END);
cs.vertical_visible_start =
std::clamp<u16>(cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_START, PAL_VERTICAL_ACTIVE_END);
cs.vertical_visible_end =
std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_END);
}
else
{
switch (crop_mode)
{
case DisplayCropMode::None:
cs.horizontal_visible_start = NTSC_HORIZONTAL_ACTIVE_START;
cs.horizontal_visible_end = NTSC_HORIZONTAL_ACTIVE_END;
cs.vertical_visible_start = NTSC_VERTICAL_ACTIVE_START;
cs.vertical_visible_end = NTSC_VERTICAL_ACTIVE_END;
break;
case DisplayCropMode::Overscan:
cs.horizontal_visible_start = static_cast<u16>(std::max<int>(0, 608 + g_settings.display_active_start_offset));
cs.horizontal_visible_end =
static_cast<u16>(std::max<int>(cs.horizontal_visible_start, 3168 + g_settings.display_active_end_offset));
cs.vertical_visible_start = static_cast<u16>(std::max<int>(0, 24 + g_settings.display_line_start_offset));
cs.vertical_visible_end =
static_cast<u16>(std::max<int>(cs.vertical_visible_start, 248 + g_settings.display_line_end_offset));
break;
case DisplayCropMode::Borders:
default:
cs.horizontal_visible_start = horizontal_display_start;
cs.horizontal_visible_end = horizontal_display_end;
cs.vertical_visible_start = vertical_display_start;
cs.vertical_visible_end = vertical_display_end;
break;
}
cs.horizontal_visible_start =
std::clamp<u16>(cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_START, NTSC_HORIZONTAL_ACTIVE_END);
cs.horizontal_visible_end =
std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_END);
cs.vertical_visible_start =
std::clamp<u16>(cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_START, NTSC_VERTICAL_ACTIVE_END);
cs.vertical_visible_end =
std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_END);
}
// If force-progressive is enabled, we only double the height in 480i mode. This way non-interleaved 480i framebuffers
// won't be broken when displayed.
const u8 y_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.vertical_resolution);
const u8 height_shift = m_force_progressive_scan ? y_shift : BoolToUInt8(m_GPUSTAT.vertical_interlace);
// Determine screen size.
cs.display_width = (cs.horizontal_visible_end - cs.horizontal_visible_start) / cs.dot_clock_divider;
cs.display_height = (cs.vertical_visible_end - cs.vertical_visible_start) << height_shift;
// Determine the number of pixels output from VRAM (in general, round to a 4-pixel multiple).
// TODO: Verify behavior if values are outside of the active video portion of scanline.
const u16 horizontal_display_ticks =
(horizontal_display_end < horizontal_display_start) ? 0 : (horizontal_display_end - horizontal_display_start);
const u16 horizontal_display_pixels = horizontal_display_ticks / cs.dot_clock_divider;
if (horizontal_display_pixels == 1u)
cs.display_vram_width = 4u;
else
cs.display_vram_width = (horizontal_display_pixels + 2u) & ~3u;
// Determine if we need to adjust the VRAM rectangle (because the display is starting outside the visible area) or add
// padding.
u16 horizontal_skip_pixels;
if (horizontal_display_start >= cs.horizontal_visible_start)
{
cs.display_origin_left = (horizontal_display_start - cs.horizontal_visible_start) / cs.dot_clock_divider;
cs.display_vram_left = cs.regs.X;
horizontal_skip_pixels = 0;
}
else
{
horizontal_skip_pixels = (cs.horizontal_visible_start - horizontal_display_start) / cs.dot_clock_divider;
cs.display_origin_left = 0;
cs.display_vram_left = (cs.regs.X + horizontal_skip_pixels) % VRAM_WIDTH;
}
// apply the crop from the start (usually overscan)
cs.display_vram_width -= std::min(cs.display_vram_width, horizontal_skip_pixels);
// Apply crop from the end by shrinking VRAM rectangle width if display would end outside the visible area.
cs.display_vram_width = std::min<u16>(cs.display_vram_width, cs.display_width - cs.display_origin_left);
if (vertical_display_start >= cs.vertical_visible_start)
{
cs.display_origin_top = (vertical_display_start - cs.vertical_visible_start) << y_shift;
cs.display_vram_top = cs.regs.Y;
}
else
{
cs.display_origin_top = 0;
cs.display_vram_top = (cs.regs.Y + ((cs.vertical_visible_start - vertical_display_start) << y_shift)) % VRAM_HEIGHT;
}
if (vertical_display_end <= cs.vertical_visible_end)
{
cs.display_vram_height =
(vertical_display_end -
std::min(vertical_display_end, std::max(vertical_display_start, cs.vertical_visible_start)))
<< height_shift;
}
else
{
cs.display_vram_height =
(cs.vertical_visible_end -
std::min(cs.vertical_visible_end, std::max(vertical_display_start, cs.vertical_visible_start)))
<< height_shift;
}
}
TickCount GPU::GetPendingCRTCTicks() const
{
const TickCount pending_sysclk_ticks = m_crtc_tick_event->GetTicksSinceLastExecution();
TickCount fractional_ticks = m_crtc_state.fractional_ticks;
return SystemTicksToCRTCTicks(pending_sysclk_ticks, &fractional_ticks);
}
TickCount GPU::GetPendingCommandTicks() const
{
if (!m_command_tick_event->IsActive())
return 0;
return SystemTicksToGPUTicks(m_command_tick_event->GetTicksSinceLastExecution());
}
void GPU::UpdateCRTCTickEvent()
{
// figure out how many GPU ticks until the next vblank or event
TickCount lines_until_event;
if (g_timers.IsSyncEnabled(HBLANK_TIMER_INDEX))
{
// when the timer sync is enabled we need to sync at vblank start and end
lines_until_event =
(m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) ?
(m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_start) :
(m_crtc_state.vertical_display_end - m_crtc_state.current_scanline);
}
else
{
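// otherwise we only need to wake at vblank start (the end of the display area)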
lines_until_event =
(m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end ?
(m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_end) :
(m_crtc_state.vertical_display_end - m_crtc_state.current_scanline));
}
if (g_timers.IsExternalIRQEnabled(HBLANK_TIMER_INDEX))
lines_until_event = std::min(lines_until_event, g_timers.GetTicksUntilIRQ(HBLANK_TIMER_INDEX));
TickCount ticks_until_event =
lines_until_event * m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline;
if (g_timers.IsExternalIRQEnabled(DOT_TIMER_INDEX))
{
const TickCount dots_until_irq = g_timers.GetTicksUntilIRQ(DOT_TIMER_INDEX);
const TickCount ticks_until_irq =
(dots_until_irq * m_crtc_state.dot_clock_divider) - m_crtc_state.fractional_dot_ticks;
ticks_until_event = std::min(ticks_until_event, std::max<TickCount>(ticks_until_irq, 0));
}
#if 0
const TickCount ticks_until_hblank =
(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_display_end) ?
(m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_display_end) :
(m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline);
#endif
m_crtc_tick_event->Schedule(CRTCTicksToSystemTicks(ticks_until_event, m_crtc_state.fractional_ticks));
}
bool GPU::IsCRTCScanlinePending() const
{
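// The next scanline-related transition is the hblank start, or the end of the line if the
// raster is already inside hblank.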
const TickCount ticks = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline);
return (ticks >= (m_crtc_state.in_hblank ? m_crtc_state.horizontal_total : m_crtc_state.horizontal_sync_start));
}
bool GPU::IsCommandCompletionPending() const
{
return (m_pending_command_ticks > 0 && GetPendingCommandTicks() >= m_pending_command_ticks);
}
void GPU::CRTCTickEvent(TickCount ticks)
{
// convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider
{
const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks);
m_crtc_state.current_tick_in_scanline += gpu_ticks;
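// Timer 0 can be driven by the dot clock; feed it whole dots and carry the remainder in
// fractional_dot_ticks.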
if (g_timers.IsUsingExternalClock(DOT_TIMER_INDEX))
{
m_crtc_state.fractional_dot_ticks += gpu_ticks;
const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider;
m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider;
if (dots > 0)
g_timers.AddTicks(DOT_TIMER_INDEX, dots);
}
}
if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_total)
{
// short path when we execute less than one line; this shouldn't occur often.
const bool old_hblank = m_crtc_state.in_hblank;
const bool new_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_sync_start);
m_crtc_state.in_hblank = new_hblank;
if (!old_hblank && new_hblank && g_timers.IsUsingExternalClock(HBLANK_TIMER_INDEX))
g_timers.AddTicks(HBLANK_TIMER_INDEX, 1);
UpdateCRTCTickEvent();
return;
}
u32 lines_to_draw = m_crtc_state.current_tick_in_scanline / m_crtc_state.horizontal_total;
m_crtc_state.current_tick_in_scanline %= m_crtc_state.horizontal_total;
#if 0
Log_WarningPrintf("Old line: %u, new line: %u, drawing %u", m_crtc_state.current_scanline,
m_crtc_state.current_scanline + lines_to_draw, lines_to_draw);
#endif
const bool old_hblank = m_crtc_state.in_hblank;
const bool new_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_sync_start);
m_crtc_state.in_hblank = new_hblank;
if (g_timers.IsUsingExternalClock(HBLANK_TIMER_INDEX))
{
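// One hblank per fully-completed intermediate line (lines_to_draw - 1), plus one for the first
// line if its hsync hadn't been reached yet, and one more if the final position is past hsync.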
const u32 hblank_timer_ticks = BoolToUInt32(!old_hblank) + BoolToUInt32(new_hblank) + (lines_to_draw - 1);
g_timers.AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks));
}
while (lines_to_draw > 0)
{
const u32 lines_to_draw_this_loop =
std::min(lines_to_draw, m_crtc_state.vertical_total - m_crtc_state.current_scanline);
const u32 prev_scanline = m_crtc_state.current_scanline;
m_crtc_state.current_scanline += lines_to_draw_this_loop;
DebugAssert(m_crtc_state.current_scanline <= m_crtc_state.vertical_total);
lines_to_draw -= lines_to_draw_this_loop;
// clear the vblank flag if the beam would pass through the display area
if (prev_scanline < m_crtc_state.vertical_display_start &&
m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end)
{
g_timers.SetGate(HBLANK_TIMER_INDEX, false);
m_crtc_state.in_vblank = false;
}
const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start ||
m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end;
if (m_crtc_state.in_vblank != new_vblank)
{
if (new_vblank)
{
Log_DebugPrintf("Now in v-blank");
g_interrupt_controller.InterruptRequest(InterruptController::IRQ::VBLANK);
// flush any pending draws and "scan out" the image
FlushRender();
UpdateDisplay();
System::FrameDone();
// switch fields early. this is needed so we draw to the correct one.
if (m_GPUSTAT.InInterleaved480iMode())
m_crtc_state.interlaced_display_field = m_crtc_state.interlaced_field ^ 1u;
else
m_crtc_state.interlaced_display_field = 0;
}
g_timers.SetGate(HBLANK_TIMER_INDEX, new_vblank);
m_crtc_state.in_vblank = new_vblank;
}
// past the end of vblank?
if (m_crtc_state.current_scanline == m_crtc_state.vertical_total)
{
// start the new frame
m_crtc_state.current_scanline = 0;
if (m_GPUSTAT.vertical_interlace)
{
m_crtc_state.interlaced_field ^= 1u;
m_GPUSTAT.interlaced_field = !m_crtc_state.interlaced_field;
}
else
{
m_crtc_state.interlaced_field = 0;
m_GPUSTAT.interlaced_field = 0u; // new GPU = 1, old GPU = 0
}
}
}
// update the even/odd line bits: they alternate per displayed field in 480i mode, per scanline otherwise
if (m_GPUSTAT.InInterleaved480iMode())
{
m_crtc_state.active_line_lsb =
Truncate8((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.interlaced_display_field)) & u32(1));
m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked(
(m_crtc_state.regs.Y + (BoolToUInt8(!m_crtc_state.in_vblank) & m_crtc_state.interlaced_display_field)) & u32(1));
}
else
{
m_crtc_state.active_line_lsb = 0;
m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1));
}
UpdateCRTCTickEvent();
}
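// Fires when the batched GPU command ticks elapse: executes any queued commands
// (unless re-entered from a DMA-driven sync) and reschedules if work remains.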
void GPU::CommandTickEvent(TickCount ticks)
{
m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
m_command_tick_event->Deactivate();
// we can be syncing if this came from a DMA write. recursively executing commands would be bad.
if (!m_syncing)
ExecuteCommands();
UpdateGPUIdle();
if (m_pending_command_ticks <= 0)
m_pending_command_ticks = 0;
else
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
}
void GPU::UpdateCommandTickEvent()
{
if (m_pending_command_ticks <= 0)
m_command_tick_event->Deactivate();
else if (!m_command_tick_event->IsActive())
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
}
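// Maps a window-space coordinate to the beam tick and scanline being scanned out
// at that point (presumably for lightgun-style input). Returns false when the
// position falls outside the visible display area.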
bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, float x_scale, u32* out_tick,
u32* out_line) const
{
auto [display_x, display_y] = g_host_display->ConvertWindowCoordinatesToDisplayCoordinates(
window_x, window_y, g_host_display->GetWindowWidth(), g_host_display->GetWindowHeight(),
g_host_display->GetDisplayTopMargin());
if (x_scale != 1.0f)
{
const float dw = static_cast<float>(m_crtc_state.display_width);
float scaled_x = ((display_x / dw) * 2.0f) - 1.0f; // 0..1 -> -1..1
scaled_x *= x_scale;
display_x = (((scaled_x + 1.0f) * 0.5f) * dw); // -1..1 -> 0..1
}
Log_DebugPrintf("win %d,%d -> disp %.2f,%.2f (size %u,%u frac %f,%f)", window_x, window_y, display_x, display_y,
m_crtc_state.display_width, m_crtc_state.display_height,
display_x / static_cast<float>(m_crtc_state.display_width),
display_y / static_cast<float>(m_crtc_state.display_height));
if (display_x < 0 || static_cast<u32>(display_x) >= m_crtc_state.display_width || display_y < 0 ||
static_cast<u32>(display_y) >= m_crtc_state.display_height)
{
return false;
}
*out_line = (static_cast<u32>(std::round(display_y)) >> BoolToUInt8(m_GPUSTAT.vertical_interlace)) +
m_crtc_state.vertical_visible_start;
*out_tick = static_cast<u32>(std::round(display_x * static_cast<float>(m_crtc_state.dot_clock_divider))) +
m_crtc_state.horizontal_visible_start;
return true;
}
u32 GPU::ReadGPUREAD()
{
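// Not in a VRAM->CPU transfer: return the last latched value (e.g. a GetGPUInfo result).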
if (m_blitter_state != BlitterState::ReadingVRAM)
return m_GPUREAD_latch;
// Read two pixels out of VRAM and pack them into one word. If the transfer ends on an odd pixel, the upper half is zero-filled.
u32 value = 0;
for (u32 i = 0; i < 2; i++)
{
// Read with correct wrap-around behavior.
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16);
if (++m_vram_transfer.col == m_vram_transfer.width)
{
m_vram_transfer.col = 0;
if (++m_vram_transfer.row == m_vram_transfer.height)
{
Log_DebugPrintf("End of VRAM->CPU transfer");
m_vram_transfer = {};
m_blitter_state = BlitterState::Idle;
// end of transfer, catch up on any commands which were written (unlikely)
ExecuteCommands();
UpdateCommandTickEvent();
break;
}
}
}
m_GPUREAD_latch = value;
return value;
}
void GPU::WriteGP1(u32 value)
{
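// GP1 command number is in bits 24..29; the parameter occupies bits 0..23.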
const u32 command = (value >> 24) & 0x3Fu;
const u32 param = value & UINT32_C(0x00FFFFFF);
switch (command)
{
case 0x00: // Reset GPU
{
Log_DebugPrintf("GP1 reset GPU");
m_command_tick_event->InvokeEarly();
SynchronizeCRTC();
SoftReset();
}
break;
case 0x01: // Clear FIFO
{
Log_DebugPrintf("GP1 clear FIFO");
m_command_tick_event->InvokeEarly();
SynchronizeCRTC();
// flush partial writes
if (m_blitter_state == BlitterState::WritingVRAM)
FinishVRAMWrite();
m_blitter_state = BlitterState::Idle;
m_command_total_words = 0;
m_vram_transfer = {};
m_fifo.Clear();
m_blit_buffer.clear();
m_blit_remaining_words = 0;
m_pending_command_ticks = 0;
m_command_tick_event->Deactivate();
UpdateDMARequest();
UpdateGPUIdle();
}
break;
case 0x02: // Acknowledge Interrupt
{
Log_DebugPrintf("Acknowledge interrupt");
m_GPUSTAT.interrupt_request = false;
}
break;
case 0x03: // Display on/off
{
const bool disable = ConvertToBoolUnchecked(value & 0x01);
Log_DebugPrintf("Display %s", disable ? "disabled" : "enabled");
SynchronizeCRTC();
if (!m_GPUSTAT.display_disable && disable && m_GPUSTAT.vertical_interlace && !m_force_progressive_scan)
ClearDisplay();
m_GPUSTAT.display_disable = disable;
}
break;
case 0x04: // DMA Direction
{
Log_DebugPrintf("DMA direction <- 0x%02X", static_cast<u32>(param));
if (m_GPUSTAT.dma_direction != static_cast<DMADirection>(param))
{
m_GPUSTAT.dma_direction = static_cast<DMADirection>(param);
UpdateDMARequest();
}
}
break;
case 0x05: // Set display start address
{
const u32 new_value = param & CRTCState::Regs::DISPLAY_ADDRESS_START_MASK;
Log_DebugPrintf("Display address start <- 0x%08X", new_value);
System::IncrementInternalFrameNumber();
if (m_crtc_state.regs.display_address_start != new_value)
{
SynchronizeCRTC();
m_crtc_state.regs.display_address_start = new_value;
UpdateCRTCDisplayParameters();
}
}
break;
case 0x06: // Set horizontal display range
{
const u32 new_value = param & CRTCState::Regs::HORIZONTAL_DISPLAY_RANGE_MASK;
Log_DebugPrintf("Horizontal display range <- 0x%08X", new_value);
if (m_crtc_state.regs.horizontal_display_range != new_value)
{
SynchronizeCRTC();
m_crtc_state.regs.horizontal_display_range = new_value;
UpdateCRTCConfig();
}
}
break;
case 0x07: // Set vertical display range
{
const u32 new_value = param & CRTCState::Regs::VERTICAL_DISPLAY_RANGE_MASK;
Log_DebugPrintf("Vertical display range <- 0x%08X", new_value);
if (m_crtc_state.regs.vertical_display_range != new_value)
{
SynchronizeCRTC();
m_crtc_state.regs.vertical_display_range = new_value;
UpdateCRTCConfig();
}
}
break;
case 0x08: // Set display mode
{
union GP1_08h
{
u32 bits;
BitField<u32, u8, 0, 2> horizontal_resolution_1;
BitField<u32, bool, 2, 1> vertical_resolution;
BitField<u32, bool, 3, 1> pal_mode;
BitField<u32, bool, 4, 1> display_area_color_depth;
BitField<u32, bool, 5, 1> vertical_interlace;
BitField<u32, bool, 6, 1> horizontal_resolution_2;
BitField<u32, bool, 7, 1> reverse_flag;
};
const GP1_08h dm{param};
GPUSTAT new_GPUSTAT{m_GPUSTAT.bits};
new_GPUSTAT.horizontal_resolution_1 = dm.horizontal_resolution_1;
new_GPUSTAT.vertical_resolution = dm.vertical_resolution;
new_GPUSTAT.pal_mode = dm.pal_mode;
new_GPUSTAT.display_area_color_depth_24 = dm.display_area_color_depth;
new_GPUSTAT.vertical_interlace = dm.vertical_interlace;
new_GPUSTAT.horizontal_resolution_2 = dm.horizontal_resolution_2;
new_GPUSTAT.reverse_flag = dm.reverse_flag;
Log_DebugPrintf("Set display mode <- 0x%08X", dm.bits);
if (!m_GPUSTAT.vertical_interlace && dm.vertical_interlace && !m_force_progressive_scan)
{
// A bit of a hack: technically we should pull in the previous frame, but it may no longer exist.
ClearDisplay();
}
if (m_GPUSTAT.bits != new_GPUSTAT.bits)
{
// Have to be careful when setting this because Synchronize() can modify GPUSTAT.
static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000);
m_command_tick_event->InvokeEarly();
SynchronizeCRTC();
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK);
UpdateCRTCConfig();
}
}
break;
case 0x09: // Allow texture disable
{
m_set_texture_disable_mask = ConvertToBoolUnchecked(param & 0x01);
Log_DebugPrintf("Set texture disable mask <- %s", m_set_texture_disable_mask ? "allowed" : "ignored");
}
break;
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1A:
case 0x1B:
case 0x1C:
case 0x1D:
case 0x1E:
case 0x1F:
{
HandleGetGPUInfoCommand(value);
}
break;
default:
Log_ErrorPrintf("Unimplemented GP1 command 0x%02X", command);
break;
}
}
void GPU::HandleGetGPUInfoCommand(u32 value)
{
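// Only the low 3 bits of the parameter select the value; where defined, the result is latched into GPUREAD.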
const u8 subcommand = Truncate8(value & 0x07);
switch (subcommand)
{
case 0x00:
case 0x01:
case 0x06:
case 0x07:
// leave GPUREAD intact
break;
case 0x02: // Get Texture Window
{
Log_DebugPrintf("Get texture window");
m_GPUREAD_latch = m_draw_mode.texture_window_value;
}
break;
case 0x03: // Get Draw Area Top Left
{
Log_DebugPrintf("Get drawing area top left");
m_GPUREAD_latch =
((m_drawing_area.left & UINT32_C(0b1111111111)) | ((m_drawing_area.top & UINT32_C(0b1111111111)) << 10));
}
break;
case 0x04: // Get Draw Area Bottom Right
{
Log_DebugPrintf("Get drawing area bottom right");
m_GPUREAD_latch =
((m_drawing_area.right & UINT32_C(0b1111111111)) | ((m_drawing_area.bottom & UINT32_C(0b1111111111)) << 10));
}
break;
case 0x05: // Get Drawing Offset
{
Log_DebugPrintf("Get drawing offset");
m_GPUREAD_latch =
((m_drawing_offset.x & INT32_C(0b11111111111)) | ((m_drawing_offset.y & INT32_C(0b11111111111)) << 11));
}
break;
default:
Log_WarningPrintf("Unhandled GetGPUInfo(0x%02X)", ZeroExtend32(subcommand));
break;
}
}
void GPU::ClearDisplay() {}
void GPU::UpdateDisplay() {}
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {}
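// GP0(02h) fill: converts the fill color to the native 16-bit VRAM format;
// rows and columns wrap around the 1024x512 framebuffer.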
void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
const u16 color16 = VRAMRGBA8888ToRGBA5551(color);
if ((x + width) <= VRAM_WIDTH && !IsInterlacedRenderingEnabled())
{
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
}
}
else if (IsInterlacedRenderingEnabled())
{
// Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
if (IsCRTCScanlinePending())
SynchronizeCRTC();
const u32 active_field = GetActiveLineLSB();
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
if ((row & u32(1)) == active_field)
continue;
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
row_ptr[col] = color16;
}
}
}
else
{
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
row_ptr[col] = color16;
}
}
}
}
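// CPU->VRAM upload (GP0 A0h). set_mask forces bit 15 on written pixels;
// check_mask skips destination pixels whose mask bit (bit 15) is already set.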
void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
{
// Fast path when the copy is not oversized.
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
{
const u16* src_ptr = static_cast<const u16*>(data);
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
std::copy_n(src_ptr, width, dst_ptr);
src_ptr += width;
dst_ptr += VRAM_WIDTH;
}
}
else
{
// Slow path when we need to handle wrap-around.
// During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
const u16* src_ptr = static_cast<const u16*>(data);
const u16 mask_and = check_mask ? 0x8000 : 0;
const u16 mask_or = set_mask ? 0x8000 : 0;
for (u32 row = 0; row < height;)
{
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width;)
{
// TODO: Handle unaligned reads...
u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
if (((*pixel_ptr) & mask_and) == 0)
*pixel_ptr = *(src_ptr++) | mask_or;
}
}
}
}
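// VRAM->VRAM copy (GP0 80h), honoring the mask bit settings currently in GPUSTAT.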
void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
// Break up oversized copies. This behavior has not been verified on console.
if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
{
u32 remaining_rows = height;
u32 current_src_y = src_y;
u32 current_dst_y = dst_y;
while (remaining_rows > 0)
{
const u32 rows_to_copy =
std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
u32 remaining_columns = width;
u32 current_src_x = src_x;
u32 current_dst_x = dst_x;
while (remaining_columns > 0)
{
const u32 columns_to_copy =
std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy);
current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
remaining_columns -= columns_to_copy;
}
current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
remaining_rows -= rows_to_copy;
}
return;
}
// This doesn't have a fast path, but do we really need one? It's not common.
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_or = m_GPUSTAT.GetMaskOR();
// Copy in reverse when src_x < dst_x; this behavior is verified on console.
if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
else
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
}
void GPU::DispatchRenderCommand() {}
void GPU::FlushRender() {}
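// GP0(E1h) draw mode / texture page register. Only changed fields are applied,
// and bits 0..10 are mirrored into GPUSTAT below.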
void GPU::SetDrawMode(u16 value)
{
GPUDrawModeReg new_mode_reg{static_cast<u16>(value & GPUDrawModeReg::MASK)};
if (!m_set_texture_disable_mask)
new_mode_reg.texture_disable = false;
if (new_mode_reg.bits == m_draw_mode.mode_reg.bits)
return;
if ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) !=
(m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK))
{
m_draw_mode.texture_page_x = new_mode_reg.GetTexturePageBaseX();
m_draw_mode.texture_page_y = new_mode_reg.GetTexturePageBaseY();
m_draw_mode.texture_page_changed = true;
}
m_draw_mode.mode_reg.bits = new_mode_reg.bits;
if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field)
FlushRender();
// Bits 0..10 are returned in the GPU status register.
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
(ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK);
m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable;
}
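// CLUT attribute from textured primitives: bits 0..5 give the X coordinate in
// 16-halfword units, the remaining bits the Y line.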
void GPU::SetTexturePalette(u16 value)
{
value &= DrawMode::PALETTE_MASK;
if (m_draw_mode.palette_reg == value)
return;
m_draw_mode.texture_palette_x = ZeroExtend32(value & 0x3F) * 16;
m_draw_mode.texture_palette_y = ZeroExtend32(value >> 6);
m_draw_mode.palette_reg = value;
m_draw_mode.texture_page_changed = true;
}
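// GP0(E2h) texture window: texture coordinates are remapped as (coord & and_x/y) | or_x/y,
// with the mask and offset fields given in 8-pixel units.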
void GPU::SetTextureWindow(u32 value)
{
value &= DrawMode::TEXTURE_WINDOW_MASK;
if (m_draw_mode.texture_window_value == value)
return;
FlushRender();
const u8 mask_x = Truncate8(value & UINT32_C(0x1F));
const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F));
const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F));
const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F));
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", mask_x, mask_y, offset_x, offset_y);
m_draw_mode.texture_window.and_x = ~(mask_x * 8);
m_draw_mode.texture_window.and_y = ~(mask_y * 8);
m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
m_draw_mode.texture_window_value = value;
m_draw_mode.texture_window_changed = true;
}
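// Dumps the full 1024x512 VRAM to disk, either as a PNG (converted to RGBA8)
// or as raw 16-bit data, selected by the file extension.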
bool GPU::DumpVRAMToFile(const char* filename)
{
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
const char* extension = std::strrchr(filename, '.');
if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
{
return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, m_vram_ptr, true);
}
else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
{
return FileSystem::WriteBinaryFile(filename, m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
else
{
Log_ErrorPrintf("Unknown extension: '%s'", filename);
return false;
}
}
bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha)
{
auto fp = FileSystem::OpenManagedCFile(filename, "wb");
if (!fp)
{
Log_ErrorPrintf("Can't open file '%s'", filename);
return false;
}
auto rgba8_buf = std::make_unique<u32[]>(width * height);
const char* ptr_in = static_cast<const char*>(buffer);
u32* ptr_out = rgba8_buf.get();
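// Convert 15-bit VRAM pixels to RGBA8888; remove_alpha forces the mask bit on
// so every output pixel is fully opaque.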
for (u32 row = 0; row < height; row++)
{
const char* row_ptr_in = ptr_in;
for (u32 col = 0; col < width; col++)
{
u16 src_col;
std::memcpy(&src_col, row_ptr_in, sizeof(u16));
row_ptr_in += sizeof(u16);
*(ptr_out++) = VRAMRGBA5551ToRGBA8888(remove_alpha ? (src_col | u16(0x8000)) : src_col);
}
ptr_in += stride;
}
const auto write_func = [](void* context, void* data, int size) {
std::fwrite(data, 1, size, static_cast<std::FILE*>(context));
};
return (stbi_write_png_to_func(write_func, fp.get(), width, height, 4, rgba8_buf.get(), sizeof(u32) * width) != 0);
}
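// ImGui debug window showing per-frame draw statistics, GPUSTAT flags and CRTC state.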
void GPU::DrawDebugStateWindow()
{
const float framebuffer_scale = Host::GetOSDScale();
ImGui::SetNextWindowSize(ImVec2(450.0f * framebuffer_scale, 550.0f * framebuffer_scale), ImGuiCond_FirstUseEver);
if (!ImGui::Begin("GPU", nullptr))
{
ImGui::End();
return;
}
const bool is_idle_frame = m_stats.num_polygons == 0;
if (!is_idle_frame)
{
m_last_stats = m_stats;
m_stats = {};
}
if (ImGui::CollapsingHeader("Statistics", ImGuiTreeNodeFlags_DefaultOpen))
{
const Stats& stats = m_last_stats;
ImGui::Columns(2);
ImGui::SetColumnWidth(0, 200.0f * framebuffer_scale);
ImGui::TextUnformatted("Idle Frame: ");
ImGui::NextColumn();
ImGui::Text("%s", is_idle_frame ? "Yes" : "No");
ImGui::NextColumn();
ImGui::TextUnformatted("VRAM Reads: ");
ImGui::NextColumn();
ImGui::Text("%u", stats.num_vram_reads);
ImGui::NextColumn();
ImGui::TextUnformatted("VRAM Fills: ");
ImGui::NextColumn();
ImGui::Text("%u", stats.num_vram_fills);
ImGui::NextColumn();
ImGui::TextUnformatted("VRAM Writes: ");
ImGui::NextColumn();
ImGui::Text("%u", stats.num_vram_writes);
ImGui::NextColumn();
ImGui::TextUnformatted("VRAM Copies: ");
ImGui::NextColumn();
ImGui::Text("%u", stats.num_vram_copies);
ImGui::NextColumn();
ImGui::TextUnformatted("Vertices Processed: ");
ImGui::NextColumn();
ImGui::Text("%u", stats.num_vertices);
ImGui::NextColumn();
ImGui::TextUnformatted("Polygons Drawn: ");
ImGui::NextColumn();
ImGui::Text("%u", stats.num_polygons);
ImGui::NextColumn();
ImGui::Columns(1);
}
DrawRendererStats(is_idle_frame);
if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen))
{
static constexpr std::array<const char*, 4> state_strings = {
{"Idle", "Reading VRAM", "Writing VRAM", "Drawing Polyline"}};
ImGui::Text("State: %s", state_strings[static_cast<u8>(m_blitter_state)]);
ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled");
ImGui::Text("Draw To Displayed Field: %s", m_GPUSTAT.draw_to_displayed_field ? "Enabled" : "Disabled");
ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No");
ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No");
ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No");
ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No");
ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No");
ImGui::Text("Interrupt Request: %s", m_GPUSTAT.interrupt_request ? "Yes" : "No");
ImGui::Text("DMA Request: %s", m_GPUSTAT.dma_data_request ? "Yes" : "No");
}
if (ImGui::CollapsingHeader("CRTC", ImGuiTreeNodeFlags_DefaultOpen))
{
const auto& cs = m_crtc_state;
ImGui::Text("Clock: %s", (m_console_is_pal ? (m_GPUSTAT.pal_mode ? "PAL-on-PAL" : "NTSC-on-PAL") :
(m_GPUSTAT.pal_mode ? "PAL-on-NTSC" : "NTSC-on-NTSC")));
ImGui::Text("Horizontal Frequency: %.3f KHz", ComputeHorizontalFrequency() / 1000.0f);
ImGui::Text("Vertical Frequency: %.3f Hz", ComputeVerticalFrequency());
ImGui::Text("Dot Clock Divider: %u", cs.dot_clock_divider);
ImGui::Text("Vertical Interlace: %s (%s field)", m_GPUSTAT.vertical_interlace ? "Yes" : "No",
cs.interlaced_field ? "odd" : "even");
ImGui::Text("Current Scanline: %u (tick %u)", cs.current_scanline, cs.current_tick_in_scanline);
ImGui::Text("Display Disable: %s", m_GPUSTAT.display_disable ? "Yes" : "No");
ImGui::Text("Displaying Odd Lines: %s", cs.active_line_lsb ? "Yes" : "No");
ImGui::Text("Color Depth: %u-bit", m_GPUSTAT.display_area_color_depth_24 ? 24 : 15);
ImGui::Text("Start Offset in VRAM: (%u, %u)", cs.regs.X.GetValue(), cs.regs.Y.GetValue());
ImGui::Text("Display Total: %u (%u) horizontal, %u vertical", cs.horizontal_total,
cs.horizontal_total / cs.dot_clock_divider, cs.vertical_total);
ImGui::Text("Configured Display Range: %u-%u (%u-%u), %u-%u", cs.regs.X1.GetValue(), cs.regs.X2.GetValue(),
cs.regs.X1.GetValue() / cs.dot_clock_divider, cs.regs.X2.GetValue() / cs.dot_clock_divider,
cs.regs.Y1.GetValue(), cs.regs.Y2.GetValue());
ImGui::Text("Output Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_display_start, cs.horizontal_display_end,
cs.horizontal_display_start / cs.dot_clock_divider, cs.horizontal_display_end / cs.dot_clock_divider,
cs.vertical_display_start, cs.vertical_display_end);
ImGui::Text("Cropping: %s", Settings::GetDisplayCropModeName(g_settings.display_crop_mode));
ImGui::Text("Visible Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_visible_start, cs.horizontal_visible_end,
cs.horizontal_visible_start / cs.dot_clock_divider, cs.horizontal_visible_end / cs.dot_clock_divider,
cs.vertical_visible_start, cs.vertical_visible_end);
ImGui::Text("Display Resolution: %ux%u", cs.display_width, cs.display_height);
ImGui::Text("Display Origin: %u, %u", cs.display_origin_left, cs.display_origin_top);
ImGui::Text("Displayed/Visible VRAM Portion: %ux%u @ (%u, %u)", cs.display_vram_width, cs.display_vram_height,
cs.display_vram_left, cs.display_vram_top);
ImGui::Text("Padding: Left=%d, Top=%d, Right=%d, Bottom=%d", cs.display_origin_left, cs.display_origin_top,
cs.display_width - cs.display_vram_width - cs.display_origin_left,
cs.display_height - cs.display_vram_height - cs.display_origin_top);
}
ImGui::End();
}
void GPU::DrawRendererStats(bool is_idle_frame) {}