Duckstation/src/core/mdec.h

151 lines
3.7 KiB
C
Raw Normal View History

2019-09-29 02:51:34 +00:00
#pragma once
#include "common/bitfield.h"
#include "common/fifo_queue.h"
#include "types.h"
2019-09-29 08:11:18 +00:00
#include <array>
#include <memory>
2019-09-29 02:51:34 +00:00
// Forward declarations: this header only refers to StateWrapper by reference and holds
// TimingEvent through a std::unique_ptr, so the full definitions are not needed here.
class StateWrapper;
class TimingEvent;
2019-09-29 02:51:34 +00:00
class MDEC
{
public:
MDEC();
~MDEC();
JIT optimizations and refactoring (#675) * CPU/Recompiler: Use rel32 call where possible for no-args * JitCodeBuffer: Support using preallocated buffer * CPU/Recompiler/AArch64: Use bl instead of blr for short branches * CPU/CodeCache: Allocate recompiler buffer in program space This means we don't need 64-bit moves for every call out of the recompiler. * GTE: Don't store as u16 and load as u32 * CPU/Recompiler: Add methods to emit global load/stores * GTE: Convert class to namespace * CPU/Recompiler: Call GTE functions directly * Settings: Turn into a global variable * GPU: Replace local pointers with global * InterruptController: Turn into a global pointer * System: Replace local pointers with global * Timers: Turn into a global instance * DMA: Turn into a global instance * SPU: Turn into a global instance * CDROM: Turn into a global instance * MDEC: Turn into a global instance * Pad: Turn into a global instance * SIO: Turn into a global instance * CDROM: Move audio FIFO to the heap * CPU/Recompiler: Drop ASMFunctions No longer needed since we have code in the same 4GB window. * CPUCodeCache: Turn class into namespace * Bus: Local pointer -> global pointers * CPU: Turn class into namespace * Bus: Turn into namespace * GTE: Store registers in CPU state struct Allows relative addressing on ARM. * CPU/Recompiler: Align code storage to page size * CPU/Recompiler: Fix relative branches on A64 * HostInterface: Local references to global * System: Turn into a namespace, move events out * Add guard pages * Android: Fix build
2020-07-31 07:09:18 +00:00
void Initialize();
void Shutdown();
2019-09-29 02:51:34 +00:00
void Reset();
bool DoState(StateWrapper& sw);
// I/O
u32 ReadRegister(u32 offset);
void WriteRegister(u32 offset, u32 value);
void DMARead(u32* words, u32 word_count);
void DMAWrite(const u32* words, u32 word_count);
2019-09-29 02:51:34 +00:00
2019-10-26 03:31:21 +00:00
void DrawDebugStateWindow();
2019-10-12 12:47:00 +00:00
2019-09-29 02:51:34 +00:00
private:
2020-03-25 14:13:07 +00:00
static constexpr u32 DATA_IN_FIFO_SIZE = 1024;
static constexpr u32 DATA_OUT_FIFO_SIZE = 768;
static constexpr u32 NUM_BLOCKS = 6;
static constexpr TickCount TICKS_PER_BLOCK = 448;
2019-09-29 02:51:34 +00:00
enum DataOutputDepth : u8
{
DataOutputDepth_4Bit = 0,
DataOutputDepth_8Bit = 1,
DataOutputDepth_24Bit = 2,
DataOutputDepth_15Bit = 3
};
2019-09-29 08:11:18 +00:00
enum class Command : u8
{
None = 0,
DecodeMacroblock = 1,
SetIqTab = 2,
SetScale = 3
};
2020-03-25 14:13:07 +00:00
enum class State : u8
{
Idle,
DecodingMacroblock,
WritingMacroblock,
SetIqTable,
SetScaleTable
};
2019-09-29 02:51:34 +00:00
union StatusRegister
{
u32 bits;
BitField<u32, bool, 31, 1> data_out_fifo_empty;
BitField<u32, bool, 30, 1> data_in_fifo_full;
BitField<u32, bool, 29, 1> command_busy;
BitField<u32, bool, 28, 1> data_in_request;
BitField<u32, bool, 27, 1> data_out_request;
BitField<u32, DataOutputDepth, 25, 2> data_output_depth;
BitField<u32, bool, 24, 1> data_output_signed;
BitField<u32, u8, 23, 1> data_output_bit15;
BitField<u32, u8, 16, 3> current_block;
BitField<u32, u16, 0, 16> parameter_words_remaining;
};
union ControlRegister
{
u32 bits;
BitField<u32, bool, 31, 1> reset;
BitField<u32, bool, 30, 1> enable_dma_in;
BitField<u32, bool, 29, 1> enable_dma_out;
};
2019-09-29 08:11:18 +00:00
union CommandWord
{
u32 bits;
BitField<u32, Command, 29, 3> command;
BitField<u32, DataOutputDepth, 27, 2> data_output_depth;
BitField<u32, bool, 26, 1> data_output_signed;
BitField<u32, u8, 25, 1> data_output_bit15;
BitField<u32, u16, 0, 16> parameter_word_count;
};
bool HasPendingBlockCopyOut() const;
2019-09-29 02:51:34 +00:00
void SoftReset();
2020-03-25 14:13:07 +00:00
void ResetDecoder();
void UpdateStatus();
2019-09-29 02:51:34 +00:00
u32 ReadDataRegister();
2019-09-29 08:11:18 +00:00
void WriteCommandRegister(u32 value);
2020-03-25 14:13:07 +00:00
void Execute();
2019-09-29 08:11:18 +00:00
bool HandleDecodeMacroblockCommand();
void HandleSetQuantTableCommand();
void HandleSetScaleCommand();
2019-09-29 08:11:18 +00:00
bool DecodeMonoMacroblock();
bool DecodeColoredMacroblock();
void ScheduleBlockCopyOut(TickCount ticks);
void CopyOutBlock();
2019-09-29 08:11:18 +00:00
// from nocash spec
bool rl_decode_block(s16* blk, const u8* qt);
2019-09-29 08:11:18 +00:00
void IDCT(s16* blk);
void yuv_to_rgb(u32 xx, u32 yy, const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,
const std::array<s16, 64>& Yblk);
void y_to_mono(const std::array<s16, 64>& Yblk);
2019-09-29 02:51:34 +00:00
2019-09-29 08:11:18 +00:00
StatusRegister m_status = {};
bool m_enable_dma_in = false;
bool m_enable_dma_out = false;
2019-09-29 08:11:18 +00:00
// Even though the DMA is in words, we access the FIFO as halfwords.
InlineFIFOQueue<u16, DATA_IN_FIFO_SIZE / sizeof(u16)> m_data_in_fifo;
InlineFIFOQueue<u32, DATA_OUT_FIFO_SIZE / sizeof(u32)> m_data_out_fifo;
2020-03-25 14:13:07 +00:00
State m_state = State::Idle;
u32 m_remaining_halfwords = 0;
2019-09-29 08:11:18 +00:00
std::array<u8, 64> m_iq_uv{};
std::array<u8, 64> m_iq_y{};
std::array<s16, 64> m_scale_table{};
// blocks, for colour: 0 - Crblk, 1 - Cbblk, 2-5 - Y 1-4
std::array<std::array<s16, 64>, NUM_BLOCKS> m_blocks;
u32 m_current_block = 0; // block (0-5)
u32 m_current_coefficient = 64; // k (in block)
u16 m_current_q_scale = 0;
2019-10-12 12:47:00 +00:00
std::array<u32, 256> m_block_rgb{};
std::unique_ptr<TimingEvent> m_block_copy_out_event;
u32 m_total_blocks_decoded = 0;
};
JIT optimizations and refactoring (#675) * CPU/Recompiler: Use rel32 call where possible for no-args * JitCodeBuffer: Support using preallocated buffer * CPU/Recompiler/AArch64: Use bl instead of blr for short branches * CPU/CodeCache: Allocate recompiler buffer in program space This means we don't need 64-bit moves for every call out of the recompiler. * GTE: Don't store as u16 and load as u32 * CPU/Recompiler: Add methods to emit global load/stores * GTE: Convert class to namespace * CPU/Recompiler: Call GTE functions directly * Settings: Turn into a global variable * GPU: Replace local pointers with global * InterruptController: Turn into a global pointer * System: Replace local pointers with global * Timers: Turn into a global instance * DMA: Turn into a global instance * SPU: Turn into a global instance * CDROM: Turn into a global instance * MDEC: Turn into a global instance * Pad: Turn into a global instance * SIO: Turn into a global instance * CDROM: Move audio FIFO to the heap * CPU/Recompiler: Drop ASMFunctions No longer needed since we have code in the same 4GB window. * CPUCodeCache: Turn class into namespace * Bus: Local pointer -> global pointers * CPU: Turn class into namespace * Bus: Turn into namespace * GTE: Store registers in CPU state struct Allows relative addressing on ARM. * CPU/Recompiler: Align code storage to page size * CPU/Recompiler: Fix relative branches on A64 * HostInterface: Local references to global * System: Turn into a namespace, move events out * Add guard pages * Android: Fix build
2020-07-31 07:09:18 +00:00
// Global MDEC instance; only declared here, the definition lives in another translation unit.
extern MDEC g_mdec;