mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-12-01 18:15:41 +00:00
MDEC: Store scale/IDCT matrix transposed
This commit is contained in:
parent
6d6659c85e
commit
e1f90e6baf
|
@ -107,6 +107,7 @@ static bool HandleDecodeMacroblockCommand();
|
||||||
static void HandleSetQuantTableCommand();
|
static void HandleSetQuantTableCommand();
|
||||||
static void HandleSetScaleCommand();
|
static void HandleSetScaleCommand();
|
||||||
|
|
||||||
|
static void SetScaleMatrix(const u16* values);
|
||||||
static bool DecodeMonoMacroblock();
|
static bool DecodeMonoMacroblock();
|
||||||
static bool DecodeColoredMacroblock();
|
static bool DecodeColoredMacroblock();
|
||||||
static void ScheduleBlockCopyOut(TickCount ticks);
|
static void ScheduleBlockCopyOut(TickCount ticks);
|
||||||
|
@ -181,7 +182,18 @@ bool MDEC::DoState(StateWrapper& sw)
|
||||||
sw.Do(&s_remaining_halfwords);
|
sw.Do(&s_remaining_halfwords);
|
||||||
sw.Do(&s_iq_uv);
|
sw.Do(&s_iq_uv);
|
||||||
sw.Do(&s_iq_y);
|
sw.Do(&s_iq_y);
|
||||||
|
|
||||||
|
if (sw.GetVersion() < 66) [[unlikely]]
|
||||||
|
{
|
||||||
|
std::array<u16, 64> old_scale_matrix;
|
||||||
|
sw.Do(&old_scale_matrix);
|
||||||
|
SetScaleMatrix(old_scale_matrix.data());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
sw.Do(&s_scale_table);
|
sw.Do(&s_scale_table);
|
||||||
|
}
|
||||||
|
|
||||||
sw.Do(&s_blocks);
|
sw.Do(&s_blocks);
|
||||||
sw.Do(&s_current_block);
|
sw.Do(&s_current_block);
|
||||||
sw.Do(&s_current_coefficient);
|
sw.Do(&s_current_coefficient);
|
||||||
|
@ -817,7 +829,7 @@ void MDEC::IDCT_Old(s16* blk)
|
||||||
{
|
{
|
||||||
s64 sum = 0;
|
s64 sum = 0;
|
||||||
for (u32 u = 0; u < 8; u++)
|
for (u32 u = 0; u < 8; u++)
|
||||||
sum += s32(blk[u * 8 + x]) * s32(s_scale_table[u * 8 + y]);
|
sum += s32(blk[u * 8 + x]) * s32(s_scale_table[y * 8 + u]);
|
||||||
temp_buffer[x + y * 8] = sum;
|
temp_buffer[x + y * 8] = sum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -827,7 +839,7 @@ void MDEC::IDCT_Old(s16* blk)
|
||||||
{
|
{
|
||||||
s64 sum = 0;
|
s64 sum = 0;
|
||||||
for (u32 u = 0; u < 8; u++)
|
for (u32 u = 0; u < 8; u++)
|
||||||
sum += s64(temp_buffer[u + y * 8]) * s32(s_scale_table[u * 8 + x]);
|
sum += s64(temp_buffer[u + y * 8]) * s32(s_scale_table[x * 8 + u]);
|
||||||
|
|
||||||
blk[x + y * 8] =
|
blk[x + y * 8] =
|
||||||
static_cast<s16>(std::clamp<s32>(SignExtendN<9, s32>((sum >> 32) + ((sum >> 31) & 1)), -128, 127));
|
static_cast<s16>(std::clamp<s32>(SignExtendN<9, s32>((sum >> 32) + ((sum >> 31) & 1)), -128, 127));
|
||||||
|
@ -923,29 +935,35 @@ bool MDEC::DecodeRLE_New(s16* blk, const u8* qt)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Computes one rounded fixed-point dot product for the IDCT: eight consecutive
// coefficients from `blk` multiplied by eight consecutive entries of `idct_matrix`.
//
// blk:         pointer to 8 coefficients; each is converted to s32 before multiplying.
// idct_matrix: pointer to 8 signed 16-bit matrix entries.
// Returns:     (sum of the eight products + 0x20000) >> 18. 0x20000 is half of 1 << 18,
//              so the shift rounds to nearest instead of truncating.
template<typename BlkType>
static s32 IDCTRow(const BlkType* blk, const s16* idct_matrix)
{
  // IDCT matrix is -32768..32767, block is -16384..16383. A single product can therefore reach
  // 2^29 (-32768 * -16384), and four of those add up to 2^31, which is one past the largest s32.
  // Accumulate in 64 bits so that corner is well-defined; every input that previously fit in
  // 32 bits produces exactly the same result as before.
  s64 sum = 0;
  for (u32 i = 0; i < 8; i++)
    sum += static_cast<s64>(static_cast<s32>(blk[i])) * static_cast<s64>(idct_matrix[i]);

  return static_cast<s32>((sum + 0x20000) >> 18);
}
|
||||||
|
|
||||||
void MDEC::IDCT_New(s16* blk)
|
void MDEC::IDCT_New(s16* blk)
|
||||||
{
|
{
|
||||||
std::array<s32, 64> temp;
|
std::array<s32, 64> temp;
|
||||||
for (u32 x = 0; x < 8; x++)
|
for (u32 x = 0; x < 8; x++)
|
||||||
{
|
{
|
||||||
for (u32 y = 0; y < 8; y++)
|
for (u32 y = 0; y < 8; y++)
|
||||||
{
|
temp[y * 8 + x] = IDCTRow(&blk[x * 8], &s_scale_table[y * 8]);
|
||||||
// TODO: We could invert scale_table to get these in row-major order,
|
|
||||||
// in which case we could do optimize this to a vector multiply.
|
|
||||||
s32 sum = 0;
|
|
||||||
for (u32 z = 0; z < 8; z++)
|
|
||||||
sum += (s32(blk[x * 8 + z]) * s32(s_scale_table[z * 8 + y])) / 8;
|
|
||||||
temp[y * 8 + x] = static_cast<s32>((sum + 0x4000) >> 15);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for (u32 x = 0; x < 8; x++)
|
for (u32 x = 0; x < 8; x++)
|
||||||
{
|
{
|
||||||
for (u32 y = 0; y < 8; y++)
|
for (u32 y = 0; y < 8; y++)
|
||||||
{
|
{
|
||||||
s32 sum = 0;
|
const s32 sum = IDCTRow(&temp[x * 8], &s_scale_table[y * 8]);
|
||||||
for (u32 z = 0; z < 8; z++)
|
blk[x * 8 + y] = static_cast<s16>(std::clamp(SignExtendN<9, s32>(sum), -128, 127));
|
||||||
sum += (temp[x * 8 + z] * s32(s_scale_table[z * 8 + y])) / 8;
|
|
||||||
blk[x * 8 + y] = static_cast<s16>(std::clamp(SignExtendN<9, s32>((sum + 0x4000) >> 15), -128, 127));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1005,11 +1023,19 @@ void MDEC::HandleSetScaleCommand()
|
||||||
{
|
{
|
||||||
DebugAssert(s_remaining_halfwords == 64);
|
DebugAssert(s_remaining_halfwords == 64);
|
||||||
|
|
||||||
// TODO: Remove extra copies..
|
|
||||||
std::array<u16, 64> packed_data;
|
std::array<u16, 64> packed_data;
|
||||||
s_data_in_fifo.PopRange(packed_data.data(), static_cast<u32>(packed_data.size()));
|
s_data_in_fifo.PopRange(packed_data.data(), static_cast<u32>(packed_data.size()));
|
||||||
s_remaining_halfwords -= 32;
|
s_remaining_halfwords -= 32;
|
||||||
std::memcpy(s_scale_table.data(), packed_data.data(), s_scale_table.size() * sizeof(s16));
|
SetScaleMatrix(packed_data.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
void MDEC::SetScaleMatrix(const u16* values)
|
||||||
|
{
|
||||||
|
for (u32 y = 0; y < 8; y++)
|
||||||
|
{
|
||||||
|
for (u32 x = 0; x < 8; x++)
|
||||||
|
s_scale_table[y * 8 + x] = values[x * 8 + y];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MDEC::DrawDebugStateWindow()
|
void MDEC::DrawDebugStateWindow()
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
|
||||||
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
|
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
|
||||||
static constexpr u32 SAVE_STATE_VERSION = 65;
|
static constexpr u32 SAVE_STATE_VERSION = 66;
|
||||||
static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42;
|
static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42;
|
||||||
|
|
||||||
static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);
|
static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);
|
||||||
|
|
Loading…
Reference in a new issue