mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-26 07:35:41 +00:00
MDEC: Ensure alignment of inputs
This commit is contained in:
parent
f2896d55b8
commit
460acce561
|
@ -73,9 +73,9 @@ static void YUVToRGB_Scalar(const std::array<s16, 64>& Crblk, const std::array<s
|
||||||
|
|
||||||
TEST(GSVector, YUVToRGB)
|
TEST(GSVector, YUVToRGB)
|
||||||
{
|
{
|
||||||
std::array<s16, 64> crblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
|
||||||
std::array<s16, 64> cbblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
|
||||||
std::array<s16, 64> yblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;
|
||||||
for (s16 i = -128; i < 128; i++)
|
for (s16 i = -128; i < 128; i++)
|
||||||
{
|
{
|
||||||
for (u32 j = 0; j < 64; j++)
|
for (u32 j = 0; j < 64; j++)
|
||||||
|
@ -91,10 +91,10 @@ TEST(GSVector, YUVToRGB)
|
||||||
for (u32 j = 0; j < 64; j++)
|
for (u32 j = 0; j < 64; j++)
|
||||||
yblk[j] = l;
|
yblk[j] = l;
|
||||||
|
|
||||||
u32 rows[64];
|
alignas(VECTOR_ALIGNMENT) u32 rows[64];
|
||||||
YUVToRGB_Scalar(crblk, cbblk, yblk, rows, false);
|
YUVToRGB_Scalar(crblk, cbblk, yblk, rows, false);
|
||||||
|
|
||||||
u32 rowv[64];
|
alignas(VECTOR_ALIGNMENT) u32 rowv[64];
|
||||||
YUVToRGB_Vector(crblk, cbblk, yblk, rowv, false);
|
YUVToRGB_Vector(crblk, cbblk, yblk, rowv, false);
|
||||||
ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0);
|
ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0);
|
||||||
|
|
||||||
|
@ -112,9 +112,9 @@ u32 g_gsvector_yuvtorgb_temp[64];
|
||||||
|
|
||||||
TEST(GSVector, YUVToRGB_Scalar)
|
TEST(GSVector, YUVToRGB_Scalar)
|
||||||
{
|
{
|
||||||
std::array<s16, 64> crblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
|
||||||
std::array<s16, 64> cbblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
|
||||||
std::array<s16, 64> yblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;
|
||||||
for (s16 i = -128; i < 128; i++)
|
for (s16 i = -128; i < 128; i++)
|
||||||
{
|
{
|
||||||
for (u32 j = 0; j < 64; j++)
|
for (u32 j = 0; j < 64; j++)
|
||||||
|
@ -138,9 +138,9 @@ TEST(GSVector, YUVToRGB_Scalar)
|
||||||
|
|
||||||
TEST(GSVector, YUVToRGB_Vector)
|
TEST(GSVector, YUVToRGB_Vector)
|
||||||
{
|
{
|
||||||
std::array<s16, 64> crblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
|
||||||
std::array<s16, 64> cbblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
|
||||||
std::array<s16, 64> yblk;
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;
|
||||||
for (s16 i = -128; i < 128; i++)
|
for (s16 i = -128; i < 128; i++)
|
||||||
{
|
{
|
||||||
for (u32 j = 0; j < 64; j++)
|
for (u32 j = 0; j < 64; j++)
|
||||||
|
|
|
@ -142,7 +142,7 @@ struct MDECState
|
||||||
std::array<u8, 64> iq_uv{};
|
std::array<u8, 64> iq_uv{};
|
||||||
std::array<u8, 64> iq_y{};
|
std::array<u8, 64> iq_y{};
|
||||||
|
|
||||||
std::array<s16, 64> scale_table{};
|
alignas(VECTOR_ALIGNMENT) std::array<s16, 64> scale_table{};
|
||||||
|
|
||||||
// blocks, for colour: 0 - Crblk, 1 - Cbblk, 2-5 - Y 1-4
|
// blocks, for colour: 0 - Crblk, 1 - Cbblk, 2-5 - Y 1-4
|
||||||
alignas(VECTOR_ALIGNMENT) std::array<std::array<s16, 64>, NUM_BLOCKS> blocks;
|
alignas(VECTOR_ALIGNMENT) std::array<std::array<s16, 64>, NUM_BLOCKS> blocks;
|
||||||
|
@ -950,7 +950,7 @@ bool MDEC::DecodeRLE_New(s16* blk, const u8* qt)
|
||||||
static s16 IDCTRow(const s16* blk, const s16* idct_matrix)
|
static s16 IDCTRow(const s16* blk, const s16* idct_matrix)
|
||||||
{
|
{
|
||||||
// IDCT matrix is -32768..32767, block is -16384..16383. 4 adds can happen without overflow.
|
// IDCT matrix is -32768..32767, block is -16384..16383. 4 adds can happen without overflow.
|
||||||
GSVector4i sum = GSVector4i::load<false>(blk).madd_s16(GSVector4i::load<false>(idct_matrix)).addp_s32();
|
GSVector4i sum = GSVector4i::load<false>(blk).madd_s16(GSVector4i::load<true>(idct_matrix)).addp_s32();
|
||||||
return static_cast<s16>(((static_cast<s64>(sum.extract32<0>()) + static_cast<s64>(sum.extract32<1>())) + 0x20000) >>
|
return static_cast<s16>(((static_cast<s64>(sum.extract32<0>()) + static_cast<s64>(sum.extract32<1>())) + 0x20000) >>
|
||||||
18);
|
18);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue