diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index 9e4dc9d5e..622ba4ff4 100644 --- a/src/common/gsvector_neon.h +++ b/src/common/gsvector_neon.h @@ -309,6 +309,14 @@ public: ALWAYS_INLINE GSVector4i addp_s32() const { return GSVector4i(vpaddq_s32(v4s, v4s)); } + ALWAYS_INLINE u8 minv_u8() const { return vminvq_u8(vreinterpretq_u8_s32(v4s)); } + + ALWAYS_INLINE u16 maxv_u8() const { return vmaxvq_u8(vreinterpretq_u8_s32(v4s)); } + + ALWAYS_INLINE u16 minv_u16() const { return vminvq_u16(vreinterpretq_u16_s32(v4s)); } + + ALWAYS_INLINE u16 maxv_u16() const { return vmaxvq_u16(vreinterpretq_u16_s32(v4s)); } + ALWAYS_INLINE s32 minv_s32() const { return vminvq_s32(v4s); } ALWAYS_INLINE u32 minv_u32() const { return vminvq_u32(v4s); } diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index 75f044bb5..debaf164c 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -312,6 +312,70 @@ public: GSVector4i addp_s32() const { return GSVector4i(x + y, z + w, 0, 0); } + u8 minv_u8() const + { + return std::min( + U8[0], + std::min( + U8[1], + std::min( + U8[2], + std::min( + U8[3], + std::min( + U8[4], + std::min( + U8[5], + std::min( + U8[6], + std::min( + U8[7], + std::min( + U8[9], + std::min(U8[10], + std::min(U8[11], std::min(U8[12], std::min(U8[13], std::min(U8[14], U8[15])))))))))))))); + } + + u16 maxv_u8() const + { + return std::max( + U8[0], + std::max( + U8[1], + std::max( + U8[2], + std::max( + U8[3], + std::max( + U8[4], + std::max( + U8[5], + std::max( + U8[6], + std::max( + U8[7], + std::max( + U8[9], + std::max(U8[10], + std::max(U8[11], std::max(U8[12], std::max(U8[13], std::max(U8[14], U8[15])))))))))))))); + } + + u16 minv_u16() const + { + return std::min( + U16[0], + std::min(U16[1], + std::min(U16[2], std::min(U16[3], std::min(U16[4], std::min(U16[5], std::min(U16[6], U16[7]))))))); + } + + u16 maxv_u16() const + { + return std::max( + U16[0], + std::max(U16[1], + std::max(U16[2], std::max(U16[3], std::max(U16[4], std::max(U16[5], std::max(U16[6], U16[7]))))))); + } + s32 minv_s32() const { return std::min(x, std::min(y, std::min(z, w))); } u32 minv_u32() const { return std::min(U32[0], std::min(U32[1], std::min(U32[2], U32[3]))); } diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h index 60bb88573..41351cc4f 100644 --- a/src/common/gsvector_sse.h +++ b/src/common/gsvector_sse.h @@ -263,6 +263,42 @@ public: ALWAYS_INLINE GSVector4i addp_s32() const { return GSVector4i(_mm_hadd_epi32(m, m)); } + ALWAYS_INLINE u8 minv_u8() const + { + __m128i vmin = _mm_min_epu8(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + vmin = _mm_min_epu8(vmin, _mm_shuffle_epi32(vmin, _MM_SHUFFLE(1, 1, 1, 1))); + return static_cast(std::min( + static_cast(_mm_extract_epi8(vmin, 0)), + std::min(static_cast(_mm_extract_epi8(vmin, 1)), + std::min(static_cast(_mm_extract_epi8(vmin, 2)), static_cast(_mm_extract_epi8(vmin, 3)))))); + } + + ALWAYS_INLINE u16 maxv_u8() const + { + __m128i vmax = _mm_max_epu8(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + vmax = _mm_max_epu8(vmax, _mm_shuffle_epi32(vmax, _MM_SHUFFLE(1, 1, 1, 1))); + return static_cast(std::max( + static_cast(_mm_extract_epi8(vmax, 0)), + std::max(static_cast(_mm_extract_epi8(vmax, 1)), + std::max(static_cast(_mm_extract_epi8(vmax, 2)), static_cast(_mm_extract_epi8(vmax, 3)))))); + } + + ALWAYS_INLINE u16 minv_u16() const + { + __m128i vmin = _mm_min_epu16(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + vmin = _mm_min_epu16(vmin, _mm_shuffle_epi32(vmin, _MM_SHUFFLE(1, 1, 1, 1))); + return static_cast( + std::min(static_cast(_mm_extract_epi16(vmin, 0)), static_cast(_mm_extract_epi16(vmin, 1)))); + } + + ALWAYS_INLINE u16 maxv_u16() const + { + __m128i vmax = _mm_max_epu16(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + vmax = _mm_max_epu16(vmax, _mm_shuffle_epi32(vmax, _MM_SHUFFLE(1, 1, 1, 1))); + return static_cast( + std::max(static_cast(_mm_extract_epi16(vmax, 0)), static_cast(_mm_extract_epi16(vmax, 1)))); + } + ALWAYS_INLINE s32 minv_s32() const { const __m128i vmin = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2)));