mirror of
				https://github.com/RetroDECK/Duckstation.git
				synced 2025-04-10 19:15:14 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			606 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			606 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright (c) Microsoft Corporation. All rights reserved.
 | |
| 
 | |
| /*==========================================================================;
 | |
| *
 | |
| *  Copyright (C) Microsoft Corporation.  All Rights Reserved.
 | |
| *
 | |
| *  File:       PIXEventsCommon.h
 | |
| *  Content:    PIX include file
 | |
| *              Don't include this file directly - use pix3.h
 | |
| *
 | |
| ****************************************************************************/
 | |
| #pragma once
 | |
| 
 | |
| #ifndef _PIXEventsCommon_H_
 | |
| #define _PIXEventsCommon_H_
 | |
| 
 | |
| #if defined(XBOX) || defined(_XBOX_ONE) || defined(_DURANGO) || defined(_GAMING_XBOX) || defined(_GAMING_XBOX_SCARLETT)
 | |
| #define PIX_XBOX
 | |
| #endif
 | |
| 
 | |
| #include <cstdint>
 | |
| 
 | |
| #if defined(_M_X64) || defined(_M_IX86)
 | |
| #include <emmintrin.h>
 | |
| #endif
 | |
| 
 | |
| //
 | |
// The PIXBeginEvent and PIXSetMarker functions have an optimized path for
// copying strings that works by copying 128 bits or 64 bits at a time. In some
// circumstances this may result in PIX logging the remaining memory after the
// null terminator.
 | |
| //
 | |
| // By default this optimization is enabled unless Address Sanitizer is enabled,
 | |
| // since this optimization can trigger a global-buffer-overflow when copying
 | |
| // string literals.
 | |
| //
 | |
| // The PIX_ENABLE_BLOCK_ARGUMENT_COPY controls whether or not this optimization
 | |
| // is enabled. Applications may also explicitly set this macro to 0 to disable
 | |
| // the optimization if necessary.
 | |
| //
 | |
| 
 | |
// Detect Address Sanitizer. Compilers that provide __has_feature (Clang) are
// queried through it; all others (e.g. MSVC) are checked for the
// __SANITIZE_ADDRESS__ macro instead.
#if defined(__has_feature)
#  if __has_feature(address_sanitizer)
#    define PIX_ASAN_ENABLED
#  endif
#elif defined(__SANITIZE_ADDRESS__)
#  define PIX_ASAN_ENABLED
#endif

// Resolve PIX_ENABLE_BLOCK_ARGUMENT_COPY. PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
// records whether this header supplied the value (1) or the includer did (0),
// so the end of the header knows whether to undefine it again.
#if defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY)
   // A value chosen before this header was included overrides everything.
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 0
#elif defined(PIX_ASAN_ENABLED)
   // Block copies may read past the terminator, which Address Sanitizer reports.
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY 0
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1
#else
   // No override and no sanitizer: the optimization is on by default.
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY 1
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1
#endif
 | |
| 
 | |
| struct PIXEventsBlockInfo;
 | |
| 
 | |
| struct PIXEventsThreadInfo
 | |
| {
 | |
|     PIXEventsBlockInfo* block;
 | |
|     UINT64* biasedLimit;
 | |
|     UINT64* destination;
 | |
| };
 | |
| 
 | |
| #ifdef PIX_XBOX
 | |
| extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime) noexcept;
 | |
| #else
 | |
| extern "C" UINT64 WINAPI PIXEventsReplaceBlock(PIXEventsThreadInfo * threadInfo, bool getEarliestTime) noexcept;
 | |
| #endif
 | |
| 
 | |
// Event type identifiers; PIXEncodeEventInfo packs one of these into the type
// field of an event header. The *_OnContext values equal the plain values
// with 0x010 added.
enum PIXEventType
{
    // Events without a context
    PIXEvent_EndEvent = 0x000,
    PIXEvent_BeginEvent_VarArgs = 0x001,
    PIXEvent_BeginEvent_NoArgs = 0x002,
    PIXEvent_SetMarker_VarArgs = 0x007,
    PIXEvent_SetMarker_NoArgs = 0x008,

    // Events recorded on a context
    PIXEvent_EndEvent_OnContext = 0x010,
    PIXEvent_BeginEvent_OnContext_VarArgs = 0x011,
    PIXEvent_BeginEvent_OnContext_NoArgs = 0x012,
    PIXEvent_SetMarker_OnContext_VarArgs = 0x017,
    PIXEvent_SetMarker_OnContext_NoArgs = 0x018,
};
 | |
| 
 | |
| static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;
 | |
| //this is used to make sure SSE string copy always will end 16-byte write in the current block
 | |
| //this way only a check if destination < limit can be performed, instead of destination < limit - 1
 | |
| //since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve
 | |
| //so even if SSE overwrites 8 extra bytes, those will still belong to the correct block
 | |
| //on next iteration check destination will be greater than limit
 | |
| //this is used as well for fixed size UMD events and PIXEndEvent since these require less space
 | |
| //than other variable length user events and do not need big reserved space
 | |
| static const UINT64 PIXEventsReservedTailSpaceQwords = 2;
 | |
| static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
 | |
| static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;
 | |
| 
 | |
| //Bits 7-19 (13 bits)
 | |
| static const UINT64 PIXEventsBlockEndMarker     = 0x00000000000FFF80;
 | |
| 
 | |
| //Bits 10-19 (10 bits)
 | |
| static const UINT64 PIXEventsTypeReadMask       = 0x00000000000FFC00;
 | |
| static const UINT64 PIXEventsTypeWriteMask      = 0x00000000000003FF;
 | |
| static const UINT64 PIXEventsTypeBitShift       = 10;
 | |
| 
 | |
| //Bits 20-63 (44 bits)
 | |
| static const UINT64 PIXEventsTimestampReadMask  = 0xFFFFFFFFFFF00000;
 | |
| static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;
 | |
| static const UINT64 PIXEventsTimestampBitShift  = 20;
 | |
| 
 | |
| inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)
 | |
| {
 | |
|     return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) |
 | |
|         (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift);
 | |
| }
 | |
| 
 | |
| //Bits 60-63 (4)
 | |
| static const UINT64 PIXEventsStringAlignmentWriteMask     = 0x000000000000000F;
 | |
| static const UINT64 PIXEventsStringAlignmentReadMask      = 0xF000000000000000;
 | |
| static const UINT64 PIXEventsStringAlignmentBitShift      = 60;
 | |
| 
 | |
| //Bits 55-59 (5)
 | |
| static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;
 | |
| static const UINT64 PIXEventsStringCopyChunkSizeReadMask  = 0x0F80000000000000;
 | |
| static const UINT64 PIXEventsStringCopyChunkSizeBitShift  = 55;
 | |
| 
 | |
| //Bit 54
 | |
| static const UINT64 PIXEventsStringIsANSIWriteMask        = 0x0000000000000001;
 | |
| static const UINT64 PIXEventsStringIsANSIReadMask         = 0x0040000000000000;
 | |
| static const UINT64 PIXEventsStringIsANSIBitShift         = 54;
 | |
| 
 | |
| //Bit 53
 | |
| static const UINT64 PIXEventsStringIsShortcutWriteMask    = 0x0000000000000001;
 | |
| static const UINT64 PIXEventsStringIsShortcutReadMask     = 0x0020000000000000;
 | |
| static const UINT64 PIXEventsStringIsShortcutBitShift     = 53;
 | |
| 
 | |
| inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)
 | |
| {
 | |
|     return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) |
 | |
|         ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) |
 | |
|         (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) |
 | |
|         (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift);
 | |
| }
 | |
| 
 | |
| template<UINT alignment, class T>
 | |
| inline bool PIXIsPointerAligned(T* pointer)
 | |
| {
 | |
|     return !(((UINT64)pointer) & (alignment - 1));
 | |
| }
 | |
| 
 | |
| // Generic template version slower because of the additional clear write
 | |
| template<class T>
 | |
| inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *destination = 0ull;
 | |
|         *((T*)destination) = argument;
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| // int32 specialization to avoid slower double memory writes
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<INT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT32 argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument);
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| // unsigned int32 specialization to avoid slower double memory writes
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<UINT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT32 argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *destination = static_cast<UINT64>(argument);
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| // int64 specialization to avoid slower double memory writes
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<INT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT64 argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *reinterpret_cast<INT64*>(destination) = argument;
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| // unsigned int64 specialization to avoid slower double memory writes
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<UINT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT64 argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *destination = argument;
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| //floats must be cast to double during writing the data to be properly printed later when reading the data
 | |
| //this is needed because when float is passed to varargs function it's cast to double
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *reinterpret_cast<double*>(destination) = static_cast<double>(argument);
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| //char has to be cast to a longer signed integer type
 | |
| //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument);
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| //unsigned char has to be cast to a longer unsigned integer type
 | |
| //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *destination = static_cast<UINT64>(argument);
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| //bool has to be cast to an integer since it's not explicitly supported by string format routines
 | |
| //there's no format specifier for bool type, but it should work with integer format specifiers
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         *destination = static_cast<UINT64>(argument);
 | |
|         ++destination;
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
 | |
| {
 | |
|     *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
 | |
|     while (destination < limit)
 | |
|     {
 | |
|         UINT64 c = static_cast<uint8_t>(argument[0]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = 0;
 | |
|             return;
 | |
|         }
 | |
|         UINT64 x = c;
 | |
|         c = static_cast<uint8_t>(argument[1]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 8;
 | |
|         c = static_cast<uint8_t>(argument[2]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 16;
 | |
|         c = static_cast<uint8_t>(argument[3]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 24;
 | |
|         c = static_cast<uint8_t>(argument[4]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 32;
 | |
|         c = static_cast<uint8_t>(argument[5]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 40;
 | |
|         c = static_cast<uint8_t>(argument[6]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 48;
 | |
|         c = static_cast<uint8_t>(argument[7]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 56;
 | |
|         *destination++ = x;
 | |
|         argument += 8;
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
 | |
| {
 | |
| #if PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|     if (PIXIsPointerAligned<8>(argument))
 | |
|     {
 | |
|         *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
 | |
|         UINT64* source = (UINT64*)argument;
 | |
|         while (destination < limit)
 | |
|         {
 | |
|             UINT64 qword = *source++;
 | |
|             *destination++ = qword;
 | |
|             //check if any of the characters is a terminating zero
 | |
|             if (!((qword & 0xFF00000000000000) &&
 | |
|                 (qword & 0xFF000000000000) &&
 | |
|                 (qword & 0xFF0000000000) &&
 | |
|                 (qword & 0xFF00000000) &&
 | |
|                 (qword & 0xFF000000) &&
 | |
|                 (qword & 0xFF0000) &&
 | |
|                 (qword & 0xFF00) &&
 | |
|                 (qword & 0xFF)))
 | |
|             {
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     else
 | |
| #endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|     {
 | |
|         PIXCopyEventArgumentSlowest(destination, limit, argument);
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         if (argument != nullptr)
 | |
|         {
 | |
| #if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|             if (PIXIsPointerAligned<16>(argument))
 | |
|             {
 | |
|                 *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);
 | |
|                 __m128i zero = _mm_setzero_si128();
 | |
|                 if (PIXIsPointerAligned<16>(destination))
 | |
|                 {
 | |
|                     while (destination < limit)
 | |
|                     {
 | |
|                         __m128i mem = _mm_load_si128((__m128i*)argument);
 | |
|                         _mm_store_si128((__m128i*)destination, mem);
 | |
|                         //check if any of the characters is a terminating zero
 | |
|                         __m128i res = _mm_cmpeq_epi8(mem, zero);
 | |
|                         destination += 2;
 | |
|                         if (_mm_movemask_epi8(res))
 | |
|                             break;
 | |
|                         argument += 16;
 | |
|                     }
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     while (destination < limit)
 | |
|                     {
 | |
|                         __m128i mem = _mm_load_si128((__m128i*)argument);
 | |
|                         _mm_storeu_si128((__m128i*)destination, mem);
 | |
|                         //check if any of the characters is a terminating zero
 | |
|                         __m128i res = _mm_cmpeq_epi8(mem, zero);
 | |
|                         destination += 2;
 | |
|                         if (_mm_movemask_epi8(res))
 | |
|                             break;
 | |
|                         argument += 16;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             else
 | |
| #endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|             {
 | |
|                 PIXCopyEventArgumentSlow(destination, limit, argument);
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             *destination++ = 0ull;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument)
 | |
| {
 | |
|     PIXCopyEventArgument(destination, limit, (PCSTR)argument);
 | |
| }
 | |
| 
 | |
| inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
 | |
| {
 | |
|     *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
 | |
|     while (destination < limit)
 | |
|     {
 | |
|         UINT64 c = static_cast<uint16_t>(argument[0]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = 0;
 | |
|             return;
 | |
|         }
 | |
|         UINT64 x = c;
 | |
|         c = static_cast<uint16_t>(argument[1]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 16;
 | |
|         c = static_cast<uint16_t>(argument[2]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 32;
 | |
|         c = static_cast<uint16_t>(argument[3]);
 | |
|         if (!c)
 | |
|         {
 | |
|             *destination++ = x;
 | |
|             return;
 | |
|         }
 | |
|         x |= c << 48;
 | |
|         *destination++ = x;
 | |
|         argument += 4;
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
 | |
| {
 | |
| #if PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|     if (PIXIsPointerAligned<8>(argument))
 | |
|     {
 | |
|         *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
 | |
|         UINT64* source = (UINT64*)argument;
 | |
|         while (destination < limit)
 | |
|         {
 | |
|             UINT64 qword = *source++;
 | |
|             *destination++ = qword;
 | |
|             //check if any of the characters is a terminating zero
 | |
|             //TODO: check if reversed condition is faster
 | |
|             if (!((qword & 0xFFFF000000000000) &&
 | |
|                 (qword & 0xFFFF00000000) &&
 | |
|                 (qword & 0xFFFF0000) &&
 | |
|                 (qword & 0xFFFF)))
 | |
|             {
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     else
 | |
| #endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|     {
 | |
|         PIXCopyEventArgumentSlowest(destination, limit, argument);
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
 | |
| {
 | |
|     if (destination < limit)
 | |
|     {
 | |
|         if (argument != nullptr)
 | |
|         {
 | |
| #if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|             if (PIXIsPointerAligned<16>(argument))
 | |
|             {
 | |
|                 *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE);
 | |
|                 __m128i zero = _mm_setzero_si128();
 | |
|                 if (PIXIsPointerAligned<16>(destination))
 | |
|                 {
 | |
|                     while (destination < limit)
 | |
|                     {
 | |
|                         __m128i mem = _mm_load_si128((__m128i*)argument);
 | |
|                         _mm_store_si128((__m128i*)destination, mem);
 | |
|                         //check if any of the characters is a terminating zero
 | |
|                         __m128i res = _mm_cmpeq_epi16(mem, zero);
 | |
|                         destination += 2;
 | |
|                         if (_mm_movemask_epi8(res))
 | |
|                             break;
 | |
|                         argument += 8;
 | |
|                     }
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     while (destination < limit)
 | |
|                     {
 | |
|                         __m128i mem = _mm_load_si128((__m128i*)argument);
 | |
|                         _mm_storeu_si128((__m128i*)destination, mem);
 | |
|                         //check if any of the characters is a terminating zero
 | |
|                         __m128i res = _mm_cmpeq_epi16(mem, zero);
 | |
|                         destination += 2;
 | |
|                         if (_mm_movemask_epi8(res))
 | |
|                             break;
 | |
|                         argument += 8;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             else
 | |
| #endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | |
|             {
 | |
|                 PIXCopyEventArgumentSlow(destination, limit, argument);
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             *destination++ = 0ull;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<>
 | |
| inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)
 | |
| {
 | |
|     PIXCopyEventArgument(destination, limit, (PCWSTR)argument);
 | |
| };
 | |
| 
 | |
// Forwarding helpers, available only when one of the D3D12 headers has already
// been included. Each overload passes its arguments straight to the matching
// SetMarker / BeginEvent / EndEvent method of the command list or command
// queue, tagging the payload with D3D12_EVENT_METADATA.
#if defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__)

// --- SetMarker -------------------------------------------------------------

inline void PIXSetGPUMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
}

inline void PIXSetGPUMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
}

// --- BeginEvent ------------------------------------------------------------

inline void PIXBeginGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

inline void PIXBeginGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

// --- EndEvent --------------------------------------------------------------

inline void PIXEndGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
{
    commandList->EndEvent();
}

inline void PIXEndGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
{
    commandQueue->EndEvent();
}

#endif // __d3d12_x_h__ || __d3d12_xs_h__ || __d3d12_h__
 | |
| 
 | |
| template<class T> struct PIXInferScopedEventType { typedef T Type; };
 | |
| template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };
 | |
| template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };
 | |
| template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };
 | |
| template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<INT> { typedef void Type; };
 | |
| template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };
 | |
| 
 | |
| 
 | |
// Undefine PIX_ENABLE_BLOCK_ARGUMENT_COPY again when this header was the one
// that defined it (_SET == 1), leaving an includer-provided definition intact.
// The bookkeeping macro itself is always removed.
#if PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
#  undef PIX_ENABLE_BLOCK_ARGUMENT_COPY
#endif

#undef PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
 | |
| 
 | |
| #endif //_PIXEventsCommon_H_
 | 
