mirror of
				https://github.com/RetroDECK/Duckstation.git
				synced 2025-04-10 19:15:14 +00:00 
			
		
		
		
	
		
			
	
	
		
			606 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			606 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
|   | // Copyright (c) Microsoft Corporation. All rights reserved.
 | ||
|  | 
 | ||
|  | /*==========================================================================;
 | ||
|  | * | ||
|  | *  Copyright (C) Microsoft Corporation.  All Rights Reserved. | ||
|  | * | ||
|  | *  File:       PIXEventsCommon.h | ||
|  | *  Content:    PIX include file | ||
|  | *              Don't include this file directly - use pix3.h | ||
|  | * | ||
|  | ****************************************************************************/ | ||
|  | #pragma once
 | ||
|  | 
 | ||
|  | #ifndef _PIXEventsCommon_H_
 | ||
|  | #define _PIXEventsCommon_H_
 | ||
|  | 
 | ||
// Define PIX_XBOX when any of the Xbox platform macros is present, so the rest
// of the header can test a single symbol.
#if defined(XBOX)                   \
    || defined(_XBOX_ONE)           \
    || defined(_DURANGO)            \
    || defined(_GAMING_XBOX)        \
    || defined(_GAMING_XBOX_SCARLETT)
#  define PIX_XBOX
#endif
|  | 
 | ||
|  | #include <cstdint>
 | ||
|  | 
 | ||
|  | #if defined(_M_X64) || defined(_M_IX86)
 | ||
|  | #include <emmintrin.h>
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | //
 | ||
|  | // The PIXBeginEvent and PIXSetMarker functions have an optimized path for
 | ||
|  | // copying strings that works by copying 128 bits or 64 bits at a time. In some
 | ||
|  | // circumstances this may result in PIX logging the remaining memory after the
 | ||
|  | // null terminator.
 | ||
|  | //
 | ||
|  | // By default this optimization is enabled unless Address Sanitizer is enabled,
 | ||
|  | // since this optimization can trigger a global-buffer-overflow when copying
 | ||
|  | // string literals.
 | ||
|  | //
 | ||
|  | // The PIX_ENABLE_BLOCK_ARGUMENT_COPY controls whether or not this optimization
 | ||
|  | // is enabled. Applications may also explicitly set this macro to 0 to disable
 | ||
|  | // the optimization if necessary.
 | ||
|  | //
 | ||
|  | 
 | ||
// Detect Address Sanitizer. Compilers that provide __has_feature are asked
// directly; otherwise the __SANITIZE_ADDRESS__ predefined macro is consulted.
#ifdef __has_feature
#  if __has_feature(address_sanitizer)
#    define PIX_ASAN_ENABLED
#  endif
#elif defined(__SANITIZE_ADDRESS__)
#  define PIX_ASAN_ENABLED
#endif

// Resolve PIX_ENABLE_BLOCK_ARGUMENT_COPY.
// PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET records whether this header supplied the
// value (1) or the includer did (0); it is consulted at the end of the header
// to decide whether the macro must be undefined again.
#ifdef PIX_ENABLE_BLOCK_ARGUMENT_COPY
   // A value chosen by the includer always wins.
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 0
#elif defined(PIX_ASAN_ENABLED)
   // Block copies may read past a string's terminator, which ASan reports.
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY 0
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1
#else
   // Default: optimization enabled.
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY 1
#  define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1
#endif
|  | 
 | ||
|  | struct PIXEventsBlockInfo; | ||
|  | 
 | ||
|  | struct PIXEventsThreadInfo | ||
|  | { | ||
|  |     PIXEventsBlockInfo* block; | ||
|  |     UINT64* biasedLimit; | ||
|  |     UINT64* destination; | ||
|  | }; | ||
|  | 
 | ||
|  | #ifdef PIX_XBOX
 | ||
|  | extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime) noexcept; | ||
|  | #else
 | ||
|  | extern "C" UINT64 WINAPI PIXEventsReplaceBlock(PIXEventsThreadInfo * threadInfo, bool getEarliestTime) noexcept; | ||
|  | #endif
 | ||
|  | 
 | ||
// Event type codes stored in the type field of an encoded event header
// (see PIXEncodeEventInfo). Each "OnContext" code equals its plain counterpart
// plus 0x010.
enum PIXEventType
{
    // Events without a context
    PIXEvent_EndEvent                       = 0x000,
    PIXEvent_BeginEvent_VarArgs             = 0x001,
    PIXEvent_BeginEvent_NoArgs              = 0x002,
    PIXEvent_SetMarker_VarArgs              = 0x007,
    PIXEvent_SetMarker_NoArgs               = 0x008,

    // Events issued on a context
    PIXEvent_EndEvent_OnContext             = PIXEvent_EndEvent           + 0x010,  // 0x010
    PIXEvent_BeginEvent_OnContext_VarArgs   = PIXEvent_BeginEvent_VarArgs + 0x010,  // 0x011
    PIXEvent_BeginEvent_OnContext_NoArgs    = PIXEvent_BeginEvent_NoArgs  + 0x010,  // 0x012
    PIXEvent_SetMarker_OnContext_VarArgs    = PIXEvent_SetMarker_VarArgs  + 0x010,  // 0x017
    PIXEvent_SetMarker_OnContext_NoArgs     = PIXEvent_SetMarker_NoArgs   + 0x010,  // 0x018
};
|  | 
 | ||
|  | static const UINT64 PIXEventsReservedRecordSpaceQwords = 64; | ||
|  | //this is used to make sure SSE string copy always will end 16-byte write in the current block
 | ||
|  | //this way only a check if destination < limit can be performed, instead of destination < limit - 1
 | ||
|  | //since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve
 | ||
|  | //so even if SSE overwrites 8 extra bytes, those will still belong to the correct block
 | ||
|  | //on next iteration check destination will be greater than limit
 | ||
|  | //this is used as well for fixed size UMD events and PIXEndEvent since these require less space
 | ||
|  | //than other variable length user events and do not need big reserved space
 | ||
|  | static const UINT64 PIXEventsReservedTailSpaceQwords = 2; | ||
|  | static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords; | ||
|  | static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64; | ||
|  | 
 | ||
|  | //Bits 7-19 (13 bits)
 | ||
|  | static const UINT64 PIXEventsBlockEndMarker     = 0x00000000000FFF80; | ||
|  | 
 | ||
|  | //Bits 10-19 (10 bits)
 | ||
|  | static const UINT64 PIXEventsTypeReadMask       = 0x00000000000FFC00; | ||
|  | static const UINT64 PIXEventsTypeWriteMask      = 0x00000000000003FF; | ||
|  | static const UINT64 PIXEventsTypeBitShift       = 10; | ||
|  | 
 | ||
|  | //Bits 20-63 (44 bits)
 | ||
|  | static const UINT64 PIXEventsTimestampReadMask  = 0xFFFFFFFFFFF00000; | ||
|  | static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF; | ||
|  | static const UINT64 PIXEventsTimestampBitShift  = 20; | ||
|  | 
 | ||
|  | inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType) | ||
|  | { | ||
|  |     return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) | | ||
|  |         (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift); | ||
|  | } | ||
|  | 
 | ||
|  | //Bits 60-63 (4)
 | ||
|  | static const UINT64 PIXEventsStringAlignmentWriteMask     = 0x000000000000000F; | ||
|  | static const UINT64 PIXEventsStringAlignmentReadMask      = 0xF000000000000000; | ||
|  | static const UINT64 PIXEventsStringAlignmentBitShift      = 60; | ||
|  | 
 | ||
|  | //Bits 55-59 (5)
 | ||
|  | static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F; | ||
|  | static const UINT64 PIXEventsStringCopyChunkSizeReadMask  = 0x0F80000000000000; | ||
|  | static const UINT64 PIXEventsStringCopyChunkSizeBitShift  = 55; | ||
|  | 
 | ||
|  | //Bit 54
 | ||
|  | static const UINT64 PIXEventsStringIsANSIWriteMask        = 0x0000000000000001; | ||
|  | static const UINT64 PIXEventsStringIsANSIReadMask         = 0x0040000000000000; | ||
|  | static const UINT64 PIXEventsStringIsANSIBitShift         = 54; | ||
|  | 
 | ||
|  | //Bit 53
 | ||
|  | static const UINT64 PIXEventsStringIsShortcutWriteMask    = 0x0000000000000001; | ||
|  | static const UINT64 PIXEventsStringIsShortcutReadMask     = 0x0020000000000000; | ||
|  | static const UINT64 PIXEventsStringIsShortcutBitShift     = 53; | ||
|  | 
 | ||
|  | inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut) | ||
|  | { | ||
|  |     return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) | | ||
|  |         ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) | | ||
|  |         (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) | | ||
|  |         (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift); | ||
|  | } | ||
|  | 
 | ||
|  | template<UINT alignment, class T> | ||
|  | inline bool PIXIsPointerAligned(T* pointer) | ||
|  | { | ||
|  |     return !(((UINT64)pointer) & (alignment - 1)); | ||
|  | } | ||
|  | 
 | ||
|  | // Generic template version slower because of the additional clear write
 | ||
|  | template<class T> | ||
|  | inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *destination = 0ull; | ||
|  |         *((T*)destination) = argument; | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | // int32 specialization to avoid slower double memory writes
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<INT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT32 argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument); | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | // unsigned int32 specialization to avoid slower double memory writes
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<UINT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT32 argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *destination = static_cast<UINT64>(argument); | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | // int64 specialization to avoid slower double memory writes
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<INT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT64 argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *reinterpret_cast<INT64*>(destination) = argument; | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | // unsigned int64 specialization to avoid slower double memory writes
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<UINT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT64 argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *destination = argument; | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | //floats must be cast to double during writing the data to be properly printed later when reading the data
 | ||
|  | //this is needed because when float is passed to varargs function it's cast to double
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *reinterpret_cast<double*>(destination) = static_cast<double>(argument); | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | //char has to be cast to a longer signed integer type
 | ||
|  | //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument); | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | //unsigned char has to be cast to a longer unsigned integer type
 | ||
|  | //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *destination = static_cast<UINT64>(argument); | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | //bool has to be cast to an integer since it's not explicitly supported by string format routines
 | ||
|  | //there's no format specifier for bool type, but it should work with integer format specifiers
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         *destination = static_cast<UINT64>(argument); | ||
|  |         ++destination; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) | ||
|  | { | ||
|  |     *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE); | ||
|  |     while (destination < limit) | ||
|  |     { | ||
|  |         UINT64 c = static_cast<uint8_t>(argument[0]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = 0; | ||
|  |             return; | ||
|  |         } | ||
|  |         UINT64 x = c; | ||
|  |         c = static_cast<uint8_t>(argument[1]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 8; | ||
|  |         c = static_cast<uint8_t>(argument[2]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 16; | ||
|  |         c = static_cast<uint8_t>(argument[3]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 24; | ||
|  |         c = static_cast<uint8_t>(argument[4]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 32; | ||
|  |         c = static_cast<uint8_t>(argument[5]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 40; | ||
|  |         c = static_cast<uint8_t>(argument[6]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 48; | ||
|  |         c = static_cast<uint8_t>(argument[7]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 56; | ||
|  |         *destination++ = x; | ||
|  |         argument += 8; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) | ||
|  | { | ||
|  | #if PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |     if (PIXIsPointerAligned<8>(argument)) | ||
|  |     { | ||
|  |         *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE); | ||
|  |         UINT64* source = (UINT64*)argument; | ||
|  |         while (destination < limit) | ||
|  |         { | ||
|  |             UINT64 qword = *source++; | ||
|  |             *destination++ = qword; | ||
|  |             //check if any of the characters is a terminating zero
 | ||
|  |             if (!((qword & 0xFF00000000000000) && | ||
|  |                 (qword & 0xFF000000000000) && | ||
|  |                 (qword & 0xFF0000000000) && | ||
|  |                 (qword & 0xFF00000000) && | ||
|  |                 (qword & 0xFF000000) && | ||
|  |                 (qword & 0xFF0000) && | ||
|  |                 (qword & 0xFF00) && | ||
|  |                 (qword & 0xFF))) | ||
|  |             { | ||
|  |                 break; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     else | ||
|  | #endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |     { | ||
|  |         PIXCopyEventArgumentSlowest(destination, limit, argument); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         if (argument != nullptr) | ||
|  |         { | ||
|  | #if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |             if (PIXIsPointerAligned<16>(argument)) | ||
|  |             { | ||
|  |                 *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE); | ||
|  |                 __m128i zero = _mm_setzero_si128(); | ||
|  |                 if (PIXIsPointerAligned<16>(destination)) | ||
|  |                 { | ||
|  |                     while (destination < limit) | ||
|  |                     { | ||
|  |                         __m128i mem = _mm_load_si128((__m128i*)argument); | ||
|  |                         _mm_store_si128((__m128i*)destination, mem); | ||
|  |                         //check if any of the characters is a terminating zero
 | ||
|  |                         __m128i res = _mm_cmpeq_epi8(mem, zero); | ||
|  |                         destination += 2; | ||
|  |                         if (_mm_movemask_epi8(res)) | ||
|  |                             break; | ||
|  |                         argument += 16; | ||
|  |                     } | ||
|  |                 } | ||
|  |                 else | ||
|  |                 { | ||
|  |                     while (destination < limit) | ||
|  |                     { | ||
|  |                         __m128i mem = _mm_load_si128((__m128i*)argument); | ||
|  |                         _mm_storeu_si128((__m128i*)destination, mem); | ||
|  |                         //check if any of the characters is a terminating zero
 | ||
|  |                         __m128i res = _mm_cmpeq_epi8(mem, zero); | ||
|  |                         destination += 2; | ||
|  |                         if (_mm_movemask_epi8(res)) | ||
|  |                             break; | ||
|  |                         argument += 16; | ||
|  |                     } | ||
|  |                 } | ||
|  |             } | ||
|  |             else | ||
|  | #endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |             { | ||
|  |                 PIXCopyEventArgumentSlow(destination, limit, argument); | ||
|  |             } | ||
|  |         } | ||
|  |         else | ||
|  |         { | ||
|  |             *destination++ = 0ull; | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument) | ||
|  | { | ||
|  |     PIXCopyEventArgument(destination, limit, (PCSTR)argument); | ||
|  | } | ||
|  | 
 | ||
|  | inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) | ||
|  | { | ||
|  |     *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE); | ||
|  |     while (destination < limit) | ||
|  |     { | ||
|  |         UINT64 c = static_cast<uint16_t>(argument[0]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = 0; | ||
|  |             return; | ||
|  |         } | ||
|  |         UINT64 x = c; | ||
|  |         c = static_cast<uint16_t>(argument[1]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 16; | ||
|  |         c = static_cast<uint16_t>(argument[2]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 32; | ||
|  |         c = static_cast<uint16_t>(argument[3]); | ||
|  |         if (!c) | ||
|  |         { | ||
|  |             *destination++ = x; | ||
|  |             return; | ||
|  |         } | ||
|  |         x |= c << 48; | ||
|  |         *destination++ = x; | ||
|  |         argument += 4; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) | ||
|  | { | ||
|  | #if PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |     if (PIXIsPointerAligned<8>(argument)) | ||
|  |     { | ||
|  |         *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE); | ||
|  |         UINT64* source = (UINT64*)argument; | ||
|  |         while (destination < limit) | ||
|  |         { | ||
|  |             UINT64 qword = *source++; | ||
|  |             *destination++ = qword; | ||
|  |             //check if any of the characters is a terminating zero
 | ||
|  |             //TODO: check if reversed condition is faster
 | ||
|  |             if (!((qword & 0xFFFF000000000000) && | ||
|  |                 (qword & 0xFFFF00000000) && | ||
|  |                 (qword & 0xFFFF0000) && | ||
|  |                 (qword & 0xFFFF))) | ||
|  |             { | ||
|  |                 break; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     else | ||
|  | #endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |     { | ||
|  |         PIXCopyEventArgumentSlowest(destination, limit, argument); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) | ||
|  | { | ||
|  |     if (destination < limit) | ||
|  |     { | ||
|  |         if (argument != nullptr) | ||
|  |         { | ||
|  | #if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |             if (PIXIsPointerAligned<16>(argument)) | ||
|  |             { | ||
|  |                 *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE); | ||
|  |                 __m128i zero = _mm_setzero_si128(); | ||
|  |                 if (PIXIsPointerAligned<16>(destination)) | ||
|  |                 { | ||
|  |                     while (destination < limit) | ||
|  |                     { | ||
|  |                         __m128i mem = _mm_load_si128((__m128i*)argument); | ||
|  |                         _mm_store_si128((__m128i*)destination, mem); | ||
|  |                         //check if any of the characters is a terminating zero
 | ||
|  |                         __m128i res = _mm_cmpeq_epi16(mem, zero); | ||
|  |                         destination += 2; | ||
|  |                         if (_mm_movemask_epi8(res)) | ||
|  |                             break; | ||
|  |                         argument += 8; | ||
|  |                     } | ||
|  |                 } | ||
|  |                 else | ||
|  |                 { | ||
|  |                     while (destination < limit) | ||
|  |                     { | ||
|  |                         __m128i mem = _mm_load_si128((__m128i*)argument); | ||
|  |                         _mm_storeu_si128((__m128i*)destination, mem); | ||
|  |                         //check if any of the characters is a terminating zero
 | ||
|  |                         __m128i res = _mm_cmpeq_epi16(mem, zero); | ||
|  |                         destination += 2; | ||
|  |                         if (_mm_movemask_epi8(res)) | ||
|  |                             break; | ||
|  |                         argument += 8; | ||
|  |                     } | ||
|  |                 } | ||
|  |             } | ||
|  |             else | ||
|  | #endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY
 | ||
|  |             { | ||
|  |                 PIXCopyEventArgumentSlow(destination, limit, argument); | ||
|  |             } | ||
|  |         } | ||
|  |         else | ||
|  |         { | ||
|  |             *destination++ = 0ull; | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | template<> | ||
|  | inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument) | ||
|  | { | ||
|  |     PIXCopyEventArgument(destination, limit, (PCWSTR)argument); | ||
|  | }; | ||
|  | 
 | ||
// Thin forwarding overloads over the D3D12 event methods, available only when
// one of the D3D12 headers has already been included. Markers and begin-events
// pass D3D12_EVENT_METADATA as the metadata argument together with the caller's
// data pointer and size.
#if defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__)

// ---- ID3D12GraphicsCommandList ----

inline void PIXSetGPUMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
}

inline void PIXBeginGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

inline void PIXEndGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
{
    commandList->EndEvent();
}

// ---- ID3D12CommandQueue ----

inline void PIXSetGPUMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
}

inline void PIXBeginGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

inline void PIXEndGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
{
    commandQueue->EndEvent();
}

#endif // defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__)
|  | 
 | ||
|  | template<class T> struct PIXInferScopedEventType { typedef T Type; }; | ||
|  | template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; }; | ||
|  | template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; }; | ||
|  | template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; }; | ||
|  | template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<INT64> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<UINT> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<INT> { typedef void Type; }; | ||
|  | template<> struct PIXInferScopedEventType<const INT> { typedef void Type; }; | ||
|  | 
 | ||
|  | 
 | ||
// Clean up the configuration macros. PIX_ENABLE_BLOCK_ARGUMENT_COPY is removed
// only when this header defined it (PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET is 1),
// so a value supplied by the includer is left in place. The bookkeeping macro
// itself is always removed.
#if PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
#  undef PIX_ENABLE_BLOCK_ARGUMENT_COPY
#endif

#undef PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET
|  | 
 | ||
|  | #endif //_PIXEventsCommon_H_
 |