From dd226a6073368eae6446c13457e34f2119ccf1c5 Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Tue, 23 Dec 2025 03:33:15 +0100 Subject: [PATCH] [DevSAN] Fix issue where shadow value may be overwritten According to the UR spec, the urEnqueueUSMFill API requires the user to guarantee that the pattern buffer is not freed until the command has finished. So, we use a more robust way to poison shadow memory: callers now pass a pointer to the fill pattern instead of having it staged in a thread_local static buffer that a later call could overwrite. --- libdevice/sanitizer/asan_rtl.cpp | 6 +- .../sanitizer/asan/asan_interceptor.cpp | 37 +++++++----- .../layers/sanitizer/asan/asan_libdevice.hpp | 31 +++++----- .../layers/sanitizer/asan/asan_shadow.cpp | 27 ++++----- .../layers/sanitizer/asan/asan_shadow.hpp | 6 +- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 12 ++-- .../sanitizer/msan/msan_interceptor.cpp | 15 ++--- .../layers/sanitizer/msan/msan_libdevice.hpp | 3 + .../layers/sanitizer/msan/msan_origin.hpp | 2 +- .../layers/sanitizer/msan/msan_shadow.cpp | 58 ++++++++++--------- .../layers/sanitizer/msan/msan_shadow.hpp | 27 +++++---- .../sanitizer_common/sanitizer_utils.cpp | 9 +++ .../sanitizer_common/sanitizer_utils.hpp | 13 +++-- .../layers/sanitizer/tsan/tsan_shadow.cpp | 6 +- 14 files changed, 141 insertions(+), 111 deletions(-) diff --git a/libdevice/sanitizer/asan_rtl.cpp b/libdevice/sanitizer/asan_rtl.cpp index c4319c2c6f128..c173ab35c9ad9 100644 --- a/libdevice/sanitizer/asan_rtl.cpp +++ b/libdevice/sanitizer/asan_rtl.cpp @@ -485,7 +485,7 @@ void ReportAccessError(uptr poisoned_addr, uint32_t as, bool is_recover, // Check Error Type auto *shadow_address = (__SYCL_GLOBAL__ s8 *)MemToShadow(poisoned_addr, as, debug); - int shadow_value = *shadow_address; + s8 shadow_value = *shadow_address; if (shadow_value > 0) { shadow_value = *(shadow_address + 1); } @@ -531,7 +531,7 @@ void ReportMisalignError(uptr addr, uint32_t as, bool is_recover, while (*shadow >= 0) { ++shadow; } - int shadow_value = *shadow; + s8 shadow_value = *shadow; SaveReport(ErrorType::MISALIGNED, GetMemoryTypeByShadowValue(shadow_value), 
is_recover, debug); @@ -564,7 +564,7 @@ inline int IsAddressPoisoned(uptr a, uint32_t as, size_t size, const DebugInfo *debug) { auto *shadow_address = (__SYCL_GLOBAL__ s8 *)MemToShadow(a, as, debug); if (shadow_address) { - auto shadow_value = *shadow_address; + s8 shadow_value = *shadow_address; if (shadow_value) { if (size == ASAN_SHADOW_GRANULARITY) return true; diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 75de16d79a8a3..20d01408d240d 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -21,6 +21,7 @@ #include "sanitizer_common/sanitizer_options.hpp" #include "sanitizer_common/sanitizer_stacktrace.hpp" #include "sanitizer_common/sanitizer_utils.hpp" +#include namespace ur_sanitizer_layer { namespace asan { @@ -353,22 +354,22 @@ AsanInterceptor::enqueueAllocInfo(std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, std::shared_ptr &AI) { if (AI->IsReleased) { - int ShadowByte; + const int8_t *ShadowByte; switch (AI->Type) { case AllocType::HOST_USM: - ShadowByte = kUsmHostDeallocatedMagic; + ShadowByte = &kUsmHostDeallocatedMagic; break; case AllocType::DEVICE_USM: - ShadowByte = kUsmDeviceDeallocatedMagic; + ShadowByte = &kUsmDeviceDeallocatedMagic; break; case AllocType::SHARED_USM: - ShadowByte = kUsmSharedDeallocatedMagic; + ShadowByte = &kUsmSharedDeallocatedMagic; break; case AllocType::MEM_BUFFER: - ShadowByte = kMemBufferDeallocatedMagic; + ShadowByte = &kMemBufferDeallocatedMagic; break; default: - ShadowByte = 0xff; + ShadowByte = &kUnknownMagic; assert(false && "Unknow AllocInfo Type"); } UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, @@ -377,39 +378,45 @@ AsanInterceptor::enqueueAllocInfo(std::shared_ptr &DeviceInfo, } // Init zero + static const int8_t Zero = 0; 
UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, - AI->AllocSize, 0)); + AI->AllocSize, &Zero)); uptr TailBegin = RoundUpTo(AI->UserEnd, ASAN_SHADOW_GRANULARITY); uptr TailEnd = AI->AllocBegin + AI->AllocSize; // User tail if (TailBegin != AI->UserEnd) { + static const std::array TailMagic = [] { + std::array a{}; + std::iota(a.begin(), a.end(), 0); + return a; + }(); auto Value = AI->UserEnd - RoundDownTo(AI->UserEnd, ASAN_SHADOW_GRANULARITY); UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->UserEnd, 1, - static_cast(Value))); + &TailMagic[Value])); } - int ShadowByte; + const int8_t *ShadowByte; switch (AI->Type) { case AllocType::HOST_USM: - ShadowByte = kUsmHostRedzoneMagic; + ShadowByte = &kUsmHostRedzoneMagic; break; case AllocType::DEVICE_USM: - ShadowByte = kUsmDeviceRedzoneMagic; + ShadowByte = &kUsmDeviceRedzoneMagic; break; case AllocType::SHARED_USM: - ShadowByte = kUsmSharedRedzoneMagic; + ShadowByte = &kUsmSharedRedzoneMagic; break; case AllocType::MEM_BUFFER: - ShadowByte = kMemBufferRedzoneMagic; + ShadowByte = &kMemBufferRedzoneMagic; break; case AllocType::DEVICE_GLOBAL: - ShadowByte = kDeviceGlobalRedzoneMagic; + ShadowByte = &kDeviceGlobalRedzoneMagic; break; default: - ShadowByte = 0xff; + ShadowByte = &kUnknownMagic; assert(false && "Unknow AllocInfo Type"); } diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_libdevice.hpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_libdevice.hpp index 07199a14f89ee..7ff7db02e1b02 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_libdevice.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_libdevice.hpp @@ -88,24 +88,27 @@ constexpr uint64_t ASAN_PRIVATE_SIZE = 0xffffffULL + 1; // These magic values are written to shadow for better error // reporting. 
-constexpr int kUsmDeviceRedzoneMagic = (char)0x81; -constexpr int kUsmHostRedzoneMagic = (char)0x82; -constexpr int kUsmSharedRedzoneMagic = (char)0x83; -constexpr int kMemBufferRedzoneMagic = (char)0x84; -constexpr int kDeviceGlobalRedzoneMagic = (char)0x85; -constexpr int kNullPointerRedzoneMagic = (char)0x86; +constexpr int8_t kUsmDeviceRedzoneMagic = (int8_t)0x81; +constexpr int8_t kUsmHostRedzoneMagic = (int8_t)0x82; +constexpr int8_t kUsmSharedRedzoneMagic = (int8_t)0x83; +constexpr int8_t kMemBufferRedzoneMagic = (int8_t)0x84; +constexpr int8_t kDeviceGlobalRedzoneMagic = (int8_t)0x85; +constexpr int8_t kNullPointerRedzoneMagic = (int8_t)0x86; -constexpr int kUsmDeviceDeallocatedMagic = (char)0x91; -constexpr int kUsmHostDeallocatedMagic = (char)0x92; -constexpr int kUsmSharedDeallocatedMagic = (char)0x93; -constexpr int kMemBufferDeallocatedMagic = (char)0x93; +constexpr int8_t kUsmDeviceDeallocatedMagic = (int8_t)0x91; +constexpr int8_t kUsmHostDeallocatedMagic = (int8_t)0x92; +constexpr int8_t kUsmSharedDeallocatedMagic = (int8_t)0x93; +constexpr int8_t kMemBufferDeallocatedMagic = (int8_t)0x93; -constexpr int kSharedLocalRedzoneMagic = (char)0xa1; +constexpr int8_t kSharedLocalRedzoneMagic = (int8_t)0xa1; // Same with host ASan stack -const int kPrivateLeftRedzoneMagic = (char)0xf1; -const int kPrivateMidRedzoneMagic = (char)0xf2; -const int kPrivateRightRedzoneMagic = (char)0xf3; +const int8_t kPrivateLeftRedzoneMagic = (int8_t)0xf1; +const int8_t kPrivateMidRedzoneMagic = (int8_t)0xf2; +const int8_t kPrivateRightRedzoneMagic = (int8_t)0xf3; + +// Unknown shadow value +constexpr int8_t kUnknownMagic = (int8_t)0xff; constexpr auto kSPIR_AsanDeviceGlobalMetadata = "__AsanDeviceGlobalMetadata"; constexpr auto kSPIR_AsanSpirKernelMetadata = "__AsanKernelMetadata"; diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp index b784eb228a975..e7217fef91d1a 100644 
--- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -47,7 +47,7 @@ ur_result_t ShadowMemoryCPU::Setup() { // For CPU, we use a typical page size of 4K bytes. constexpr size_t NullptrRedzoneSize = 4096; auto URes = - EnqueuePoisonShadow({}, 0, NullptrRedzoneSize, kNullPointerRedzoneMagic); + EnqueuePoisonShadow({}, 0, NullptrRedzoneSize, &kNullPointerRedzoneMagic); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueuePoisonShadow(NullPointerRZ): {}", URes); @@ -74,7 +74,8 @@ uptr ShadowMemoryCPU::MemToShadow(uptr Ptr) { } ur_result_t ShadowMemoryCPU::EnqueuePoisonShadow(ur_queue_handle_t, uptr Ptr, - uptr Size, u8 Value) { + uptr Size, + const int8_t *Value) { if (Size == 0) { return UR_RESULT_SUCCESS; } @@ -85,8 +86,8 @@ ur_result_t ShadowMemoryCPU::EnqueuePoisonShadow(ur_queue_handle_t, uptr Ptr, UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(size_t)Value); - memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + (void *)(size_t)*Value); + memset((void *)ShadowBegin, *Value, ShadowEnd - ShadowBegin + 1); return UR_RESULT_SUCCESS; } @@ -118,7 +119,7 @@ ur_result_t ShadowMemoryGPU::Setup() { << ASAN_SHADOW_SCALE; ManagedQueue Queue(Context, Device); Result = EnqueuePoisonShadow(Queue, 0, NullptrRedzoneSize, - kNullPointerRedzoneMagic); + &kNullPointerRedzoneMagic); if (Result != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueuePoisonShadow(NullPointerRZ): {}", Result); @@ -168,7 +169,7 @@ ur_result_t ShadowMemoryGPU::Destory() { ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, - u8 Value) { + const int8_t *Value) { if (Size == 0) { return UR_RESULT_SUCCESS; } @@ -180,7 +181,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, 
UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(size_t)Value); + (void *)(size_t)*Value); // Make sure the shadow memory is mapped to physical memory { @@ -216,7 +217,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)MappedPtr, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMSet(Queue, (void *)MappedPtr, (char)0, PageSize); + URes = EnqueueUSMSetZero(Queue, (void *)MappedPtr, PageSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); @@ -240,7 +241,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, UR_LOG_L(getContext()->logger, ERR, "EnqueuePoisonShadow(addr={}, count={}, value={}): {}", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(size_t)Value, URes); + (void *)(size_t)*Value, URes); return URes; } @@ -271,8 +272,8 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMSet(Queue, (void *)LocalShadowOffset, (char)0, - RequiredShadowSize); + ur_result_t URes = + EnqueueUSMSetZero(Queue, (void *)LocalShadowOffset, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); @@ -341,8 +342,8 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, Context, Device, nullptr, nullptr, NewPrivateShadowSize, (void **)&PrivateShadowOffset)); LastPrivateShadowAllocedSize = NewPrivateShadowSize; - UR_CALL_THROWS(EnqueueUSMSet(Queue, (void *)PrivateShadowOffset, (char)0, - NewPrivateShadowSize)); + UR_CALL_THROWS(EnqueueUSMSetZero(Queue, (void *)PrivateShadowOffset, + NewPrivateShadowSize)); ContextInfo->Stats.UpdateShadowMalloced(NewPrivateShadowSize); } diff --git 
a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.hpp index e504043ae3965..90744ae0d6690 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.hpp @@ -48,7 +48,7 @@ struct ShadowMemory { virtual uptr MemToShadow(uptr Ptr) = 0; virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, - uptr Size, u8 Value) = 0; + uptr Size, const int8_t *Value) = 0; virtual size_t GetShadowSize() = 0; @@ -80,7 +80,7 @@ struct ShadowMemoryCPU final : public ShadowMemory { uptr MemToShadow(uptr Ptr) override; ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, - u8 Value) override; + const int8_t *Value) override; size_t GetShadowSize() override { return 0x80000000000ULL; } @@ -106,7 +106,7 @@ struct ShadowMemoryGPU : public ShadowMemory { ur_result_t Destory() override; ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, - u8 Value) override final; + const int8_t *Value) override final; ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, uptr &Begin, uptr &End) override final; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 178aff3050481..44e0e328e78d1 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -652,8 +652,8 @@ ur_result_t urMemBufferCreate( // Update shadow memory std::shared_ptr DeviceInfo = getMsanInterceptor()->getDeviceInfo(hDevice); - UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(InternalQueue, - (uptr)Handle, size, 0)); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + InternalQueue, (uptr)Handle, size, &kMemInitializedMagic)); } } @@ -1552,8 +1552,8 @@ ur_result_t urEnqueueUSMFill( uptr MemShadow 
= DeviceInfo->Shadow->MemToShadow((uptr)pMem); ur_event_handle_t Event = nullptr; - UR_CALL(EnqueueUSMSet(hQueue, (void *)MemShadow, (char)0, size, 0, nullptr, - &Event)); + UR_CALL( + EnqueueUSMSetZero(hQueue, (void *)MemShadow, size, 0, nullptr, &Event)); Events.push_back(Event); } @@ -1645,8 +1645,8 @@ ur_result_t urEnqueueUSMMemcpy( { const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); ur_event_handle_t Event = nullptr; - UR_CALL(EnqueueUSMSet(hQueue, (void *)DstShadow, (char)0, size, 0, - nullptr, &Event)); + UR_CALL(EnqueueUSMSetZero(hQueue, (void *)DstShadow, size, 0, nullptr, + &Event)); Events.push_back(Event); } } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 806fd9f9638b5..59946476c8543 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -107,12 +107,14 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, // Update shadow memory auto EnqueuePoison = [&](const std::vector &Devices) { - u8 Value = DontCheckHostOrSharedUSM ? 0 : 0xff; for (ur_device_handle_t Device : Devices) { ManagedQueue Queue(Context, Device); std::shared_ptr DI = getDeviceInfo(Device); DI->Shadow->EnqueuePoisonShadowWithOrigin(Queue, (uptr)Allocated, Size, - Value, HeapOrigin.rawId()); + DontCheckHostOrSharedUSM + ? 
&kMemInitializedMagic + : &kMemUninitializedMagic, + HeapOrigin.rawId()); } }; if (Device) { // shared/device USM @@ -310,8 +312,8 @@ MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { MsanShadowMemoryPVC::IsDeviceUSM(GVInfo.Addr)) || (DeviceInfo->Type == DeviceType::GPU_DG2 && MsanShadowMemoryDG2::IsDeviceUSM(GVInfo.Addr))) { - UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr, - GVInfo.Size, 0)); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + Queue, GVInfo.Addr, GVInfo.Size, &kMemInitializedMagic)); ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, GVInfo.Size); } @@ -502,9 +504,8 @@ ur_result_t MsanInterceptor::prepareLaunch( ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, ContextInfo->CleanShadowSize, (void **)&LaunchInfo.Data.Host.CleanShadow)); - UR_CALL(EnqueueUSMSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow, - (char)0, ContextInfo->CleanShadowSize, 0, nullptr, - nullptr)); + UR_CALL(EnqueueUSMSetZero(Queue, (void *)LaunchInfo.Data.Host.CleanShadow, + ContextInfo->CleanShadowSize)); if (LaunchInfo.LocalWorkSize.empty()) { LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index d57d0c8553446..099f31cbfa456 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -78,6 +78,9 @@ struct MsanRuntimeData { // variable have changed constexpr std::size_t MSAN_PRIVATE_SIZE = 0xffffffULL + 1; +constexpr uint8_t kMemInitializedMagic = 0; +constexpr uint8_t kMemUninitializedMagic = 0xff; + constexpr auto kSPIR_MsanDeviceGlobalMetadata = "__MsanDeviceGlobalMetadata"; constexpr auto kSPIR_MsanSpirKernelMetadata = "__MsanKernelMetadata"; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp 
b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp index 6a3833583abe6..d1ae8a264bb43 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_origin.hpp @@ -34,7 +34,7 @@ namespace msan { class Origin { public: - uint32_t rawId() const { return raw_id_; } + const uint32_t *rawId() const { return &raw_id_; } bool isHeapOrigin() const { return isDeviceUSMOrigin() || isHostUSMOrigin() || isSharedUSMOrigin() || diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp index c61d2c66fc7d2..04a6ee5ab26f5 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -142,16 +142,17 @@ uptr MsanShadowMemoryCPU::MemToOrigin(uptr Ptr) { } ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t NumEvents, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { - return EnqueuePoisonShadowWithOrigin(Queue, Ptr, Size, Value, 0, NumEvents, - EventWaitList, OutEvent); + ur_queue_handle_t Queue, uptr Ptr, uptr Size, const u8 *Value, + uint32_t NumEvents, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { + return EnqueuePoisonShadowWithOrigin(Queue, Ptr, Size, Value, nullptr, + NumEvents, EventWaitList, OutEvent); } ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadowWithOrigin( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, - uint32_t NumEvents, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { + ur_queue_handle_t Queue, uptr Ptr, uptr Size, const u8 *Value, + const uint32_t *Origin, uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { if (Size) { { const uptr ShadowBegin = MemToShadow(Ptr); @@ 
-160,10 +161,10 @@ ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadowWithOrigin( UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(uptr)Value); - memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + (void *)(uptr)*Value); + memset((void *)ShadowBegin, *Value, ShadowEnd - ShadowBegin + 1); } - if (Origin) { + if (Origin && *Origin != 0) { const uptr OriginBegin = MemToOrigin(Ptr); const uptr OriginEnd = MemToOrigin(Ptr + Size - 1) + MSAN_ORIGIN_GRANULARITY; @@ -171,9 +172,9 @@ ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadowWithOrigin( UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonOrigin(addr={}, count={}, value={})", (void *)OriginBegin, OriginEnd - OriginBegin + 1, - (void *)(uptr)Origin); + (void *)(uptr)*Origin); // memset((void *)OriginBegin, Value, OriginEnd - OriginBegin + 1); - std::fill((uint32_t *)OriginBegin, (uint32_t *)OriginEnd, Origin); + std::fill((uint32_t *)OriginBegin, (uint32_t *)OriginEnd, *Origin); } } @@ -286,16 +287,17 @@ ur_result_t MsanShadowMemoryGPU::EnqueueVirtualMemMap( } ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t NumEvents, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { - return EnqueuePoisonShadowWithOrigin(Queue, Ptr, Size, Value, 0, NumEvents, - EventWaitList, OutEvent); + ur_queue_handle_t Queue, uptr Ptr, uptr Size, const u8 *Value, + uint32_t NumEvents, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { + return EnqueuePoisonShadowWithOrigin(Queue, Ptr, Size, Value, nullptr, + NumEvents, EventWaitList, OutEvent); } ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, - uint32_t NumEvents, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { + ur_queue_handle_t Queue, uptr Ptr, 
uptr Size, const u8 *Value, + const uint32_t *Origin, uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { if (Size == 0) { if (OutEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( @@ -316,7 +318,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonShadow(addr={}, size={}, value={})", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, - (void *)(size_t)Value); + (void *)(size_t)*Value); UR_CALL(EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1, Events.size(), @@ -325,17 +327,17 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadowWithOrigin( { uptr OriginBegin = MemToOrigin(Ptr); - uptr OriginEnd = MemToOrigin(Ptr + Size - 1) + sizeof(Origin) - 1; + uptr OriginEnd = MemToOrigin(Ptr + Size - 1) + sizeof(*Origin) - 1; UR_CALL(EnqueueVirtualMemMap(OriginBegin, OriginEnd, Events, OutEvent)); - if (Origin) { + if (Origin && *Origin != 0) { UR_LOG_L(getContext()->logger, DEBUG, "EnqueuePoisonOrigin(addr={}, size={}, value={})", (void *)OriginBegin, OriginEnd - OriginBegin + 1, - (void *)(uptr)Origin); + (void *)(uptr)*Origin); UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( - Queue, (void *)OriginBegin, sizeof(Origin), &Origin, + Queue, (void *)OriginBegin, sizeof(*Origin), Origin, OriginEnd - OriginBegin + 1, NumEvents, EventWaitList, OutEvent)); } } @@ -392,8 +394,8 @@ ur_result_t MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMSet(Queue, (void *)LocalShadowOffset, (char)0, - RequiredShadowSize); + ur_result_t URes = + EnqueueUSMSetZero(Queue, (void *)LocalShadowOffset, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset)); @@ -458,8 +460,8 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, Context, 
Device, nullptr, nullptr, NewPrivateShadowSize, (void **)&PrivateShadowOffset)); LastPrivateShadowAllocedSize = NewPrivateShadowSize; - UR_CALL_THROWS(EnqueueUSMSet(Queue, (void *)PrivateShadowOffset, (char)0, - NewPrivateShadowSize)); + UR_CALL_THROWS(EnqueueUSMSetZero(Queue, (void *)PrivateShadowOffset, + NewPrivateShadowSize)); } Base = (uptr *)PrivateBasePtr; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp index 660c93e19f2d2..2c27aca350c05 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -35,14 +35,15 @@ struct MsanShadowMemory { virtual uptr MemToOrigin(uptr Ptr) = 0; virtual ur_result_t - EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, - uint32_t NumEvents = 0, + EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, + const u8 *Value, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) = 0; virtual ur_result_t EnqueuePoisonShadowWithOrigin( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, - uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, + ur_queue_handle_t Queue, uptr Ptr, uptr Size, const u8 *Value, + const uint32_t *Origin, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) = 0; virtual ur_result_t ReleaseShadow(std::shared_ptr) { @@ -95,14 +96,15 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory { uptr MemToOrigin(uptr Ptr) override; ur_result_t - EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, - uint32_t NumEvents = 0, + EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, + const u8 *Value, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, 
ur_event_handle_t *OutEvent = nullptr) override; ur_result_t EnqueuePoisonShadowWithOrigin( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, - uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, + ur_queue_handle_t Queue, uptr Ptr, uptr Size, const u8 *Value, + const uint32_t *Origin, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override; ur_result_t AllocLocalShadow(ur_queue_handle_t, uint32_t, uptr &Begin, @@ -131,14 +133,15 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { ur_result_t Destory() override; ur_result_t - EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, - uint32_t NumEvents = 0, + EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, + const u8 *Value, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override final; ur_result_t EnqueuePoisonShadowWithOrigin( - ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t Origin, - uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, + ur_queue_handle_t Queue, uptr Ptr, uptr Size, const u8 *Value, + const uint32_t *Origin, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override; ur_result_t ReleaseShadow(std::shared_ptr AI) override final; diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index edfc4f8a56f3d..a29ff744a173b 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -358,4 +358,13 @@ void PrintUrBuildLogIfError(ur_result_t Result, ur_program_handle_t Program, } } +ur_result_t EnqueueUSMSetZero(ur_queue_handle_t 
Queue, void *Ptr, size_t Size, + uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { + static const char Zero = 0; + return getContext()->urDdiTable.Enqueue.pfnUSMFill( + Queue, Ptr, 1, &Zero, Size, NumEvents, EventWaitList, OutEvent); +} + } // namespace ur_sanitizer_layer diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 3b8fad483acd6..88243bfb08582 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -70,19 +70,20 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); template -ur_result_t EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, T Value, +ur_result_t EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, T *Value, size_t Size, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) { assert(Size % sizeof(T) == 0); - thread_local static T StaticValue; - - StaticValue = Value; return getContext()->urDdiTable.Enqueue.pfnUSMFill( - Queue, Ptr, sizeof(T), &StaticValue, Size, NumEvents, EventWaitList, - OutEvent); + Queue, Ptr, sizeof(T), Value, Size, NumEvents, EventWaitList, OutEvent); } +ur_result_t EnqueueUSMSetZero(ur_queue_handle_t Queue, void *Ptr, size_t Size, + uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr); + void PrintUrBuildLogIfError(ur_result_t Result, ur_program_handle_t Program, ur_device_handle_t *Devices, size_t NumDevices); diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp index 236f2765ef223..cc4929d9d8e3c 100644 --- 
a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_shadow.cpp @@ -175,8 +175,8 @@ ur_result_t ShadowMemoryGPU::CleanShadow(ur_queue_handle_t Queue, uptr Ptr, } // Initialize to zero - auto URes = EnqueueUSMSet(Queue, (void *)Begin, (char)0, - Size / kShadowCell * kShadowCnt * kShadowSize); + auto URes = EnqueueUSMSetZero(Queue, (void *)Begin, + Size / kShadowCell * kShadowCnt * kShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "EnqueueUSMBlockingSet(): {}", URes); return URes; @@ -209,7 +209,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, // Initialize shadow memory ur_result_t URes = - EnqueueUSMSet(Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize); + EnqueueUSMSetZero(Queue, (void *)LocalShadowOffset, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset));