[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 4/5] Implement rundown-based MRSW lock


  • To: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>, "win-pv-devel@xxxxxxxxxxxxxxxxxxxx" <win-pv-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Owen Smith <owen.smith@xxxxxxxxxx>
  • Date: Wed, 13 May 2026 14:05:08 +0000
  • Accept-language: en-GB, en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=citrix.com; dmarc=pass action=none header.from=citrix.com; dkim=pass header.d=citrix.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=AywKJuq7cZ17BIUIhFo4V+zRD9jDte/+vdmVxlJzaSk=; b=O6jofNli6wIKy/SHHNzQEZxfXaqOBzGIBMsK6OGyzhZr6e5EzVbi2HEGQ2yU4yIEuol+wXLRL9lXGg6dsuoB8cU7cnbO5cG1aZ2MAv4iDypjE1M4AzslfHmr5EX0Vrv+rrDPJgnVFKIgJYAyHC/dEPPs2ITv31aEh5DeeFfyKyIgVJ40dLU4eTAM4W8yPvhFT6vHF//ajMmcLEIUcXwsMGBQsD8pV12BiJZQ4yhN+K7hOrBEcYzvp/oJNGc9Pw6vfBH8ZpitY55QtsM/2uJICmmnbKmlG70ra6l+7i/TDkY1nFf5xBhhnBgvLEQvoTok6s0pu5cmjIZy73SSpUXtLA==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=wfIo9lYG8AsN75vT0z3qb7rGNwzYRW4W1LCZpXrbadpVARRDTqEZjoL/6cHJY33ScFmMV+wju7Rexfm3nqmct2VOVml+Kmv75ONZtUX6RrMq9twTPdYOqS+OBGMIli5HcdEkut/EHlvRMHCtkD6yRfnAeSbi83y4529NOYs2fZNNJbsoyd7XKWyahzq49O5UBxLM0EWmq/Ze7U1egD1F3Iz+tTj4IszTy+Z/f1UnIwUt0nmCo4USt+93ikCQARs3gU3Ofx/+/f9O6Z1rV+OghKXKIAQq5NoxhHr9kv0w30rLQqOJ+sL9IrhrOjylih58itIS8uCMFztbj9RVP7NvJA==
  • Authentication-results: eu.smtp.expurgate.cloud; dkim=pass header.s=selector1 header.d=citrix.com header.i="@citrix.com" header.h="From:Date:Subject:Message-ID:Content-Type:MIME-Version:x-ms-exchange-senderadcheck"
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=citrix.com;
  • Delivery-date: Wed, 13 May 2026 14:05:16 +0000
  • List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>
  • Msip_labels:
  • Thread-index: AQHczZGVucbfB0LVdUy2gYIuu73oyrYMJkPM
  • Thread-topic: [PATCH 4/5] Implement rundown-based MRSW lock

Hi Tu,

I'm having an issue with this patch
During a VM start, its possible for a single system thread to acquire the lock 
exclusively,
and while its locked, a DPC can then attempt to acquire the lock in shared 
mode, resulting
in a deadlock (if AcquireMrswLockShared is called) or spin (if 
SpinAcquireMrswLockShared
is called).

0: kd> k
 # Child-SP          RetAddr               Call Site
00 fffff805`49b48aa8 fffff805`b66b2b2c     nt!DbgBreakPointWithStatus
01 fffff805`49b48ab0 fffff805`b66b0ada     nt!KeAccumulateTicks+0x70c
02 fffff805`49b48b20 fffff805`b66b0833     nt!KiUpdateRunTime+0xd6
03 fffff805`49b48c70 fffff805`b66aed45     nt!KiUpdateTime+0x473
04 fffff805`49b48e20 fffff805`b66ae35d     nt!KeClockInterruptNotify+0x195
05 fffff805`49b48f50 fffff805`b6a7520e     
nt!KiCallInterruptServiceRoutine+0x2ed
06 fffff805`49b48fb0 fffff805`b6a75a1c     
nt!KiInterruptSubDispatchNoLockNoEtw+0x4e
07 fffff805`49b412b0 fffff805`b66d6616     
nt!KiInterruptDispatchNoLockNoEtw+0x3c
08 fffff805`49b41448 fffff805`4c1d6689     nt!ExfAcquireRundownProtection+0x6
09 (Inline Function) --------`--------     xenvif!__MrswRundownAcquire+0x9 
[src\xenvif\mrsw.h @ 84]
0a (Inline Function) --------`--------     
xenvif!__SpinAcquireMrswLockShared+0x9 [src\xenvif\mrsw.h @ 283]
0b fffff805`49b41450 fffff805`4c4a5dd0     xenvif!VifReceiverReturnPacket+0x29 
[src\xenvif\vif.c @ 537]
0c (Inline Function) --------`--------     
xennet!__ReceiverReturnNetBufferLists+0x54 [src\xennet\receiver.c @ 213]
0d (Inline Function) --------`--------     
xennet!ReceiverReturnNetBufferLists+0x54 [src\xennet\receiver.c @ 535]
0e fffff805`49b414a0 fffff805`4ac84661     
xennet!MiniportReturnNetBufferLists+0x70 [src\xennet\miniport.c @ 208]
0f fffff805`49b414d0 fffff805`4b341824     ndis!NdisFReturnNetBufferLists+0x411
10 fffff805`49b415a0 fffff805`4b34759a     
wfplwfs!LwfLowerReturnNetBufferLists+0x84
11 fffff805`49b415f0 fffff805`4b347afa     
wfplwfs!L2NdisReceiveNetBufferListsComplete+0x4e
12 fffff805`49b41620 fffff805`4ac8b628     
wfplwfs!LwfLowerRecvNetBufferLists+0x20a
13 fffff805`49b416e0 fffff805`4c4a62d4     
ndis!NdisMIndicateReceiveNetBufferLists+0xa78
14 (Inline Function) --------`--------     
xennet!__IndicateReceiveNetBufferLists+0x41 [src\xennet\receiver.c @ 353]
15 (Inline Function) --------`--------     xennet!__ReceiverPushPackets+0xaa 
[src\xennet\receiver.c @ 422]
16 fffff805`49b41900 fffff805`4c4a1147     xennet!ReceiverQueuePacket+0x1c4 
[src\xennet\receiver.c @ 603]
17 fffff805`49b41980 fffff805`4c1c3cea     xennet!AdapterVifCallback+0x77 
[src\xennet\adapter.c @ 237]
18 (Inline Function) --------`--------     xenvif!VifReceiverQueuePacket+0x90 
[src\xenvif\vif.c @ 1244]
19 (Inline Function) --------`--------     xenvif!__ReceiverRingSwizzle+0x6f1 
[src\xenvif\receiver.c @ 1541]
1a fffff805`49b41a00 fffff805`b665993e     xenvif!ReceiverRingQueueDpc+0x71a 
[src\xenvif\receiver.c @ 1637]
1b fffff805`49b41ad0 fffff805`b6604f1b     nt!KiExecuteAllDpcs+0x67e
1c fffff805`49b41d20 fffff805`b6a7ae15     nt!KiRetireDpcList+0x36b
1d fffff805`49b41fb0 fffff805`b6a7adbf     nt!KxSwapStacksAndRetireDpcList+0x5
1e ffffdf04`854cf2f0 fffff805`b66ed1d5     nt!KiPlatformSwapStacksAndCallReturn
1f ffffdf04`854cf300 fffff805`b6a7a24f     nt!KiDispatchInterrupt+0x65
20 ffffdf04`854cf330 fffff805`b6608670     nt!KiDpcInterrupt+0x39f
21 ffffdf04`854cf4c0 fffff805`4c1d5991     nt!KeReleaseSpinLock+0x30
22 (Inline Function) --------`--------     xenvif!FrontendSetState+0x1b4 
[src\xenvif\frontend.c @ 2797]
23 ffffdf04`854cf4f0 fffff805`4c4a5c83     xenvif!VifEnable+0x241 
[src\xenvif\vif.c @ 176]
24 (Inline Function) --------`--------     xennet!AdapterEnable+0x64 
[src\xennet\adapter.c @ 1934]
25 ffffdf04`854cf550 fffff805`4adceb91     xennet!MiniportRestart+0x73 
[src\xennet\miniport.c @ 143]
26 ffffdf04`854cf580 fffff805`4ade13d1     ndis!ndisMInvokeRestart+0x51
27 ffffdf04`854cf5d0 fffff805`4adcea65     ndis!ndisMRestartMiniportInner+0x141
28 ffffdf04`854cf660 fffff805`4adbf26c     ndis!ndisMRestartMiniport+0x65
29 ffffdf04`854cf750 fffff805`4adbb4fb     ndis!Ndis::BindEngine::Iterate+0x7dc
2a ffffdf04`854cf910 fffff805`4adbac8a     
ndis!Ndis::BindEngine::UpdateBindings+0xab
2b ffffdf04`854cf9c0 fffff805`4add1771     
ndis!Ndis::BindEngine::UpdateBindingsWorkItem+0x5a
2c ffffdf04`854cfa10 fffff805`b6709e32     
ndis!KWorkItemBase<Ndis::BindEngine,KWorkItem<Ndis::BindEngine> 
>::CallbackThunk+0x11
2d ffffdf04`854cfa40 fffff805`b685904a     nt!ExpWorkerThread+0x1b2
2e ffffdf04`854cfbf0 fffff805`b6a741c4     nt!PspSystemThreadStartup+0x5a
2f ffffdf04`854cfc40 00000000`00000000     nt!KiStartSystemThread+0x34

This seems to be a race condition, which depends on the ReceiverRingQueueDpc 
getting
executed when FrontendSetState drops the spinlock

Owen

________________________________________
From: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
Sent: 16 April 2026 12:10 PM
To: win-pv-devel@xxxxxxxxxxxxxxxxxxxx
Cc: Tu Dinh; Owen Smith
Subject: [PATCH 4/5] Implement rundown-based MRSW lock

The current MRSW lock has a few downsides:
* Large size (have to store a 1KB holder list)
* Long fast-path acquire (~1KB of code) due to having to scan the holder
  list every acquire
* Limited to 64 holders
* Multiple IRQL operations per acquire
* Always raises the critical section to DISPATCH_LEVEL

It's currently only used as the cleanup lock for the VIF interface, so
replace it with something simpler.

Implement a read-write lock with the following properties:
* The reader section is protected by an EX_RUNDOWN_REF.
* Writer/cleanup section is implemented by running down the reference
  within a guarded mutex.
* It offers 3 read acquire flavors: try acquire, spin acquire (both of
  which can be used from DISPATCH_LEVEL) and sleeping acquire (using the
  guarded mutex).
* It makes the downgrade call explicit instead of overloading it via
  ReleaseMrswLockExclusive.
* Finally, the lock supports cache-aware mode, where EX_RUNDOWN_REF is
  replaced with EX_RUNDOWN_REF_CACHE_AWARE using the corresponding
  macros.

As the changes are only internal to vif.c, the VIF version doesn't need
to be increased.

Signed-off-by: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
---
 src/xenvif/mrsw.h | 530 +++++++++++++++++++++++++---------------------
 src/xenvif/vif.c  |  65 +++---
 2 files changed, 327 insertions(+), 268 deletions(-)

diff --git a/src/xenvif/mrsw.h b/src/xenvif/mrsw.h
index e1ff056..8bfa441 100644
--- a/src/xenvif/mrsw.h
+++ b/src/xenvif/mrsw.h
@@ -1,289 +1,347 @@
 /* Copyright (c) Xen Project.
  * Copyright (c) Cloud Software Group, Inc.
+ * Copyright (c) Vates.
  * All rights reserved.
- *
- * Redistribution and use in source and binary forms,
- * with or without modification, are permitted provided
+ *
+ * Redistribution and use in source and binary forms,
+ * with or without modification, are permitted provided
  * that the following conditions are met:
- *
- * *   Redistributions of source code must retain the above
- *     copyright notice, this list of conditions and the
+ *
+ * *   Redistributions of source code must retain the above
+ *     copyright notice, this list of conditions and the
  *     following disclaimer.
- * *   Redistributions in binary form must reproduce the above
- *     copyright notice, this list of conditions and the
- *     following disclaimer in the documentation and/or other
+ * *   Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the
+ *     following disclaimer in the documentation and/or other
  *     materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

-#ifndef _XENVIF_MRSW_H
-#define _XENVIF_MRSW_H
+#ifndef _MRSW_H
+#define _MRSW_H

-#include <ntddk.h>
+#include <wdm.h>

 #include "assert.h"
 #include "util.h"

-#pragma warning(disable:4127)   // conditional expression is constant
-
-typedef struct _XENVIF_MRSW_HOLDER {
-    PKTHREAD    Thread;
-    LONG        Level;
-} XENVIF_MRSW_HOLDER, *PXENVIF_MRSW_HOLDER;
-
-typedef struct _XENVIF_MRSW_LOCK {
-    volatile LONG64 Mask;
-    XENVIF_MRSW_HOLDER     Holder[64];
-    KEVENT          Event;
-} XENVIF_MRSW_LOCK, *PXENVIF_MRSW_LOCK;
-
-C_ASSERT(RTL_FIELD_SIZE(XENVIF_MRSW_LOCK, Holder) == 
RTL_FIELD_SIZE(XENVIF_MRSW_LOCK, Mask) * 8 * sizeof (XENVIF_MRSW_HOLDER));
-
-#define XENVIF_MRSW_EXCLUSIVE_SLOT  0
-
-static FORCEINLINE VOID
-InitializeMrswLock(
-    IN  PXENVIF_MRSW_LOCK   Lock
+#pragma warning(push)
+#pragma warning(disable:4201) // nameless struct/union
+struct _MRSW_LOCK {
+    KGUARDED_MUTEX                  Mutex;
+    union {
+        EX_RUNDOWN_REF              Rundown;
+        PEX_RUNDOWN_REF_CACHE_AWARE RundownCacheAware;
+    };
+};
+#pragma warning(pop)
+
+typedef struct _MRSW_LOCK   MRSW_LOCK, *PMRSW_LOCK;
+typedef struct _MRSW_LOCK   MRSW_CACHE_AWARE_LOCK, *PMRSW_CACHE_AWARE_LOCK;
+
+static FORCEINLINE NTSTATUS
+__MrswRundownInitialize(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware,
+    _In_ ULONG                  Tag
     )
 {
-    LONG                    Slot;
-
-    RtlZeroMemory(Lock, sizeof (XENVIF_MRSW_LOCK));
-
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++)
-        Lock->Holder[Slot].Level = -1;
+    if (CacheAware) {
+        Lock->RundownCacheAware = 
ExAllocateCacheAwareRundownProtection(NonPagedPoolNx,
+                                                                        Tag);
+        if (!Lock->RundownCacheAware)
+            return STATUS_NO_MEMORY;
+    } else {
+        ExInitializeRundownProtection(&Lock->Rundown);
+    }

-    KeInitializeEvent(&Lock->Event, NotificationEvent, FALSE);
+    return STATUS_SUCCESS;
 }

 static FORCEINLINE BOOLEAN
-__ClaimExclusive(
-    IN  PXENVIF_MRSW_LOCK   Lock
+__MrswRundownAcquire(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    LONG64                  Old;
-    LONG64                  New;
-
-    Old = 0;
-    New = 1ll << XENVIF_MRSW_EXCLUSIVE_SLOT;
-
-    return (InterlockedCompareExchange64(&Lock->Mask, New, Old) == Old) ? TRUE 
: FALSE;
+    if (CacheAware)
+        return ExAcquireRundownProtectionCacheAware(Lock->RundownCacheAware);
+    else
+        return ExAcquireRundownProtection(&Lock->Rundown);
 }

-static FORCEINLINE KIRQL
-__drv_maxIRQL(APC_LEVEL)
-__drv_raisesIRQL(DISPATCH_LEVEL)
-__drv_savesIRQL
-__AcquireMrswLockExclusive(
-    IN  PXENVIF_MRSW_LOCK   Lock
+static FORCEINLINE VOID
+__MrswRundownRelease(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    KIRQL                   Irql;
-    LONG                    Slot;
-    PKTHREAD                Self;
-    PXENVIF_MRSW_HOLDER     Holder;
-
-    ASSERT3U(KeGetCurrentIrql(), <, DISPATCH_LEVEL);
-    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
-    Self = KeGetCurrentThread();
-
-    // Make sure we do not already hold the lock
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++)
-        ASSERT(Lock->Holder[Slot].Thread != Self);
-
-    for (;;) {
-        if (__ClaimExclusive(Lock))
-            break;
-
-        KeLowerIrql(Irql);
-
-        (VOID) KeWaitForSingleObject(&Lock->Event,
-                                     Executive,
-                                     KernelMode,
-                                     FALSE,
-                                     NULL);
-        KeClearEvent(&Lock->Event);
-
-        KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-    }
-
-    Holder = &Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT];
-
-    ASSERT3P(Holder->Thread, ==, NULL);
-    Holder->Thread = Self;
-    Holder->Level = 0;
-
-    return Irql;
+    if (CacheAware)
+        ExReleaseRundownProtectionCacheAware(Lock->RundownCacheAware);
+    else
+        ExReleaseRundownProtection(&Lock->Rundown);
 }

-#define AcquireMrswLockExclusive(_Lock, _Irql)              \
-        do {                                                \
-            *(_Irql) = __AcquireMrswLockExclusive(_Lock);   \
-        } while (FALSE)
-
 static FORCEINLINE VOID
-__drv_maxIRQL(DISPATCH_LEVEL)
-__drv_requiresIRQL(DISPATCH_LEVEL)
-ReleaseMrswLockExclusive(
-    IN  PXENVIF_MRSW_LOCK           Lock,
-    IN  __drv_restoresIRQL KIRQL    Irql,
-    IN  BOOLEAN                     Shared
+__MrswRundownWait(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    LONG                            Slot;
-    PKTHREAD                        Self;
-    LONG64                          Old;
-    LONG64                          New;
-    PXENVIF_MRSW_HOLDER             Holder;
-
-    ASSERT3U(KeGetCurrentIrql(), ==, DISPATCH_LEVEL);
-
-    Slot = XENVIF_MRSW_EXCLUSIVE_SLOT + 1; // Choose any slot other than the 
exclusive slot
-
-    Old = 1ll << XENVIF_MRSW_EXCLUSIVE_SLOT;
-    New = (Shared) ? (1ll << Slot) : 0;
-
-    Old = InterlockedCompareExchange64(&Lock->Mask, New, Old);
-    ASSERT3U(Old, == , 1ll << XENVIF_MRSW_EXCLUSIVE_SLOT);
-
-    Self = KeGetCurrentThread();
-
-    ASSERT3P(Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT].Thread, ==, Self);
-
-    // If we are leaving the lock held shared then we need to transfer
-    // our identity information into the hew slot.
-    if (Shared)
-        Lock->Holder[Slot] = Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT];
-
-    Holder = &Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT];
-
-    Holder->Thread = NULL;
-    Holder->Level = -1;
-
-    KeLowerIrql(Irql);
+    if (CacheAware)
+        ExWaitForRundownProtectionReleaseCacheAware(Lock->RundownCacheAware);
+    else
+        ExWaitForRundownProtectionRelease(&Lock->Rundown);
 }

-static FORCEINLINE LONG
-__ClaimShared(
-    IN  PXENVIF_MRSW_LOCK   Lock
+static FORCEINLINE VOID
+__MrswRundownCompleted(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    LONG                    Slot;
-    LONG64                  Old;
-    LONG64                  New;
-
-    // Make sure the exclusive bit is set so that we don't find it
-    Old = Lock->Mask | (1ll << XENVIF_MRSW_EXCLUSIVE_SLOT);
-
-    Slot = __ffu((ULONG64)Old);
-    ASSERT(Slot >= 0);
-    ASSERT3U(Slot, != , XENVIF_MRSW_EXCLUSIVE_SLOT);
+    if (CacheAware)
+        ExRundownCompletedCacheAware(Lock->RundownCacheAware);
+    else
+        ExRundownCompleted(&Lock->Rundown);
+}

-    Old &= ~(1ll << XENVIF_MRSW_EXCLUSIVE_SLOT);
-    New = Old | (1ll << Slot);
+static FORCEINLINE VOID
+__MrswRundownReInitialize(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    if (CacheAware)
+        ExReInitializeRundownProtectionCacheAware(Lock->RundownCacheAware);
+    else
+        ExReInitializeRundownProtection(&Lock->Rundown);
+}

-    return (InterlockedCompareExchange64(&Lock->Mask, New, Old) == Old) ? Slot 
: -1;
+static FORCEINLINE VOID
+__MrswRundownTeardown(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    if (CacheAware)
+        ExFreeCacheAwareRundownProtection(Lock->RundownCacheAware);
+    else
+        RtlZeroMemory(&Lock->Rundown, sizeof(Lock->Rundown));
 }

+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE NTSTATUS
+__InitializeMrswLock(
+    _Out_ struct _MRSW_LOCK     *Lock,
+    _In_ BOOLEAN                CacheAware,
+    _In_ ULONG                  Tag
+    )
+{
+    KeInitializeGuardedMutex(&Lock->Mutex);
+    return __MrswRundownInitialize(Lock, CacheAware, Tag);
+}
+#define InitializeMrswLock(Lock, Tag) \
+    __InitializeMrswLock(Lock, FALSE, Tag)
+#define InitializeMrswCacheAwareLock(Lock, Tag) \
+    __InitializeMrswLock(Lock, TRUE, Tag)
+
+_Requires_lock_not_held_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__TeardownMrswLock(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+#if DBG
+    BUG_ON(!KeTryToAcquireGuardedMutex(&Lock->Mutex));
+    BUG_ON(!__MrswRundownAcquire(Lock, CacheAware));
+    KeReleaseGuardedMutex(&Lock->Mutex);
+    __MrswRundownRelease(Lock, CacheAware);
+#endif
+
+    __MrswRundownTeardown(Lock, CacheAware);
+    RtlZeroMemory(Lock, sizeof(MRSW_LOCK));
+}
+#define TeardownMrswLock(Lock) \
+    __TeardownMrswLock(Lock, FALSE)
+#define TeardownMrswCacheAwareLock(Lock) \
+    __TeardownMrswLock(Lock, TRUE)
+
+_Acquires_lock_(_Global_critical_region_)
+_Requires_lock_not_held_(*Lock)
+_Acquires_exclusive_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
 static FORCEINLINE VOID
-AcquireMrswLockShared(
-    IN  PXENVIF_MRSW_LOCK   Lock
+__AcquireMrswLockExclusive(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    KIRQL                   Irql;
-    LONG                    Level;
-    LONG                    Slot;
-    PKTHREAD                Self;
-    PXENVIF_MRSW_HOLDER     Holder;
+    ASSERT3U(KeGetCurrentIrql(), <=, APC_LEVEL);

+    KeAcquireGuardedMutex(&Lock->Mutex);
+    __MrswRundownWait(Lock, CacheAware);
+    __MrswRundownCompleted(Lock, CacheAware);
+}
+#define AcquireMrswLockExclusive(Lock) \
+    __AcquireMrswLockExclusive(Lock, FALSE)
+#define AcquireMrswCacheAwareLockExclusive(Lock) \
+    __AcquireMrswLockExclusive(Lock, TRUE)
+
+_Releases_lock_(_Global_critical_region_)
+_Requires_exclusive_lock_held_(*Lock)
+_Releases_exclusive_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__ReleaseMrswLockExclusive(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    __MrswRundownReInitialize(Lock, CacheAware);
+    KeReleaseGuardedMutex(&Lock->Mutex);
+}
+#define ReleaseMrswLockExclusive(Lock) \
+    __ReleaseMrswLockExclusive(Lock, FALSE)
+#define ReleaseMrswCacheAwareLockExclusive(Lock) \
+    __ReleaseMrswLockExclusive(Lock, TRUE)
+
+_Releases_lock_(_Global_critical_region_)
+_Requires_exclusive_lock_held_(*Lock)
+_Releases_exclusive_lock_(*Lock)
+_Acquires_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__DowngradeMrswLockExclusive(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    __MrswRundownReInitialize(Lock, CacheAware);
+    BUG_ON(!__MrswRundownAcquire(Lock, CacheAware));
+    KeReleaseGuardedMutex(&Lock->Mutex);
+}
+#define DowngradeMrswLockExclusive(Lock) \
+    __DowngradeMrswLockExclusive(Lock, FALSE)
+#define DowngradeMrswCacheAwareLockExclusive(Lock) \
+    __DowngradeMrswLockExclusive(Lock, TRUE)
+
+_When_(return, _Acquires_shared_lock_(*Lock))
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(DISPATCH_LEVEL)
+static FORCEINLINE BOOLEAN
+__TryAcquireMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+#if DBG
     ASSERT3U(KeGetCurrentIrql(), <=, DISPATCH_LEVEL);
-    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
-    Self = KeGetCurrentThread();
-
-    // Do we already hold the lock? If so, get the nesting level
-    Level = -1;
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++) {
-        if (Lock->Holder[Slot].Thread == Self && Lock->Holder[Slot].Level > 
Level)
-            Level = Lock->Holder[Slot].Level;
-    }
-    Level++;
+#endif

-    for (;;) {
-        Slot = __ClaimShared(Lock);
-        if (Slot >= 0)
-            break;
-
-        _mm_pause();
-    }
-
-    Holder = &Lock->Holder[Slot];
-
-    Holder->Thread = Self;
-    Holder->Level = Level;
-
-    KeLowerIrql(Irql);
+    return __MrswRundownAcquire(Lock, CacheAware);
 }
-
+#define TryAcquireMrswLockShared(Lock) \
+    __TryAcquireMrswLockShared(Lock, FALSE)
+#define TryAcquireMrswCacheAwareLockShared(Lock) \
+    __TryAcquireMrswLockShared(Lock, TRUE)
+
+_Acquires_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(DISPATCH_LEVEL)
 static FORCEINLINE VOID
-ReleaseMrswLockShared(
-    IN  PXENVIF_MRSW_LOCK   Lock
+__SpinAcquireMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    KIRQL                   Irql;
-    PKTHREAD                Self;
-    LONG                    Level;
-    LONG                    Deepest;
-    LONG                    Slot;
-    LONG64                  Old;
-    LONG64                  New;
-    PXENVIF_MRSW_HOLDER     Holder;
-
+#if DBG
     ASSERT3U(KeGetCurrentIrql(), <=, DISPATCH_LEVEL);
-    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
-    Self = KeGetCurrentThread();
-
-    Level = -1;
-    Deepest = -1;
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++) {
-        if (Lock->Holder[Slot].Thread == Self && Lock->Holder[Slot].Level > 
Level) {
-            Level = Lock->Holder[Slot].Level;
-            Deepest = Slot;
-        }
-    }
-    ASSERT(Level >= 0);
-
-    Slot = Deepest;
-    ASSERT3U(Slot, !=, XENVIF_MRSW_EXCLUSIVE_SLOT);
-
-    Holder = &Lock->Holder[Slot];
+#endif

-    Holder->Thread = NULL;
-    Holder->Level = -1;
-
-    do {
-        Old = Lock->Mask;
-        New = Old & ~(1ll << Slot);
-    } while (InterlockedCompareExchange64(&Lock->Mask, New, Old) != Old);
-
-    KeSetEvent(&Lock->Event, IO_NO_INCREMENT, FALSE);
-    KeLowerIrql(Irql);
+    while (!__MrswRundownAcquire(Lock, CacheAware))
+        YieldProcessor();
+}
+#define SpinAcquireMrswLockShared(Lock) \
+    __SpinAcquireMrswLockShared(Lock, FALSE)
+#define SpinAcquireMrswCacheAwareLockShared(Lock) \
+    __SpinAcquireMrswLockShared(Lock, TRUE)
+
+/*
+ * Unlike SpinAcquireMrswLockShared, AcquireMrswLockShared will
+ * sleep when the lock acquisition fails. Thus it cannot be used at
+ * DISPATCH_LEVEL.
+ */
+_Acquires_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__AcquireMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+#if DBG
+    ASSERT3U(KeGetCurrentIrql(), <=, APC_LEVEL);
+#endif
+
+    if (__MrswRundownAcquire(Lock, CacheAware))
+        return;
+
+    /*
+     * Don't bother retrying, since it's most likely that another writer 
section
+     * is cleaning up and not ending any time soon. Just jump straight into
+     * sleep.
+     */
+    KeAcquireGuardedMutex(&Lock->Mutex);
+    /*
+     * Since we have the write mutex, we know that there are no writers. So 
this
+     * acquire must succeed.
+     */
+    BUG_ON(!__MrswRundownAcquire(Lock, CacheAware));
+    KeReleaseGuardedMutex(&Lock->Mutex);
+}
+#define AcquireMrswLockShared(Lock) \
+    __AcquireMrswLockShared(Lock, FALSE)
+#define AcquireMrswCacheAwareLockShared(Lock) \
+    __AcquireMrswLockShared(Lock, TRUE)
+
+_Requires_shared_lock_held_(*Lock)
+_Releases_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(DISPATCH_LEVEL)
+static FORCEINLINE VOID
+__ReleaseMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    __MrswRundownRelease(Lock, CacheAware);
 }
+#define ReleaseMrswLockShared(Lock) \
+    __ReleaseMrswLockShared(Lock, FALSE)
+#define ReleaseMrswCacheAwareLockShared(Lock) \
+    __ReleaseMrswLockShared(Lock, TRUE)

-#endif  // _XENVIF_MRSW_H
+#endif  // _MRSW_H
diff --git a/src/xenvif/vif.c b/src/xenvif/vif.c
index 6f468ee..cb4cf84 100644
--- a/src/xenvif/vif.c
+++ b/src/xenvif/vif.c
@@ -47,7 +47,7 @@

 struct _XENVIF_VIF_CONTEXT {
     PXENVIF_PDO                 Pdo;
-    XENVIF_MRSW_LOCK            Lock;
+    MRSW_LOCK                   Lock;
     LONG                        References;
     PXENVIF_FRONTEND            Frontend;
     BOOLEAN                     Enabled;
@@ -150,13 +150,12 @@ VifEnable(
     )
 {
     PXENVIF_VIF_CONTEXT     Context = Interface->Context;
-    KIRQL                   Irql;
     BOOLEAN                 Exclusive;
     NTSTATUS                status;

     Trace("====>\n");

-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
     Exclusive = TRUE;

     if (Context->Enabled)
@@ -188,7 +187,7 @@ VifEnable(

 done:
     ASSERT(Exclusive);
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);

     Trace("<====\n");

@@ -199,7 +198,7 @@ fail3:

     (VOID) FrontendSetState(Context->Frontend, FRONTEND_CONNECTED);

-    ReleaseMrswLockExclusive(&Context->Lock, Irql, TRUE);
+    DowngradeMrswLockExclusive(&Context->Lock);
     Exclusive = FALSE;

     ReceiverWaitForPackets(FrontendGetReceiver(Context->Frontend));
@@ -234,7 +233,7 @@ fail1:
     Context->Callback = NULL;

     if (Exclusive)
-        ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+        ReleaseMrswLockExclusive(&Context->Lock);
     else
         ReleaseMrswLockShared(&Context->Lock);

@@ -389,17 +388,16 @@ VifEnableVersion9(
     )
 {
     PXENVIF_VIF_CONTEXT             Context = Interface->Context;
-    KIRQL                           Irql;
     NTSTATUS                        status;

     Trace("====>\n");

-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);

     Context->CallbackVersion9 = Callback;
     Context->ArgumentVersion9 = Argument;

-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);

     status = VifEnable(Interface, VifCallbackVersion9, Context);

@@ -416,17 +414,16 @@ VifEnableVersion8(
     )
 {
     PXENVIF_VIF_CONTEXT             Context = Interface->Context;
-    KIRQL                           Irql;
     NTSTATUS                        status;

     Trace("====>\n");

-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);

     Context->CallbackVersion8 = Callback;
     Context->ArgumentVersion8 = Argument;

-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);

     status = VifEnableVersion9(Interface, VifCallbackVersion8, Context);

@@ -441,14 +438,13 @@ VifDisable(
     )
 {
     PXENVIF_VIF_CONTEXT Context = Interface->Context;
-    KIRQL               Irql;

     Trace("====>\n");

-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);

     if (!Context->Enabled) {
-        ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+        ReleaseMrswLockExclusive(&Context->Lock);
         goto done;
     }

@@ -463,7 +459,7 @@ VifDisable(

     (VOID) FrontendSetState(Context->Frontend, FRONTEND_CONNECTED);

-    ReleaseMrswLockExclusive(&Context->Lock, Irql, TRUE);
+    DowngradeMrswLockExclusive(&Context->Lock);

     ReceiverWaitForPackets(FrontendGetReceiver(Context->Frontend));
     TransmitterAbortPackets(FrontendGetTransmitter(Context->Frontend));
@@ -511,7 +507,7 @@ VifQueryStatistic(
     status = STATUS_INVALID_PARAMETER;
     if (Index >= XENVIF_VIF_STATISTIC_COUNT)
         goto done;
-
+
     AcquireMrswLockShared(&Context->Lock);

     FrontendQueryStatistic(Context->Frontend, Index, Value);
@@ -567,7 +563,8 @@ VifReceiverReturnPacket(
 {
     PXENVIF_VIF_CONTEXT Context = Interface->Context;

-    AcquireMrswLockShared(&Context->Lock);
+    // Called from MINIPORT_RETURN_NET_BUFFER_LISTS
+    SpinAcquireMrswLockShared(&Context->Lock);

     ReceiverReturnPacket(FrontendGetReceiver(Context->Frontend),
                          Cookie);
@@ -592,9 +589,10 @@ VifTransmitterQueuePacket(
     PXENVIF_VIF_CONTEXT             Context = Interface->Context;
     NTSTATUS                        status;

-    AcquireMrswLockShared(&Context->Lock);
-
     status = STATUS_UNSUCCESSFUL;
+    if (!TryAcquireMrswLockShared(&Context->Lock))
+        return status;
+
     if (!Context->Enabled)
         goto done;

@@ -948,9 +946,8 @@ VifAcquire(
     )
 {
     PXENVIF_VIF_CONTEXT     Context = Interface->Context;
-    KIRQL                   Irql;

-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);

     if (Context->References++ != 0)
         goto done;
@@ -963,7 +960,7 @@ VifAcquire(
     Trace("<====\n");

 done:
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);

     return STATUS_SUCCESS;
 }
@@ -974,9 +971,8 @@ VifRelease(
     )
 {
     PXENVIF_VIF_CONTEXT     Context = Interface->Context;
-    KIRQL                   Irql;

-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);

     if (--Context->References > 0)
         goto done;
@@ -991,7 +987,7 @@ VifRelease(
     Trace("<====\n");

 done:
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);
 }

 static struct _XENVIF_VIF_INTERFACE_V8 VifInterfaceVersion8 = {
@@ -1100,7 +1096,9 @@ VifInitialize(
     if (*Context == NULL)
         goto fail1;

-    InitializeMrswLock(&(*Context)->Lock);
+    status = InitializeMrswLock(&(*Context)->Lock, XENVIF_VIF_TAG);
+    if (!NT_SUCCESS(status))
+        goto fail2;

     FdoGetSuspendInterface(PdoGetFdo(Pdo),&(*Context)->SuspendInterface);

@@ -1110,7 +1108,7 @@ VifInitialize(
                           *Context,
                           &(*Context)->MacThread);
     if (!NT_SUCCESS(status))
-        goto fail2;
+        goto fail3;

     (*Context)->Pdo = Pdo;

@@ -1118,7 +1116,7 @@ VifInitialize(

     return STATUS_SUCCESS;

-fail2:
+fail3:
     Error("fail3\n");

     RtlZeroMemory(&(*Context)->MacEvent, sizeof (KEVENT));
@@ -1126,7 +1124,10 @@ fail2:
     RtlZeroMemory(&(*Context)->SuspendInterface,
                   sizeof (XENBUS_SUSPEND_INTERFACE));

-    RtlZeroMemory(&(*Context)->Lock, sizeof (XENVIF_MRSW_LOCK));
+    TeardownMrswLock(&(*Context)->Lock);
+
+fail2:
+    Error("fail2\n");

     ASSERT(IsZeroMemory(*Context, sizeof (XENVIF_VIF_CONTEXT)));
     __VifFree(*Context);
@@ -1205,7 +1206,7 @@ VifGetInterface(
     }

     return status;
-}
+}

 VOID
 VifTeardown(
@@ -1228,7 +1229,7 @@ VifTeardown(
     RtlZeroMemory(&Context->SuspendInterface,
                   sizeof (XENBUS_SUSPEND_INTERFACE));

-    RtlZeroMemory(&Context->Lock, sizeof (XENVIF_MRSW_LOCK));
+    TeardownMrswLock(&Context->Lock);

     ASSERT(IsZeroMemory(Context, sizeof (XENVIF_VIF_CONTEXT)));
     __VifFree(Context);
--
2.53.0.windows.2



--
Ngoc Tu Dinh | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.