[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2] Use batched hypercalls for mapping foreign pages


  • To: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>, win-pv-devel@xxxxxxxxxxxxxxxxxxxx
  • From: Rafał Wojdyła <omeg@xxxxxxxxxxxxxxxxxxxxxx>
  • Date: Mon, 15 Jun 2026 20:03:37 +0200
  • Authentication-results: eu.smtp.expurgate.cloud; dkim=pass header.s=fm1 header.d=invisiblethingslab.com header.i="@invisiblethingslab.com" header.h="Content-Transfer-Encoding:Content-Type:Date:From:In-Reply-To:Message-ID:MIME-Version:References:Subject:To"; dkim=pass header.s=fm1 header.d=messagingengine.com header.i="@messagingengine.com" header.h="Content-Transfer-Encoding:Content-Type:Date:Feedback-ID:From:In-Reply-To:Message-ID:MIME-Version:References:Subject:To:X-ME-Proxy:X-ME-Sender"
  • Delivery-date: Mon, 15 Jun 2026 18:03:50 +0000
  • Feedback-id: i409c4082:Fastmail
  • List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>



W dniu 15.06.2026 o 17:00, Tu Dinh pisze:
Hi,

On 10/06/2026 14:45, Rafał Wojdyła wrote:
Use one hypercall op with multiple pages instead of
separate hypercalls for each page.

Also fix debug display of addresses.

Signed-off-by: Rafał Wojdyła <omeg@xxxxxxxxxxxxxxxxxxxxxx>

A comment on unrelated code: I didn't realize that
gnttab.c:GnttabMapForeignPages already allocates one ULONG per requested
page, which can grow up to 4MB per call given the current NumberPages
constraint of 1M. I wonder if we can further constrain this value or
alternatively allocate the MapEntry buffer from paged pool instead,
given that this buffer isn't actively used.

Agreed, I was thinking that the current count limit in the higher drivers is probably too large. The buffer can likely be moved to paged pool since currently I think only IOCTLs use these map functions. I'll do some testing with that.

More comments below.

---
   include/xen.h         |  20 ++--
   src/xen/grant_table.c | 241 +++++++++++++++++++++++++++++++++++-------
   src/xenbus/gnttab.c   |  47 +++-----
   3 files changed, 228 insertions(+), 80 deletions(-)

diff --git a/include/xen.h b/include/xen.h
index 0c3e8a4..eaab567 100644
--- a/include/xen.h
+++ b/include/xen.h
@@ -265,20 +265,22 @@ GrantTableCopy(
   _Check_return_
   XEN_API
   NTSTATUS
-GrantTableMapForeignPage(
-    _In_ USHORT             Domain,
-    _In_ ULONG              GrantRef,
-    _In_ PHYSICAL_ADDRESS   Address,
-    _In_ BOOLEAN            ReadOnly,
-    _Out_ ULONG             *Handle
+GrantTableMapForeignPages(
+    _In_ USHORT                        Domain,
+    _In_ ULONG                         NumberPages,
+    _In_reads_(NumberPages) PULONG     GrantRefs,
+    _In_ PHYSICAL_ADDRESS              Address,
+    _In_ BOOLEAN                       ReadOnly,
+    _Out_writes_(NumberPages) PULONG   Handles
       );
_Check_return_
   XEN_API
   NTSTATUS
-GrantTableUnmapForeignPage(
-    _In_ ULONG              Handle,
-    _In_ PHYSICAL_ADDRESS   Address
+GrantTableUnmapForeignPages(
+    _In_ ULONG                       NumberPages,
+    _In_reads_(NumberPages) PULONG   Handles,
+    _In_ PHYSICAL_ADDRESS            Address
       );
_Check_return_
diff --git a/src/xen/grant_table.c b/src/xen/grant_table.c
index 96a5a26..8935bd4 100644
--- a/src/xen/grant_table.c
+++ b/src/xen/grant_table.c
@@ -38,10 +38,15 @@
   #include "hypercall.h"
   #include "dbg_print.h"
   #include "assert.h"
+#include "util.h"
#pragma warning(push)
   #pragma warning(disable:4127)   // conditional expression is constant
+#define XEN_GNTTAB_TAG 'TNGX'
+
+#define GNTTAB_BATCH_SIZE  256
+
   // Most of the GNTST_* values don't have meaningful NTSTATUS counterparts,
   // this macro translates those that do.
   #define GNTST_TO_STATUS(_gntst, _status)                    \
@@ -168,27 +173,20 @@ fail1:
   _Check_return_
   XEN_API
   NTSTATUS
-GrantTableMapForeignPage(
-    _In_ USHORT                 Domain,
-    _In_ ULONG                  GrantRef,
-    _In_ PHYSICAL_ADDRESS       Address,
-    _In_ BOOLEAN                ReadOnly,
-    _Out_ ULONG                 *Handle
+GrantTableUnmapForeignPage(
+    _In_ ULONG                    Handle,
+    _In_ PHYSICAL_ADDRESS         Address
       )
   {
-    struct gnttab_map_grant_ref op;
-    LONG_PTR                    rc;
-    NTSTATUS                    status;
+    struct gnttab_unmap_grant_ref op;
+    LONG_PTR                      rc;
+    NTSTATUS                      status;
RtlZeroMemory(&op, sizeof(op));
-    op.dom = Domain;
-    op.ref = GrantRef;
-    op.flags = GNTMAP_host_map;
-    if (ReadOnly)
-        op.flags |= GNTMAP_readonly;
+    op.handle = Handle;
       op.host_addr = Address.QuadPart;
- rc = GrantTableOp(GNTTABOP_map_grant_ref, &op, 1);
+    rc = GrantTableOp(GNTTABOP_unmap_grant_ref, &op, 1);
if (rc < 0) {
           ERRNO_TO_STATUS(-rc, status);
@@ -196,9 +194,7 @@ GrantTableMapForeignPage(
       }
if (op.status != GNTST_okay) {
-        Warning("%u:%u -> %u.%u failed (%d)\n",
-                op.dom,
-                op.ref,
+        Warning("0x%08x%08x failed (%hd)\n",
                   Address.HighPart,
                   Address.LowPart,
                   op.status);
@@ -207,8 +203,6 @@ GrantTableMapForeignPage(
           goto fail2;
       }
- *Handle = op.handle;
-
       return STATUS_SUCCESS;
fail2:
@@ -223,40 +217,207 @@ fail1:
   _Check_return_
   XEN_API
   NTSTATUS
-GrantTableUnmapForeignPage(
-    _In_ ULONG                    Handle,
-    _In_ PHYSICAL_ADDRESS         Address
+GrantTableMapForeignPages(
+    _In_ USHORT                       Domain,
+    _In_ ULONG                        NumberPages,
+    _In_reads_(NumberPages) PULONG    GrantRefs,
+    _In_ PHYSICAL_ADDRESS             Address,
+    _In_ BOOLEAN                      ReadOnly,
+    _Out_writes_(NumberPages) PULONG  Handles
       )
   {
-    struct gnttab_unmap_grant_ref op;
-    LONG_PTR                      rc;
-    NTSTATUS                      status;
+    struct gnttab_map_grant_ref       *ops;
+    LONG_PTR                          rc;
+    NTSTATUS                          status;
+    ULONG                             chunk_start;
+    ULONG                             chunk_size;
+    ULONG                             i;
+    ULONG                             fail_count;
+    ULONG                             first_fail_index;
+    PHYSICAL_ADDRESS                  page_address;
+
+    for (i = 0; i < NumberPages; i++)
+        Handles[i] = 0;
+
+    status = STATUS_NO_MEMORY;
+    ops = __AllocatePoolWithTag(NonPagedPool,
+                                GNTTAB_BATCH_SIZE * sizeof(*ops),
+                                XEN_GNTTAB_TAG);
+    if (ops == NULL)
+        goto fail1;
- RtlZeroMemory(&op, sizeof(op));
-    op.handle = Handle;
-    op.host_addr = Address.QuadPart;
+    status = STATUS_SUCCESS;
+    fail_count = 0;
+    first_fail_index = 0;
+    page_address.QuadPart = Address.QuadPart;
+
+    for (chunk_start = 0;
+         chunk_start < NumberPages;
+         chunk_start += GNTTAB_BATCH_SIZE) {
+        chunk_size = (NumberPages - chunk_start < GNTTAB_BATCH_SIZE) ?
+                     NumberPages - chunk_start : GNTTAB_BATCH_SIZE;
+
+        for (i = 0; i < chunk_size; i++) {
+            ops[i].dom = Domain;
+            ops[i].ref = GrantRefs[chunk_start + i];
+            ops[i].flags = GNTMAP_host_map;
+            if (ReadOnly)
+                ops[i].flags |= GNTMAP_readonly;
+
+            ops[i].host_addr = page_address.QuadPart +
+                               ((ULONGLONG)i << PAGE_SHIFT);
+        }
+
+        rc = GrantTableOp(GNTTABOP_map_grant_ref, ops, chunk_size);
+
+        if (rc < 0) {
+            ERRNO_TO_STATUS(-rc, status);
+            goto fail2;

Going to fail2 here will skip cleanup of existing successful chunks, so
I think you might want to goto fail3 instead (while keeping track of
successful indices)

Right, I'll fix this.

+        }
+
+        // check every op, they are independently processed
+        for (i = 0; i < chunk_size; i++) {
+            if (ops[i].status != GNTST_okay) {
+                if (fail_count == 0) {
+                    Warning("op[%u] %hu:%u -> 0x%08x%08x failed (%hd)\n",
+                            chunk_start + i,
+                            ops[i].dom, ops[i].ref,
+                            (ULONG)(ops[i].host_addr >> 32),
+                            (ULONG)ops[i].host_addr,
+                            ops[i].status);
+                    first_fail_index = chunk_start + i;
+                    GNTST_TO_STATUS(ops[i].status, status);
+                }
+                fail_count++;
+                continue;
+            }
+
+            Handles[chunk_start + i] = ops[i].handle;
+        }
+
+        page_address.QuadPart += (ULONGLONG)chunk_size << PAGE_SHIFT;
+    }
- rc = GrantTableOp(GNTTABOP_unmap_grant_ref, &op, 1);
+    if (fail_count > 0) {
+        Warning("%u of %u ops failed (first at %u)\n",
+                fail_count, NumberPages, first_fail_index);
+        goto fail3;
+    }
- if (rc < 0) {
-        ERRNO_TO_STATUS(-rc, status);
+    __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
+    return STATUS_SUCCESS;
+
+fail3:
+    Error("fail3 (%08x)\n", status);
+
+    // undo successful ops
+    for (i = 0; i < NumberPages; i++) {
+        PHYSICAL_ADDRESS unmap_address;
+        NTSTATUS         unmap_status;
+
+        if (Handles[i] == 0)
+            continue;
+
+        unmap_address.QuadPart = Address.QuadPart + ((ULONGLONG)i << 
PAGE_SHIFT);
+        unmap_status = GrantTableUnmapForeignPage(Handles[i],
+                                                  unmap_address);
+        if (!NT_SUCCESS(unmap_status))
+            Warning("unmap of handle 0x%x at op %u failed (%08x), leaking\n",
+                    Handles[i], i, unmap_status);
+    }
+
+fail2:
+    Error("fail2 (%08x)\n", status);
+    __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
+
+fail1:
+    Error("fail1 (%08x)\n", status);
+
+    return status;
+}
+
+_Check_return_
+XEN_API
+NTSTATUS
+GrantTableUnmapForeignPages(
+    _In_ ULONG                      NumberPages,
+    _In_reads_(NumberPages) PULONG  Handles,
+    _In_ PHYSICAL_ADDRESS           Address
+    )
+{

Is there a way for callers to know which pages got successfully unmapped
and which pages were not?

Not with the current API signatures. I can add a way to report exact failed pages (and bump the interface version), but can the caller do anything useful with that information? I guess it could retry later (if the error was GNTST_eagain).

+    struct gnttab_unmap_grant_ref   *ops;
+    LONG_PTR                        rc;
+    NTSTATUS                        status;
+    ULONG                           chunk_start;
+    ULONG                           chunk_size;
+    ULONG                           i;
+    ULONG                           fail_count;
+    ULONG                           first_fail_index;
+    PHYSICAL_ADDRESS                page_address;
+
+    status = STATUS_NO_MEMORY;
+    ops = __AllocatePoolWithTag(NonPagedPool,
+                                GNTTAB_BATCH_SIZE * sizeof(*ops),
+                                XEN_GNTTAB_TAG);
+    if (ops == NULL)
           goto fail1;
+
+    status = STATUS_SUCCESS;
+    fail_count = 0;
+    first_fail_index = 0;
+    page_address.QuadPart = Address.QuadPart;
+
+    for (chunk_start = 0;
+         chunk_start < NumberPages;
+         chunk_start += GNTTAB_BATCH_SIZE) {
+        chunk_size = (NumberPages - chunk_start < GNTTAB_BATCH_SIZE) ?
+                     NumberPages - chunk_start : GNTTAB_BATCH_SIZE;
+
+        for (i = 0; i < chunk_size; i++) {
+            ops[i].handle = Handles[chunk_start + i];
+            ops[i].host_addr = page_address.QuadPart +
+                               ((ULONGLONG)i << PAGE_SHIFT);
+        }
+
+        rc = GrantTableOp(GNTTABOP_unmap_grant_ref, ops, chunk_size);
+
+        if (rc < 0) {
+            ERRNO_TO_STATUS(-rc, status);
+            goto fail2;

Should we continue with cleanup of other chunks even if some chunks
might have failed?

Yes, we should free everything we can before returning.

+        }
+
+        for (i = 0; i < chunk_size; i++) {
+            if (ops[i].status != GNTST_okay) {
+                if (fail_count == 0) {
+                    Warning("op[%u] 0x%08x%08x failed (%hd)\n",
+                            chunk_start + i,
+                            (ULONG)(ops[i].host_addr >> 32),
+                            (ULONG)ops[i].host_addr,
+                            ops[i].status);
+                    first_fail_index = chunk_start + i;
+                    GNTST_TO_STATUS(ops[i].status, status);
+                }
+                fail_count++;
+            }
+        }
+
+        page_address.QuadPart += (ULONGLONG)chunk_size << PAGE_SHIFT;
       }
- if (op.status != GNTST_okay) {
-        Warning("%u.%u failed (%d)\n",
-                Address.HighPart,
-                Address.LowPart,
-                op.status);
+    __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
- GNTST_TO_STATUS(op.status, status);
-        goto fail2;
+    if (fail_count > 0) {
+        Warning("%u of %u ops failed (first at %u)\n",
+                fail_count, NumberPages, first_fail_index);
+        Error("fail3 (%08x)\n", status);
+        return status;

Was the intention here to have a fail3 block instead?

Oops, yes.

       }
return STATUS_SUCCESS; fail2:
-    Error("fail2\n");
+    Error("fail2 (%08x)\n", status);
+    __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
fail1:
       Error("fail1 (%08x)\n", status);
diff --git a/src/xenbus/gnttab.c b/src/xenbus/gnttab.c
index 71f8ec1..f6fcef6 100644
--- a/src/xenbus/gnttab.c
+++ b/src/xenbus/gnttab.c
@@ -667,8 +667,6 @@ GnttabMapForeignPages(
   {
       PXENBUS_GNTTAB_CONTEXT      Context = Interface->Context;
       PMDL                        Mdl;
-    LONG                        PageIndex;
-    PHYSICAL_ADDRESS            PageAddress;
       PXENBUS_GNTTAB_MAP_ENTRY    MapEntry;
       NTSTATUS                    status;
@@ -689,19 +687,15 @@ GnttabMapForeignPages(
       MapEntry->Mdl = Mdl;
Address->QuadPart = MmGetMdlPfnArray(Mdl)[0] << PAGE_SHIFT;
-    PageAddress.QuadPart = Address->QuadPart;
-
-    for (PageIndex = 0; PageIndex < (LONG)NumberPages; PageIndex++) {
-        status = GrantTableMapForeignPage(Domain,
-                                          References[PageIndex],
-                                          PageAddress,
-                                          ReadOnly,
-                                          &MapEntry->MapHandles[PageIndex]);
-        if (!NT_SUCCESS(status))
-            goto fail3;
- PageAddress.QuadPart += PAGE_SIZE;
-    }
+    status = GrantTableMapForeignPages(Domain,
+                                       NumberPages,
+                                       References,
+                                       *Address,
+                                       ReadOnly,
+                                       MapEntry->MapHandles);
+    if (!NT_SUCCESS(status))
+        goto fail3;
status = HashTableAdd(Context->MapTable,
                             (ULONG_PTR)Address->QuadPart,
@@ -714,15 +708,13 @@ GnttabMapForeignPages(
   fail4:
       Error("fail4\n");
+ (VOID) GrantTableUnmapForeignPages(NumberPages,
+                                       MapEntry->MapHandles,
+                                       *Address);
+
   fail3:
       Error("fail3\n");
- while (--PageIndex >= 0) {
-        PageAddress.QuadPart -= PAGE_SIZE;
-        (VOID) GrantTableUnmapForeignPage(MapEntry->MapHandles[PageIndex],
-                                          PageAddress);
-    }
-
       Address->QuadPart = 0;
__GnttabFree(MapEntry);
@@ -746,8 +738,6 @@ GnttabUnmapForeignPages(
   {
       PXENBUS_GNTTAB_CONTEXT      Context = Interface->Context;
       ULONG                       NumberPages;
-    PHYSICAL_ADDRESS            PageAddress;
-    ULONG                       PageIndex;
       PXENBUS_GNTTAB_MAP_ENTRY    MapEntry;
       PMDL                        Mdl;
       NTSTATUS                    status;
@@ -763,18 +753,13 @@ GnttabUnmapForeignPages(
       if (!NT_SUCCESS(status))
           goto fail2;
- PageAddress.QuadPart = Address.QuadPart;
-
       Mdl = MapEntry->Mdl;
       NumberPages = Mdl->ByteCount >> PAGE_SHIFT;
- for (PageIndex = 0; PageIndex < NumberPages; PageIndex++) {
-        status = GrantTableUnmapForeignPage(MapEntry->MapHandles[PageIndex],
-                                            PageAddress);
-        BUG_ON(!NT_SUCCESS(status));
-
-        PageAddress.QuadPart += PAGE_SIZE;
-    }
+    status = GrantTableUnmapForeignPages(NumberPages,
+                                         MapEntry->MapHandles,
+                                         Address);
+    BUG_ON(!NT_SUCCESS(status));

Similarly to my previous email, I don't think we should crash here since
we can be low on resources, or we can have some edge cases that allows
userspace to trigger unmap failures.

I'll make this consistent with the xen code.
__GnttabFree(MapEntry);







 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.