|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v2] Use batched hypercalls for mapping foreign pages
On 15/06/2026 20:04, Rafał Wojdyła wrote:
>
>
> W dniu 15.06.2026 o 17:00, Tu Dinh pisze:
>> Hi,
>>
>> On 10/06/2026 14:45, Rafał Wojdyła wrote:
>>> Use one hypercall op with multiple pages instead of
>>> separate hypercalls for each page.
>>>
>>> Also fix debug display of addresses.
>>>
>>> Signed-off-by: Rafał Wojdyła <omeg@xxxxxxxxxxxxxxxxxxxxxx>
>>
>> A comment on unrelated code: I didn't realize that
>> gnttab.c:GnttabMapForeignPages already allocates one ULONG per requested
>> page, which can grow up to 4MB per call given the current NumberPages
>> constraint of 1M. I wonder if we can further constrain this value or
>> alternatively allocate the MapEntry buffer from paged pool instead,
>> given that this buffer isn't actively used.
>>
> Agreed, I was thinking that the current count limit in the higher
> drivers is probably too large. The buffer can likely be moved to paged
> pool since currently I think only IOCTLs use these map functions. I'll
> do some testing with that.
>
Thanks. Perhaps an easier way for the next version of the IOCTL would be
to simply marshal the results to userspace after every chunk, then let
userspace drive the continuation itself. Then the driver can work with a
fixed-size allocation and maybe even just use one off the stack.
>> More comments below.
>>
>>> ---
>>> include/xen.h | 20 ++--
>>> src/xen/grant_table.c | 241 ++++++++++++++++++++++++++++++++++
>>> +-------
>>> src/xenbus/gnttab.c | 47 +++-----
>>> 3 files changed, 228 insertions(+), 80 deletions(-)
>>>
>>> diff --git a/include/xen.h b/include/xen.h
>>> index 0c3e8a4..eaab567 100644
>>> --- a/include/xen.h
>>> +++ b/include/xen.h
>>> @@ -265,20 +265,22 @@ GrantTableCopy(
>>> _Check_return_
>>> XEN_API
>>> NTSTATUS
>>> -GrantTableMapForeignPage(
>>> - _In_ USHORT Domain,
>>> - _In_ ULONG GrantRef,
>>> - _In_ PHYSICAL_ADDRESS Address,
>>> - _In_ BOOLEAN ReadOnly,
>>> - _Out_ ULONG *Handle
>>> +GrantTableMapForeignPages(
>>> + _In_ USHORT Domain,
>>> + _In_ ULONG NumberPages,
>>> + _In_reads_(NumberPages) PULONG GrantRefs,
>>> + _In_ PHYSICAL_ADDRESS Address,
>>> + _In_ BOOLEAN ReadOnly,
>>> + _Out_writes_(NumberPages) PULONG Handles
>>> );
>>> _Check_return_
>>> XEN_API
>>> NTSTATUS
>>> -GrantTableUnmapForeignPage(
>>> - _In_ ULONG Handle,
>>> - _In_ PHYSICAL_ADDRESS Address
>>> +GrantTableUnmapForeignPages(
>>> + _In_ ULONG NumberPages,
>>> + _In_reads_(NumberPages) PULONG Handles,
>>> + _In_ PHYSICAL_ADDRESS Address
>>> );
>>> _Check_return_
>>> diff --git a/src/xen/grant_table.c b/src/xen/grant_table.c
>>> index 96a5a26..8935bd4 100644
>>> --- a/src/xen/grant_table.c
>>> +++ b/src/xen/grant_table.c
>>> @@ -38,10 +38,15 @@
>>> #include "hypercall.h"
>>> #include "dbg_print.h"
>>> #include "assert.h"
>>> +#include "util.h"
>>> #pragma warning(push)
>>> #pragma warning(disable:4127) // conditional expression is constant
>>> +#define XEN_GNTTAB_TAG 'TNGX'
>>> +
>>> +#define GNTTAB_BATCH_SIZE 256
>>> +
>>> // Most of the GNTST_* values don't have meaningful NTSTATUS
>>> counterparts,
>>> // this macro translates those that do.
>>> #define GNTST_TO_STATUS(_gntst, _status) \
>>> @@ -168,27 +173,20 @@ fail1:
>>> _Check_return_
>>> XEN_API
>>> NTSTATUS
>>> -GrantTableMapForeignPage(
>>> - _In_ USHORT Domain,
>>> - _In_ ULONG GrantRef,
>>> - _In_ PHYSICAL_ADDRESS Address,
>>> - _In_ BOOLEAN ReadOnly,
>>> - _Out_ ULONG *Handle
>>> +GrantTableUnmapForeignPage(
>>> + _In_ ULONG Handle,
>>> + _In_ PHYSICAL_ADDRESS Address
>>> )
>>> {
>>> - struct gnttab_map_grant_ref op;
>>> - LONG_PTR rc;
>>> - NTSTATUS status;
>>> + struct gnttab_unmap_grant_ref op;
>>> + LONG_PTR rc;
>>> + NTSTATUS status;
>>> RtlZeroMemory(&op, sizeof(op));
>>> - op.dom = Domain;
>>> - op.ref = GrantRef;
>>> - op.flags = GNTMAP_host_map;
>>> - if (ReadOnly)
>>> - op.flags |= GNTMAP_readonly;
>>> + op.handle = Handle;
>>> op.host_addr = Address.QuadPart;
>>> - rc = GrantTableOp(GNTTABOP_map_grant_ref, &op, 1);
>>> + rc = GrantTableOp(GNTTABOP_unmap_grant_ref, &op, 1);
>>> if (rc < 0) {
>>> ERRNO_TO_STATUS(-rc, status);
>>> @@ -196,9 +194,7 @@ GrantTableMapForeignPage(
>>> }
>>> if (op.status != GNTST_okay) {
>>> - Warning("%u:%u -> %u.%u failed (%d)\n",
>>> - op.dom,
>>> - op.ref,
>>> + Warning("0x%08x%08x failed (%hd)\n",
>>> Address.HighPart,
>>> Address.LowPart,
>>> op.status);
>>> @@ -207,8 +203,6 @@ GrantTableMapForeignPage(
>>> goto fail2;
>>> }
>>> - *Handle = op.handle;
>>> -
>>> return STATUS_SUCCESS;
>>> fail2:
>>> @@ -223,40 +217,207 @@ fail1:
>>> _Check_return_
>>> XEN_API
>>> NTSTATUS
>>> -GrantTableUnmapForeignPage(
>>> - _In_ ULONG Handle,
>>> - _In_ PHYSICAL_ADDRESS Address
>>> +GrantTableMapForeignPages(
>>> + _In_ USHORT Domain,
>>> + _In_ ULONG NumberPages,
>>> + _In_reads_(NumberPages) PULONG GrantRefs,
>>> + _In_ PHYSICAL_ADDRESS Address,
>>> + _In_ BOOLEAN ReadOnly,
>>> + _Out_writes_(NumberPages) PULONG Handles
>>> )
>>> {
>>> - struct gnttab_unmap_grant_ref op;
>>> - LONG_PTR rc;
>>> - NTSTATUS status;
>>> + struct gnttab_map_grant_ref *ops;
>>> + LONG_PTR rc;
>>> + NTSTATUS status;
>>> + ULONG chunk_start;
>>> + ULONG chunk_size;
>>> + ULONG i;
>>> + ULONG fail_count;
>>> + ULONG first_fail_index;
>>> + PHYSICAL_ADDRESS page_address;
>>> +
>>> + for (i = 0; i < NumberPages; i++)
>>> + Handles[i] = 0;
>>> +
>>> + status = STATUS_NO_MEMORY;
>>> + ops = __AllocatePoolWithTag(NonPagedPool,
>>> + GNTTAB_BATCH_SIZE * sizeof(*ops),
>>> + XEN_GNTTAB_TAG);
>>> + if (ops == NULL)
>>> + goto fail1;
>>> - RtlZeroMemory(&op, sizeof(op));
>>> - op.handle = Handle;
>>> - op.host_addr = Address.QuadPart;
>>> + status = STATUS_SUCCESS;
>>> + fail_count = 0;
>>> + first_fail_index = 0;
>>> + page_address.QuadPart = Address.QuadPart;
>>> +
>>> + for (chunk_start = 0;
>>> + chunk_start < NumberPages;
>>> + chunk_start += GNTTAB_BATCH_SIZE) {
>>> + chunk_size = (NumberPages - chunk_start < GNTTAB_BATCH_SIZE) ?
>>> + NumberPages - chunk_start : GNTTAB_BATCH_SIZE;
>>> +
>>> + for (i = 0; i < chunk_size; i++) {
>>> + ops[i].dom = Domain;
>>> + ops[i].ref = GrantRefs[chunk_start + i];
>>> + ops[i].flags = GNTMAP_host_map;
>>> + if (ReadOnly)
>>> + ops[i].flags |= GNTMAP_readonly;
>>> +
>>> + ops[i].host_addr = page_address.QuadPart +
>>> + ((ULONGLONG)i << PAGE_SHIFT);
>>> + }
>>> +
>>> + rc = GrantTableOp(GNTTABOP_map_grant_ref, ops, chunk_size);
>>> +
>>> + if (rc < 0) {
>>> + ERRNO_TO_STATUS(-rc, status);
>>> + goto fail2;
>>
>> Going to fail2 here will skip cleanup of existing successful chunks, so
>> I think you might want to goto fail3 instead (while keeping track of
>> successful indices)
>>
> Right, I'll fix this.
>
>>> + }
>>> +
>>> + // check every op, they are independently processed
>>> + for (i = 0; i < chunk_size; i++) {
>>> + if (ops[i].status != GNTST_okay) {
>>> + if (fail_count == 0) {
>>> + Warning("op[%u] %hu:%u -> 0x%08x%08x failed
>>> (%hd)\n",
>>> + chunk_start + i,
>>> + ops[i].dom, ops[i].ref,
>>> + (ULONG)(ops[i].host_addr >> 32),
>>> + (ULONG)ops[i].host_addr,
>>> + ops[i].status);
>>> + first_fail_index = chunk_start + i;
>>> + GNTST_TO_STATUS(ops[i].status, status);
>>> + }
>>> + fail_count++;
>>> + continue;
>>> + }
>>> +
>>> + Handles[chunk_start + i] = ops[i].handle;
>>> + }
>>> +
>>> + page_address.QuadPart += (ULONGLONG)chunk_size << PAGE_SHIFT;
>>> + }
>>> - rc = GrantTableOp(GNTTABOP_unmap_grant_ref, &op, 1);
>>> + if (fail_count > 0) {
>>> + Warning("%u of %u ops failed (first at %u)\n",
>>> + fail_count, NumberPages, first_fail_index);
>>> + goto fail3;
>>> + }
>>> - if (rc < 0) {
>>> - ERRNO_TO_STATUS(-rc, status);
>>> + __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
>>> + return STATUS_SUCCESS;
>>> +
>>> +fail3:
>>> + Error("fail3 (%08x)\n", status);
>>> +
>>> + // undo successful ops
>>> + for (i = 0; i < NumberPages; i++) {
>>> + PHYSICAL_ADDRESS unmap_address;
>>> + NTSTATUS unmap_status;
>>> +
>>> + if (Handles[i] == 0)
>>> + continue;
>>> +
>>> + unmap_address.QuadPart = Address.QuadPart + ((ULONGLONG)i <<
>>> PAGE_SHIFT);
>>> + unmap_status = GrantTableUnmapForeignPage(Handles[i],
>>> + unmap_address);
>>> + if (!NT_SUCCESS(unmap_status))
>>> + Warning("unmap of handle 0x%x at op %u failed (%08x),
>>> leaking\n",
>>> + Handles[i], i, unmap_status);
>>> + }
>>> +
>>> +fail2:
>>> + Error("fail2 (%08x)\n", status);
>>> + __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
>>> +
>>> +fail1:
>>> + Error("fail1 (%08x)\n", status);
>>> +
>>> + return status;
>>> +}
>>> +
>>> +_Check_return_
>>> +XEN_API
>>> +NTSTATUS
>>> +GrantTableUnmapForeignPages(
>>> + _In_ ULONG NumberPages,
>>> + _In_reads_(NumberPages) PULONG Handles,
>>> + _In_ PHYSICAL_ADDRESS Address
>>> + )
>>> +{
>>
>> Is there a way for callers to know which pages got successfully unmapped
>> and which pages were not?
>>
> Not with the current API signatures. I can add a way to report exact
> failed pages (and bump the interface version), but can the caller do
> anything useful with that information? I guess it could retry later (if
> the error was GNTST_eagain).
>
I see. But I do think that for the next interface version, having
per-page status would be useful for retries, even if the errors don't
necessarily come from the hypervisor (might have come from the driver
instead)
>>> + struct gnttab_unmap_grant_ref *ops;
>>> + LONG_PTR rc;
>>> + NTSTATUS status;
>>> + ULONG chunk_start;
>>> + ULONG chunk_size;
>>> + ULONG i;
>>> + ULONG fail_count;
>>> + ULONG first_fail_index;
>>> + PHYSICAL_ADDRESS page_address;
>>> +
>>> + status = STATUS_NO_MEMORY;
>>> + ops = __AllocatePoolWithTag(NonPagedPool,
>>> + GNTTAB_BATCH_SIZE * sizeof(*ops),
>>> + XEN_GNTTAB_TAG);
>>> + if (ops == NULL)
>>> goto fail1;
>>> +
>>> + status = STATUS_SUCCESS;
>>> + fail_count = 0;
>>> + first_fail_index = 0;
>>> + page_address.QuadPart = Address.QuadPart;
>>> +
>>> + for (chunk_start = 0;
>>> + chunk_start < NumberPages;
>>> + chunk_start += GNTTAB_BATCH_SIZE) {
>>> + chunk_size = (NumberPages - chunk_start < GNTTAB_BATCH_SIZE) ?
>>> + NumberPages - chunk_start : GNTTAB_BATCH_SIZE;
>>> +
>>> + for (i = 0; i < chunk_size; i++) {
>>> + ops[i].handle = Handles[chunk_start + i];
>>> + ops[i].host_addr = page_address.QuadPart +
>>> + ((ULONGLONG)i << PAGE_SHIFT);
>>> + }
>>> +
>>> + rc = GrantTableOp(GNTTABOP_unmap_grant_ref, ops, chunk_size);
>>> +
>>> + if (rc < 0) {
>>> + ERRNO_TO_STATUS(-rc, status);
>>> + goto fail2;
>>
>> Should we continue with cleanup of other chunks even if some chunks
>> might have failed?
>>
> Yes, we should free everything we can before returning.
>
>>> + }
>>> +
>>> + for (i = 0; i < chunk_size; i++) {
>>> + if (ops[i].status != GNTST_okay) {
>>> + if (fail_count == 0) {
>>> + Warning("op[%u] 0x%08x%08x failed (%hd)\n",
>>> + chunk_start + i,
>>> + (ULONG)(ops[i].host_addr >> 32),
>>> + (ULONG)ops[i].host_addr,
>>> + ops[i].status);
>>> + first_fail_index = chunk_start + i;
>>> + GNTST_TO_STATUS(ops[i].status, status);
>>> + }
>>> + fail_count++;
>>> + }
>>> + }
>>> +
>>> + page_address.QuadPart += (ULONGLONG)chunk_size << PAGE_SHIFT;
>>> }
>>> - if (op.status != GNTST_okay) {
>>> - Warning("%u.%u failed (%d)\n",
>>> - Address.HighPart,
>>> - Address.LowPart,
>>> - op.status);
>>> + __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
>>> - GNTST_TO_STATUS(op.status, status);
>>> - goto fail2;
>>> + if (fail_count > 0) {
>>> + Warning("%u of %u ops failed (first at %u)\n",
>>> + fail_count, NumberPages, first_fail_index);
>>> + Error("fail3 (%08x)\n", status);
>>> + return status;
>>
>> Was the intention here to have a fail3 block instead?
>>
> Oops, yes.
>
>>> }
>>> return STATUS_SUCCESS;
>>> fail2:
>>> - Error("fail2\n");
>>> + Error("fail2 (%08x)\n", status);
>>> + __FreePoolWithTag(ops, XEN_GNTTAB_TAG);
>>> fail1:
>>> Error("fail1 (%08x)\n", status);
>>> diff --git a/src/xenbus/gnttab.c b/src/xenbus/gnttab.c
>>> index 71f8ec1..f6fcef6 100644
>>> --- a/src/xenbus/gnttab.c
>>> +++ b/src/xenbus/gnttab.c
>>> @@ -667,8 +667,6 @@ GnttabMapForeignPages(
>>> {
>>> PXENBUS_GNTTAB_CONTEXT Context = Interface->Context;
>>> PMDL Mdl;
>>> - LONG PageIndex;
>>> - PHYSICAL_ADDRESS PageAddress;
>>> PXENBUS_GNTTAB_MAP_ENTRY MapEntry;
>>> NTSTATUS status;
>>> @@ -689,19 +687,15 @@ GnttabMapForeignPages(
>>> MapEntry->Mdl = Mdl;
>>> Address->QuadPart = MmGetMdlPfnArray(Mdl)[0] << PAGE_SHIFT;
>>> - PageAddress.QuadPart = Address->QuadPart;
>>> -
>>> - for (PageIndex = 0; PageIndex < (LONG)NumberPages; PageIndex++) {
>>> - status = GrantTableMapForeignPage(Domain,
>>> - References[PageIndex],
>>> - PageAddress,
>>> - ReadOnly,
>>> - &MapEntry-
>>> >MapHandles[PageIndex]);
>>> - if (!NT_SUCCESS(status))
>>> - goto fail3;
>>> - PageAddress.QuadPart += PAGE_SIZE;
>>> - }
>>> + status = GrantTableMapForeignPages(Domain,
>>> + NumberPages,
>>> + References,
>>> + *Address,
>>> + ReadOnly,
>>> + MapEntry->MapHandles);
>>> + if (!NT_SUCCESS(status))
>>> + goto fail3;
>>> status = HashTableAdd(Context->MapTable,
>>> (ULONG_PTR)Address->QuadPart,
>>> @@ -714,15 +708,13 @@ GnttabMapForeignPages(
>>> fail4:
>>> Error("fail4\n");
>>> + (VOID) GrantTableUnmapForeignPages(NumberPages,
>>> + MapEntry->MapHandles,
>>> + *Address);
>>> +
>>> fail3:
>>> Error("fail3\n");
>>> - while (--PageIndex >= 0) {
>>> - PageAddress.QuadPart -= PAGE_SIZE;
>>> - (VOID) GrantTableUnmapForeignPage(MapEntry-
>>> >MapHandles[PageIndex],
>>> - PageAddress);
>>> - }
>>> -
>>> Address->QuadPart = 0;
>>> __GnttabFree(MapEntry);
>>> @@ -746,8 +738,6 @@ GnttabUnmapForeignPages(
>>> {
>>> PXENBUS_GNTTAB_CONTEXT Context = Interface->Context;
>>> ULONG NumberPages;
>>> - PHYSICAL_ADDRESS PageAddress;
>>> - ULONG PageIndex;
>>> PXENBUS_GNTTAB_MAP_ENTRY MapEntry;
>>> PMDL Mdl;
>>> NTSTATUS status;
>>> @@ -763,18 +753,13 @@ GnttabUnmapForeignPages(
>>> if (!NT_SUCCESS(status))
>>> goto fail2;
>>> - PageAddress.QuadPart = Address.QuadPart;
>>> -
>>> Mdl = MapEntry->Mdl;
>>> NumberPages = Mdl->ByteCount >> PAGE_SHIFT;
>>> - for (PageIndex = 0; PageIndex < NumberPages; PageIndex++) {
>>> - status = GrantTableUnmapForeignPage(MapEntry-
>>> >MapHandles[PageIndex],
>>> - PageAddress);
>>> - BUG_ON(!NT_SUCCESS(status));
>>> -
>>> - PageAddress.QuadPart += PAGE_SIZE;
>>> - }
>>> + status = GrantTableUnmapForeignPages(NumberPages,
>>> + MapEntry->MapHandles,
>>> + Address);
>>> + BUG_ON(!NT_SUCCESS(status));
>>
>> Similarly to my previous email, I don't think we should crash here since
>> we can be low on resources, or we can have some edge cases that allows
>> userspace to trigger unmap failures.
>>
> I'll make this consistent with the xen code.
>>> __GnttabFree(MapEntry);
>>
>>
>>
>
>
--
Ngoc Tu Dinh | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |