|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v4 16/17] xen: mapcache: Add support for grant mappings
On Thu, May 2, 2024 at 9:18 PM Stefano Stabellini
<sstabellini@xxxxxxxxxx> wrote:
>
> On Tue, 30 Apr 2024, Edgar E. Iglesias wrote:
> > From: "Edgar E. Iglesias" <edgar.iglesias@xxxxxxx>
> >
> > Add a second mapcache for grant mappings. The mapcache for
> > grants needs to work with XC_PAGE_SIZE granularity since
> > we can't map larger ranges than what has been granted to us.
> >
> > Like with foreign mappings (xen_memory), machines using grants
> > are expected to initialize the xen_grants MR and map it
> > into their address-map accordingly.
> >
> > Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xxxxxxx>
> > ---
> > hw/xen/xen-hvm-common.c | 12 ++-
> > hw/xen/xen-mapcache.c | 158 +++++++++++++++++++++++++-------
> > include/hw/xen/xen-hvm-common.h | 3 +
> > include/sysemu/xen.h | 7 ++
> > 4 files changed, 145 insertions(+), 35 deletions(-)
> >
> > diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
> > index 0267b88d26..fdec400491 100644
> > --- a/hw/xen/xen-hvm-common.c
> > +++ b/hw/xen/xen-hvm-common.c
> > @@ -10,12 +10,18 @@
> > #include "hw/boards.h"
> > #include "hw/xen/arch_hvm.h"
> >
> > -MemoryRegion xen_memory;
> > +MemoryRegion xen_memory, xen_grants;
> >
> > -/* Check for xen memory. */
> > +/* Check for any kind of xen memory, foreign mappings or grants. */
> > bool xen_mr_is_memory(MemoryRegion *mr)
> > {
> > - return mr == &xen_memory;
> > + return mr == &xen_memory || mr == &xen_grants;
> > +}
> > +
> > +/* Check specifically for grants. */
> > +bool xen_mr_is_grants(MemoryRegion *mr)
> > +{
> > + return mr == &xen_grants;
> > }
> >
> > void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
> > diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
> > index 1b32d0c003..96cd68e28d 100644
> > --- a/hw/xen/xen-mapcache.c
> > +++ b/hw/xen/xen-mapcache.c
> > @@ -14,6 +14,7 @@
> >
> > #include <sys/resource.h>
> >
> > +#include "hw/xen/xen-hvm-common.h"
> > #include "hw/xen/xen_native.h"
> > #include "qemu/bitmap.h"
> >
> > @@ -21,6 +22,8 @@
> > #include "sysemu/xen-mapcache.h"
> > #include "trace.h"
> >
> > +#include <xenevtchn.h>
> > +#include <xengnttab.h>
> >
> > #if HOST_LONG_BITS == 32
> > # define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
> > @@ -41,6 +44,7 @@ typedef struct MapCacheEntry {
> > unsigned long *valid_mapping;
> > uint32_t lock;
> > #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
> > +#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
> > uint8_t flags;
> > hwaddr size;
> >
> > @@ -74,6 +78,8 @@ typedef struct MapCache {
> > } MapCache;
> >
> > static MapCache *mapcache;
> > +static MapCache *mapcache_grants;
> > +static xengnttab_handle *xen_region_gnttabdev;
> >
> > static inline void mapcache_lock(MapCache *mc)
> > {
> > @@ -132,6 +138,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void
> > *opaque)
> > unsigned long max_mcache_size;
> > unsigned int bucket_shift;
> >
> > + xen_region_gnttabdev = xengnttab_open(NULL, 0);
> > + if (xen_region_gnttabdev == NULL) {
> > + error_report("mapcache: Failed to open gnttab device");
> > + exit(EXIT_FAILURE);
> > + }
> > +
> > if (HOST_LONG_BITS == 32) {
> > bucket_shift = 16;
> > } else {
> > @@ -160,6 +172,15 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void
> > *opaque)
> > mapcache = xen_map_cache_init_single(f, opaque,
> > bucket_shift,
> > max_mcache_size);
> > +
> > + /*
> > + * Grant mappings must use XC_PAGE_SIZE granularity since we can't
> > + * map anything beyond the number of pages granted to us.
> > + */
> > + mapcache_grants = xen_map_cache_init_single(f, opaque,
> > + XC_PAGE_SHIFT,
> > + max_mcache_size);
> > +
> > setrlimit(RLIMIT_AS, &rlimit_as);
> > }
> >
> > @@ -169,17 +190,25 @@ static void xen_remap_bucket(MapCache *mc,
> > hwaddr size,
> > hwaddr address_index,
> > bool dummy,
> > + bool grant,
> > + bool grant_is_write,
> > + hwaddr grant_ref,
> > ram_addr_t ram_offset)
>
> Any chance we could pass grant_ref as address_index ?
>
Yes, good catch :-)
grant_ref is already the same as address_index.
> Also instead of grant_is_write we could have a generic is_write that
> applies to both.
Sounds good.
>
> I am not sure about this, but instead of bool grant, we could check on
> address_index using XEN_GRANT_ADDR_OFF? This one might not work.
>
Yeah, this won't work since we're only getting the offset into the
xen_grants memory region.
> I admit that there is no real advantage on these suggestions except to
> consolidate the parameters and make them look a bit more similar in the
> two cases.
>
>
>
> > {
> > uint8_t *vaddr_base;
> > - xen_pfn_t *pfns;
> > + uint32_t *refs = NULL;
> > + xen_pfn_t *pfns = NULL;
> > int *err;
> > unsigned int i;
> > hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
> >
> > trace_xen_remap_bucket(address_index);
> >
> > - pfns = g_new0(xen_pfn_t, nb_pfn);
> > + if (grant) {
> > + refs = g_new0(uint32_t, nb_pfn);
> > + } else {
> > + pfns = g_new0(xen_pfn_t, nb_pfn);
> > + }
> > err = g_new0(int, nb_pfn);
> >
> > if (entry->vaddr_base != NULL) {
> > @@ -208,21 +237,45 @@ static void xen_remap_bucket(MapCache *mc,
> > g_free(entry->valid_mapping);
> > entry->valid_mapping = NULL;
> >
> > - for (i = 0; i < nb_pfn; i++) {
> > - pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) +
> > i;
> > + if (grant) {
> > + for (i = 0; i < nb_pfn; i++) {
> > + refs[i] = grant_ref + i;
> > + }
> > + } else {
> > + for (i = 0; i < nb_pfn; i++) {
> > + pfns[i] = (address_index << (mc->bucket_shift -
> > XC_PAGE_SHIFT)) + i;
> > + }
> > }
> >
> > - /*
> > - * If the caller has requested the mapping at a specific address use
> > - * MAP_FIXED to make sure it's honored.
> > - */
> > + entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
> > +
> > if (!dummy) {
> > - vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
> > - PROT_READ | PROT_WRITE,
> > - vaddr ? MAP_FIXED : 0,
> > - nb_pfn, pfns, err);
> > + if (grant) {
> > + int prot = PROT_READ;
> > +
> > + if (grant_is_write) {
> > + prot |= PROT_WRITE;
> > + }
> > +
> > + entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
> > + assert(vaddr == NULL);
> > + vaddr_base =
> > xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
> > + nb_pfn,
> > + xen_domid, refs,
> > + prot);
> > + } else {
> > + /*
> > + * If the caller has requested the mapping at a specific
> > address use
> > + * MAP_FIXED to make sure it's honored.
> > + */
> > + vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
> > + PROT_READ | PROT_WRITE,
> > + vaddr ? MAP_FIXED : 0,
> > + nb_pfn, pfns, err);
> > + }
> > if (vaddr_base == NULL) {
> > - perror("xenforeignmemory_map2");
> > + perror(grant ? "xengnttab_map_domain_grant_refs"
> > + : "xenforeignmemory_map2");
> > exit(-1);
> > }
> > } else {
> > @@ -263,6 +316,7 @@ static void xen_remap_bucket(MapCache *mc,
> > }
> > }
> >
> > + g_free(refs);
> > g_free(pfns);
> > g_free(err);
> > }
> > @@ -270,10 +324,12 @@ static void xen_remap_bucket(MapCache *mc,
> > static uint8_t *xen_map_cache_unlocked(MapCache *mc,
> > hwaddr phys_addr, hwaddr size,
> > ram_addr_t ram_offset,
> > - uint8_t lock, bool dma, bool
> > is_write)
> > + uint8_t lock, bool dma,
> > + bool grant, bool is_write)
> > {
> > MapCacheEntry *entry, *pentry = NULL,
> > *free_entry = NULL, *free_pentry = NULL;
> > + hwaddr grant_ref = phys_addr >> XC_PAGE_SHIFT;
> > hwaddr address_index;
> > hwaddr address_offset;
> > hwaddr cache_size = size;
> > @@ -342,7 +398,7 @@ tryagain:
> > entry = g_new0(MapCacheEntry, 1);
> > pentry->next = entry;
> > xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
> > - ram_offset);
> > + grant, is_write, grant_ref, ram_offset);
> > } else if (!entry->lock) {
> > if (!entry->vaddr_base || entry->paddr_index != address_index ||
> > entry->size != cache_size ||
> > @@ -350,7 +406,7 @@ tryagain:
> > test_bit_size >> XC_PAGE_SHIFT,
> > entry->valid_mapping)) {
> > xen_remap_bucket(mc, entry, NULL, cache_size, address_index,
> > dummy,
> > - ram_offset);
> > + grant, is_write, grant_ref, ram_offset);
> > }
> > }
> >
> > @@ -401,12 +457,28 @@ uint8_t *xen_map_cache(MemoryRegion *mr,
> > uint8_t lock, bool dma,
> > bool is_write)
> > {
> > + bool grant = xen_mr_is_grants(mr);
> > + MapCache *mc = grant ? mapcache_grants : mapcache;
> > uint8_t *p;
> >
> > - mapcache_lock(mapcache);
> > - p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
> > - lock, dma, is_write);
> > - mapcache_unlock(mapcache);
> > + if (grant) {
> > + /*
> > + * Grants are only supported via address_space_map(). Anything
> > + * else is considered a user/guest error.
> > + *
> > + * QEMU generally doesn't expect these mappings to ever fail, so
> > + * if this happens we report an error message and abort().
> > + */
> > + if (!lock) {
> > + error_report("Trying access a grant reference without mapping
> > it.");
> > + abort();
> > + }
> > + }
> > +
> > + mapcache_lock(mc);
> > + p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
> > + lock, dma, grant, is_write);
> > + mapcache_unlock(mc);
> > return p;
> > }
> >
> > @@ -451,7 +523,14 @@ static ram_addr_t
> > xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
> >
> > ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
> > {
> > - return xen_ram_addr_from_mapcache_single(mapcache, ptr);
> > + ram_addr_t addr;
> > +
> > + addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
> > + if (addr == RAM_ADDR_INVALID) {
> > + addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr);
> > + }
> > +
> > + return addr;
> > }
> >
> > static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
> > @@ -504,9 +583,14 @@ static void
> > xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
> > }
> >
> > ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
> > - if (munmap(entry->vaddr_base, entry->size) != 0) {
> > - perror("unmap fails");
> > - exit(-1);
> > + if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
> > + xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
> > + (entry->size + mc->bucket_size - 1) >>
> > mc->bucket_shift);
>
> Am I getting this right that the + mc->bucket_size - 1 is unnecessary
> because the bucket size is PAGE_SIZE and we can only map at page
> granularity?
>
Yes, you're right.
I'll fix this up in the next version.
> Also can we check for return errors?
Yes, I'll add error checking.
>
>
> > + } else {
> > + if (munmap(entry->vaddr_base, entry->size) != 0) {
> > + perror("unmap fails");
> > + exit(-1);
> > + }
> > }
> > if (pentry) {
> > pentry->next = entry->next;
> > @@ -522,14 +606,24 @@ typedef struct XenMapCacheData {
> > uint8_t *buffer;
> > } XenMapCacheData;
> >
> > +static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t
> > *buffer)
> > +{
> > + mapcache_lock(mc);
> > + xen_invalidate_map_cache_entry_unlocked(mc, buffer);
> > + mapcache_unlock(mc);
> > +}
> > +
> > +static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
> > +{
> > + xen_invalidate_map_cache_entry_single(mapcache, buffer);
> > + xen_invalidate_map_cache_entry_single(mapcache_grants, buffer);
> > +}
> > +
> > static void xen_invalidate_map_cache_entry_bh(void *opaque)
> > {
> > XenMapCacheData *data = opaque;
> >
> > - mapcache_lock(mapcache);
> > - xen_invalidate_map_cache_entry_unlocked(mapcache, data->buffer);
> > - mapcache_unlock(mapcache);
> > -
> > + xen_invalidate_map_cache_entry_all(data->buffer);
> > aio_co_wake(data->co);
> > }
> >
> > @@ -544,9 +638,7 @@ void coroutine_mixed_fn
> > xen_invalidate_map_cache_entry(uint8_t *buffer)
> > xen_invalidate_map_cache_entry_bh, &data);
> > qemu_coroutine_yield();
> > } else {
> > - mapcache_lock(mapcache);
> > - xen_invalidate_map_cache_entry_unlocked(mapcache, buffer);
> > - mapcache_unlock(mapcache);
> > + xen_invalidate_map_cache_entry_all(buffer);
> > }
> > }
> >
> > @@ -598,6 +690,7 @@ void xen_invalidate_map_cache(void)
> > bdrv_drain_all();
> >
> > xen_invalidate_map_cache_single(mapcache);
> > + xen_invalidate_map_cache_single(mapcache_grants);
> > }
> >
> > static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
> > @@ -639,7 +732,8 @@ static uint8_t
> > *xen_replace_cache_entry_unlocked(MapCache *mc,
> > trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
> >
> > xen_remap_bucket(mc, entry, entry->vaddr_base,
> > - cache_size, address_index, false, entry->ram_offset);
> > + cache_size, address_index, false,
> > + false, false, 0, entry->ram_offset);
>
> If I understand correctly, xen_replace_cache_entry_unlocked cannot be
> called on grants because xen_replace_cache_entry_unlocked is always
> called on unlocked entries while grants are always locked. Should we
> have an assert on !entry->lock and/or !(entry->flags &
> XEN_MAPCACHE_ENTRY_GRANT)?
>
Sounds good, I'll add this in the next version as well.
>
>
> > if (!test_bits(address_offset >> XC_PAGE_SHIFT,
> > test_bit_size >> XC_PAGE_SHIFT,
> > entry->valid_mapping)) {
> > diff --git a/include/hw/xen/xen-hvm-common.h
> > b/include/hw/xen/xen-hvm-common.h
> > index 65a51aac2e..3d796235dc 100644
> > --- a/include/hw/xen/xen-hvm-common.h
> > +++ b/include/hw/xen/xen-hvm-common.h
> > @@ -16,6 +16,7 @@
> > #include <xen/hvm/ioreq.h>
> >
> > extern MemoryRegion xen_memory;
> > +extern MemoryRegion xen_grants;
> > extern MemoryListener xen_io_listener;
> > extern DeviceListener xen_device_listener;
> >
> > @@ -29,6 +30,8 @@ extern DeviceListener xen_device_listener;
> > do { } while (0)
> > #endif
> >
> > +#define XEN_GRANT_ADDR_OFF (1ULL << 63)
> > +
> > static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
> > {
> > return shared_page->vcpu_ioreq[i].vp_eport;
> > diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
> > index dc72f83bcb..19dccf4d71 100644
> > --- a/include/sysemu/xen.h
> > +++ b/include/sysemu/xen.h
> > @@ -35,6 +35,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
> > struct MemoryRegion *mr, Error **errp);
> >
> > bool xen_mr_is_memory(MemoryRegion *mr);
> > +bool xen_mr_is_grants(MemoryRegion *mr);
> >
> > #else /* !CONFIG_XEN_IS_POSSIBLE */
> >
> > @@ -55,6 +56,12 @@ static inline bool xen_mr_is_memory(MemoryRegion *mr)
> > return false;
> > }
> >
> > +static inline bool xen_mr_is_grants(MemoryRegion *mr)
> > +{
> > + g_assert_not_reached();
> > + return false;
> > +}
> > +
> > #endif /* CONFIG_XEN_IS_POSSIBLE */
> >
> > #endif
> > --
> > 2.40.1
> >
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |