[PATCH 1/4] xen: Introduce non-broken hypercalls for the paging mempool size
The existing XEN_DOMCTL_SHADOW_OP_{GET,SET}_ALLOCATION have problems:
* All set_allocation() flavours have an overflow-before-widen bug when
calculating "sc->mb << (20 - PAGE_SHIFT)" (see the sketch after this list).
* All flavours have a granularity of 1M. This was tolerable when the size of
the pool could only be set at the same granularity, but is broken now that
ARM has a 16-page stopgap allocation in use.
* All get_allocation() flavours round up, and in particular turn 0 into 1,
meaning the get op returns junk before a successful set op.
* The x86 flavours reject the hypercalls before the VM has vCPUs allocated,
despite the pool size being a domain property.
* Even the hypercall names are long-obsolete.
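For illustration, a minimal sketch of the overflow-before-widen problem from the
first bullet (PAGE_SHIFT and the example value are assumptions for the sketch,
not taken from the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12                    /* assume 4K pages */

    int main(void)
    {
        uint32_t mb = UINT32_C(1) << 25;     /* 32 TiB requested, illustrative only */

        /* Buggy: the shift is evaluated in 32 bits and truncates before it
         * is widened into the 64-bit page count. */
        uint64_t bad_pages  = mb << (20 - PAGE_SHIFT);

        /* Correct: widen first, then shift. */
        uint64_t good_pages = (uint64_t)mb << (20 - PAGE_SHIFT);

        printf("bad=%#llx good=%#llx pages\n",
               (unsigned long long)bad_pages, (unsigned long long)good_pages);
        return 0;
    }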
Implement a better interface, which can be first used to unit test the
behaviour, and subsequently correct a broken implementation. The old
interface will be retired in due course.
The unit of bytes (as opposed to pages) is a deliberate API/ABI improvement to
more easily support multiple page granularities.
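As a rough usage sketch of the byte-granular interface (hypothetical caller;
the helper name and the 16-page figure, matching the ARM stopgap mentioned
above, are illustrative only):

    #include <xenctrl.h>

    /* Resize a domain's paging pool to exactly 16 4K pages - a request the
     * old 1M-granular SHADOW_OP interface could not express. */
    static int example_resize_pool(xc_interface *xch, uint32_t domid)
    {
        uint64_t size = 16ULL << 12;     /* bytes, must be page-aligned */
        int rc = xc_set_paging_mempool_size(xch, domid, size);

        if ( rc )
            return rc;

        /* Read it back; unlike the old op, the result is exact bytes rather
         * than a value rounded up to whole MB. */
        return xc_get_paging_mempool_size(xch, domid, &size);
    }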
This is part of XSA-409 / CVE-2022-33747.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Release-acked-by: Henry Wang <Henry.Wang@xxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wl@xxxxxxx>
CC: Stefano Stabellini <sstabellini@xxxxxxxxxx>
CC: Julien Grall <julien@xxxxxxx>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>
CC: Bertrand Marquis <bertrand.marquis@xxxxxxx>
CC: Henry Wang <Henry.Wang@xxxxxxx>
CC: Anthony PERARD <anthony.perard@xxxxxxxxxx>
v2:
* s/p2m/paging/
* Fix overflow-before-widen in ARM's arch_get_p2m_mempool_size()
* Fix overflow-before-widen in both {hap,shadow}_get_allocation_bytes()
* Leave a TODO about x86/PV, drop assertion.
* Check for long->int truncation in x86's arch_set_paging_mempool_size()
Future TODOs:
* x86 shadow still rounds up. This is buggy as it's a simultaneous equation
with tot_pages, which varies over time with ballooning (see the worked example
after this list).
* x86 PV is weird. There is no toolstack interaction with the shadow pool
size, but the "shadow" pool does come into existence when logdirty (or
pv-l1tf) is first enabled.
* The shadow+hap logic is in desperate need of deduping.
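A worked example of the round-up problem the first TODO above refers to
(PAGE_SHIFT and the 16-page figure are assumptions for illustration):

    #include <stdio.h>

    #define PAGE_SHIFT 12                    /* assume 4K pages */

    int main(void)
    {
        unsigned int pool_pages = 16;        /* e.g. the ARM stopgap allocation */

        /* Old-style get: round the pool size up to whole MB. */
        unsigned int mb = (pool_pages + (1u << (20 - PAGE_SHIFT)) - 1)
                          >> (20 - PAGE_SHIFT);

        /* Feeding that figure straight back into the old set op inflates
         * the pool, so a get/set round trip is not idempotent. */
        unsigned int new_pool_pages = mb << (20 - PAGE_SHIFT);

        printf("get reports %u MB; set turns 16 pages into %u pages\n",
               mb, new_pool_pages);
        return 0;
    }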
---
tools/include/xenctrl.h | 3 +++
tools/libs/ctrl/xc_domain.c | 29 ++++++++++++++++++++++++++
xen/arch/arm/p2m.c | 26 +++++++++++++++++++++++
xen/arch/x86/include/asm/hap.h | 1 +
xen/arch/x86/include/asm/shadow.h | 4 ++++
xen/arch/x86/mm/hap/hap.c | 11 ++++++++++
xen/arch/x86/mm/paging.c | 43 +++++++++++++++++++++++++++++++++++++++
xen/arch/x86/mm/shadow/common.c | 11 ++++++++++
xen/common/domctl.c | 14 +++++++++++++
xen/include/public/domctl.h | 24 +++++++++++++++++++++-
xen/include/xen/domain.h | 3 +++
11 files changed, 168 insertions(+), 1 deletion(-)
diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 0c8b4c3aa7a5..23037874d3d5 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -893,6 +893,9 @@ long long xc_logdirty_control(xc_interface *xch,
unsigned int mode,
xc_shadow_op_stats_t *stats);
+int xc_get_paging_mempool_size(xc_interface *xch, uint32_t domid, uint64_t *size);
+int xc_set_paging_mempool_size(xc_interface *xch, uint32_t domid, uint64_t size);
+
int xc_sched_credit_domain_set(xc_interface *xch,
uint32_t domid,
struct xen_domctl_sched_credit *sdom);
diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index 14c0420c35be..e939d0715739 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -706,6 +706,35 @@ long long xc_logdirty_control(xc_interface *xch,
return (rc == 0) ? domctl.u.shadow_op.pages : rc;
}
+int xc_get_paging_mempool_size(xc_interface *xch, uint32_t domid, uint64_t *size)
+{
+ int rc;
+ struct xen_domctl domctl = {
+ .cmd = XEN_DOMCTL_get_paging_mempool_size,
+ .domain = domid,
+ };
+
+ rc = do_domctl(xch, &domctl);
+ if ( rc )
+ return rc;
+
+ *size = domctl.u.paging_mempool.size;
+ return 0;
+}
+
+int xc_set_paging_mempool_size(xc_interface *xch, uint32_t domid, uint64_t size)
+{
+ struct xen_domctl domctl = {
+ .cmd = XEN_DOMCTL_set_paging_mempool_size,
+ .domain = domid,
+ .u.paging_mempool = {
+ .size = size,
+ },
+ };
+
+ return do_domctl(xch, &domctl);
+}
+
int xc_domain_setmaxmem(xc_interface *xch,
uint32_t domid,
uint64_t max_memkb)
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 94d3b60b1387..8c1972e58227 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -100,6 +100,13 @@ unsigned int p2m_get_allocation(struct domain *d)
return ROUNDUP(nr_pages, 1 << (20 - PAGE_SHIFT)) >> (20 - PAGE_SHIFT);
}
+/* Return the size of the pool, in bytes. */
+int arch_get_paging_mempool_size(struct domain *d, uint64_t *size)
+{
+ *size = (uint64_t)ACCESS_ONCE(d->arch.paging.p2m_total_pages) << PAGE_SHIFT;
+ return 0;
+}
+
/*
* Set the pool of pages to the required number of pages.
* Returns 0 for success, non-zero for failure.
@@ -157,6 +164,25 @@ int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
return 0;
}
+int arch_set_paging_mempool_size(struct domain *d, uint64_t size)
+{
+ unsigned long pages = size >> PAGE_SHIFT;
+ bool preempted = false;
+ int rc;
+
+ if ( (size & ~PAGE_MASK) || /* Non page-sized request? */
+ pages != (size >> PAGE_SHIFT) ) /* 32-bit overflow? */
+ return -EINVAL;
+
+ spin_lock(&d->arch.paging.lock);
+ rc = p2m_set_allocation(d, pages, &preempted);
+ spin_unlock(&d->arch.paging.lock);
+
+ ASSERT(preempted == (rc == -ERESTART));
+
+ return rc;
+}
+
int p2m_teardown_allocation(struct domain *d)
{
int ret = 0;
diff --git a/xen/arch/x86/include/asm/hap.h b/xen/arch/x86/include/asm/hap.h
index 90dece29deca..14d2f212dab9 100644
--- a/xen/arch/x86/include/asm/hap.h
+++ b/xen/arch/x86/include/asm/hap.h
@@ -47,6 +47,7 @@ int hap_track_dirty_vram(struct domain *d,
extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted);
unsigned int hap_get_allocation(struct domain *d);
+int hap_get_allocation_bytes(struct domain *d, uint64_t *size);
#endif /* XEN_HAP_H */
diff --git a/xen/arch/x86/include/asm/shadow.h b/xen/arch/x86/include/asm/shadow.h
index 1365fe480518..dad876d29499 100644
--- a/xen/arch/x86/include/asm/shadow.h
+++ b/xen/arch/x86/include/asm/shadow.h
@@ -97,6 +97,8 @@ void shadow_blow_tables_per_domain(struct domain *d);
int shadow_set_allocation(struct domain *d, unsigned int pages,
bool *preempted);
+int shadow_get_allocation_bytes(struct domain *d, uint64_t *size);
+
#else /* !CONFIG_SHADOW_PAGING */
#define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v))
@@ -108,6 +110,8 @@ int shadow_set_allocation(struct domain *d, unsigned int pages,
({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
#define shadow_set_allocation(d, pages, preempted) \
({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
+#define shadow_get_allocation_bytes(d, size) \
+ ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn,
int fast, int all) {}
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index f809ea9aa6ae..0fc1b1d9aced 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -345,6 +345,17 @@ unsigned int hap_get_allocation(struct domain *d)
+ ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
}
+int hap_get_allocation_bytes(struct domain *d, uint64_t *size)
+{
+ unsigned long pages = d->arch.paging.hap.total_pages;
+
+ pages += d->arch.paging.hap.p2m_pages;
+
+ *size = pages << PAGE_SHIFT;
+
+ return 0;
+}
+
/* Set the pool of pages to the required number of pages.
* Returns 0 for success, non-zero for failure. */
int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 3a355eee9ca3..8d579fa9a3e8 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -977,6 +977,49 @@ int __init paging_set_allocation(struct domain *d, unsigned int pages,
}
#endif
+int arch_get_paging_mempool_size(struct domain *d, uint64_t *size)
+{
+ int rc;
+
+ if ( is_pv_domain(d) ) /* TODO: Relax in due course */
+ return -EOPNOTSUPP;
+
+ if ( hap_enabled(d) )
+ rc = hap_get_allocation_bytes(d, size);
+ else
+ rc = shadow_get_allocation_bytes(d, size);
+
+ return rc;
+}
+
+int arch_set_paging_mempool_size(struct domain *d, uint64_t size)
+{
+ unsigned long pages = size >> PAGE_SHIFT;
+ bool preempted = false;
+ int rc;
+
+ if ( is_pv_domain(d) ) /* TODO: Relax in due course */
+ return -EOPNOTSUPP;
+
+ if ( size & ~PAGE_MASK || /* Non page-sized request? */
+ pages != (unsigned int)pages ) /* Overflow $X_set_allocation()? */
+ return -EINVAL;
+
+ paging_lock(d);
+ if ( hap_enabled(d) )
+ rc = hap_set_allocation(d, pages, &preempted);
+ else
+ rc = shadow_set_allocation(d, pages, &preempted);
+ paging_unlock(d);
+
+ /*
+ * TODO: Adjust $X_set_allocation() so this is true.
+ ASSERT(preempted == (rc == -ERESTART));
+ */
+
+ return preempted ? -ERESTART : rc;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index badfd53c6b23..a8404f97f668 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1427,6 +1427,17 @@ static unsigned int shadow_get_allocation(struct domain *d)
+ ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
}
+int shadow_get_allocation_bytes(struct domain *d, uint64_t *size)
+{
+ unsigned long pages = d->arch.paging.shadow.total_pages;
+
+ pages += d->arch.paging.shadow.p2m_pages;
+
+ *size = pages << PAGE_SHIFT;
+
+ return 0;
+}
+
/**************************************************************************/
/* Hash table for storing the guest->shadow mappings.
* The table itself is an array of pointers to shadows; the shadows are then
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 69fb9abd346f..ad71ad8a4cc5 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -874,6 +874,20 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
ret = iommu_do_domctl(op, d, u_domctl);
break;
+ case XEN_DOMCTL_get_paging_mempool_size:
+ ret = arch_get_paging_mempool_size(d, &op->u.paging_mempool.size);
+ if ( !ret )
+ copyback = 1;
+ break;
+
+ case XEN_DOMCTL_set_paging_mempool_size:
+ ret = arch_set_paging_mempool_size(d, op->u.paging_mempool.size);
+
+ if ( ret == -ERESTART )
+ ret = hypercall_create_continuation(
+ __HYPERVISOR_domctl, "h", u_domctl);
+ break;
+
default:
ret = arch_do_domctl(op, d, u_domctl);
break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index b2ae839c3632..d4072761791a 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -214,7 +214,10 @@ struct xen_domctl_getpageframeinfo3 {
/* Return the bitmap but do not modify internal copy. */
#define XEN_DOMCTL_SHADOW_OP_PEEK 12
-/* Memory allocation accessors. */
+/*
+ * Memory allocation accessors. These APIs are broken and will be removed.
+ * Use XEN_DOMCTL_{get,set}_paging_mempool_size instead.
+ */
#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30
#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31
@@ -946,6 +949,22 @@ struct xen_domctl_cacheflush {
xen_pfn_t start_pfn, nr_pfns;
};
+/*
+ * XEN_DOMCTL_get_paging_mempool_size / XEN_DOMCTL_set_paging_mempool_size.
+ *
+ * Get or set the paging memory pool size. The size is in bytes.
+ *
+ * This is a dedicated pool of memory for Xen to use while managing the guest,
+ * typically containing pagetables. As such, there is an implementation
+ * specific minimum granularity.
+ *
+ * The set operation can fail mid-way through the request (e.g. Xen running
+ * out of memory, no free memory to reclaim from the pool, etc.).
+ */
+struct xen_domctl_paging_mempool {
+ uint64_aligned_t size; /* IN/OUT. Size in bytes. */
+};
+
#if defined(__i386__) || defined(__x86_64__)
struct xen_domctl_vcpu_msr {
uint32_t index;
@@ -1274,6 +1293,8 @@ struct xen_domctl {
#define XEN_DOMCTL_get_cpu_policy 82
#define XEN_DOMCTL_set_cpu_policy 83
#define XEN_DOMCTL_vmtrace_op 84
+#define XEN_DOMCTL_get_paging_mempool_size 85
+#define XEN_DOMCTL_set_paging_mempool_size 86
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -1335,6 +1356,7 @@ struct xen_domctl {
struct xen_domctl_psr_alloc psr_alloc;
struct xen_domctl_vuart_op vuart_op;
struct xen_domctl_vmtrace_op vmtrace_op;
+ struct xen_domctl_paging_mempool paging_mempool;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 2c8116afba27..0de9cbc1696d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -98,6 +98,9 @@ void arch_get_info_guest(struct vcpu *, vcpu_guest_context_u);
int arch_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg);
int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg);
+int arch_get_paging_mempool_size(struct domain *d, uint64_t *size /* bytes */);
+int arch_set_paging_mempool_size(struct domain *d, uint64_t size /* bytes */);
+
int domain_relinquish_resources(struct domain *d);
void dump_pageframe_info(struct domain *d);
--
2.11.0