introduce XENMEM_reserved_device_memory_map This is a prerequisite for punching holes into HVM and PVH guests' P2M to allow passing through devices that are associated with (on VT-d) RMRRs. Signed-off-by: Jan Beulich Signed-off-by: Tiejun Chen Acked-by: Kevin Tian Acked-by: Ian Campbell --- v12a: Move interface structure union member to the end, while moving the whole public header block into a __XEN__ / __XEN_TOOLS__ conditional block. v12: Restore changes as much as possible to my original version, fixing a few issues that got introduced after handing it over. Unionize new public memop interface structure to allow for non-PCI to be supported later on. Check flags to have all currently undefined flags clear. Refine adjustments to xen/pci.h. --- a/xen/common/compat/memory.c +++ b/xen/common/compat/memory.c @@ -17,6 +17,42 @@ CHECK_TYPE(domid); CHECK_mem_access_op; CHECK_vmemrange; +#ifdef HAS_PASSTHROUGH +struct get_reserved_device_memory { + struct compat_reserved_device_memory_map map; + unsigned int used_entries; +}; + +static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr, + u32 id, void *ctxt) +{ + struct get_reserved_device_memory *grdm = ctxt; + u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus, + grdm->map.dev.pci.devfn); + + if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) ) + return 0; + + if ( grdm->used_entries < grdm->map.nr_entries ) + { + struct compat_reserved_device_memory rdm = { + .start_pfn = start, .nr_pages = nr + }; + + if ( rdm.start_pfn != start || rdm.nr_pages != nr ) + return -ERANGE; + + if ( __copy_to_compat_offset(grdm->map.buffer, grdm->used_entries, + &rdm, 1) ) + return -EFAULT; + } + + ++grdm->used_entries; + + return 1; +} +#endif + int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat) { int split, op = cmd & MEMOP_CMD_MASK; @@ -303,6 +339,35 @@ int compat_memory_op(unsigned int cmd, X break; } +#ifdef HAS_PASSTHROUGH + case XENMEM_reserved_device_memory_map: + { + 
struct get_reserved_device_memory grdm; + + if ( unlikely(start_extent) ) + return -ENOSYS; + + if ( copy_from_guest(&grdm.map, compat, 1) || + !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) ) + return -EFAULT; + + if ( grdm.map.flags & ~XENMEM_RDM_ALL ) + return -EINVAL; + + grdm.used_entries = 0; + rc = iommu_get_reserved_device_memory(get_reserved_device_memory, + &grdm); + + if ( !rc && grdm.map.nr_entries < grdm.used_entries ) + rc = -ENOBUFS; + grdm.map.nr_entries = grdm.used_entries; + if ( __copy_to_guest(compat, &grdm.map, 1) ) + rc = -EFAULT; + + return rc; + } +#endif + default: return compat_arch_memory_op(cmd, compat); } --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -748,6 +748,39 @@ static int construct_memop_from_reservat return 0; } +#ifdef HAS_PASSTHROUGH +struct get_reserved_device_memory { + struct xen_reserved_device_memory_map map; + unsigned int used_entries; +}; + +static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr, + u32 id, void *ctxt) +{ + struct get_reserved_device_memory *grdm = ctxt; + u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus, + grdm->map.dev.pci.devfn); + + if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) ) + return 0; + + if ( grdm->used_entries < grdm->map.nr_entries ) + { + struct xen_reserved_device_memory rdm = { + .start_pfn = start, .nr_pages = nr + }; + + if ( __copy_to_guest_offset(grdm->map.buffer, grdm->used_entries, + &rdm, 1) ) + return -EFAULT; + } + + ++grdm->used_entries; + + return 1; +} +#endif + long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) { struct domain *d; @@ -1162,6 +1195,35 @@ long do_memory_op(unsigned long cmd, XEN break; } +#ifdef HAS_PASSTHROUGH + case XENMEM_reserved_device_memory_map: + { + struct get_reserved_device_memory grdm; + + if ( unlikely(start_extent) ) + return -ENOSYS; + + if ( copy_from_guest(&grdm.map, arg, 1) || + !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) ) + return -EFAULT; + + 
if ( grdm.map.flags & ~XENMEM_RDM_ALL ) + return -EINVAL; + + grdm.used_entries = 0; + rc = iommu_get_reserved_device_memory(get_reserved_device_memory, + &grdm); + + if ( !rc && grdm.map.nr_entries < grdm.used_entries ) + rc = -ENOBUFS; + grdm.map.nr_entries = grdm.used_entries; + if ( __copy_to_guest(arg, &grdm.map, 1) ) + rc = -EFAULT; + + break; + } +#endif + default: rc = arch_memory_op(cmd, arg); break; --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -375,6 +375,16 @@ void iommu_crash_shutdown(void) iommu_enabled = iommu_intremap = 0; } +int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt) +{ + const struct iommu_ops *ops = iommu_get_ops(); + + if ( !iommu_enabled || !ops->get_reserved_device_memory ) + return 0; + + return ops->get_reserved_device_memory(func, ctxt); +} + bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature) { const struct hvm_iommu *hd = domain_hvm_iommu(d); --- a/xen/drivers/passthrough/vtd/dmar.c +++ b/xen/drivers/passthrough/vtd/dmar.c @@ -914,3 +914,30 @@ int platform_supports_x2apic(void) unsigned int mask = ACPI_DMAR_INTR_REMAP | ACPI_DMAR_X2APIC_OPT_OUT; return cpu_has_x2apic && ((dmar_flags & mask) == ACPI_DMAR_INTR_REMAP); } + +int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt) +{ + struct acpi_rmrr_unit *rmrr, *rmrr_cur = NULL; + unsigned int i; + u16 bdf; + + for_each_rmrr_device ( rmrr, bdf, i ) + { + int rc; + + if ( rmrr == rmrr_cur ) + continue; + + rc = func(PFN_DOWN(rmrr->base_address), + PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address), + PCI_SBDF2(rmrr->segment, bdf), ctxt); + + if ( unlikely(rc < 0) ) + return rc; + + if ( rc ) + rmrr_cur = rmrr; + } + + return 0; +} --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -75,6 +75,7 @@ int domain_context_mapping_one(struct do u8 bus, u8 devfn, const struct pci_dev *); int domain_context_unmap_one(struct domain *domain, struct iommu *iommu, u8 
bus, u8 devfn); +int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt); unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg); void io_apic_write_remap_rte(unsigned int apic, --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -2490,6 +2490,7 @@ const struct iommu_ops intel_iommu_ops = .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .get_reserved_device_memory = intel_iommu_get_reserved_device_memory, .dump_p2m_table = vtd_dump_p2m_table, }; --- a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -28,6 +28,7 @@ #define __XEN_PUBLIC_MEMORY_H__ #include "xen.h" +#include "physdev.h" /* * Increase or decrease the specified domain's memory reservation. Returns the @@ -522,6 +523,40 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_ * The zero value is appropiate. */ +/* + * With some legacy devices, certain guest-physical addresses cannot safely + * be used for other purposes, e.g. to map guest RAM. This hypercall + * enumerates those regions so the toolstack can avoid using them. + */ +#define XENMEM_reserved_device_memory_map 27 +struct xen_reserved_device_memory { + xen_pfn_t start_pfn; + xen_ulong_t nr_pages; +}; +typedef struct xen_reserved_device_memory xen_reserved_device_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t); + +struct xen_reserved_device_memory_map { +#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */ + /* IN */ + uint32_t flags; + /* + * IN/OUT + * + * Gets set to the required number of entries when too low, + * signaled by error code -ENOBUFS. 
+ */ + unsigned int nr_entries; + /* OUT */ + XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer; + /* IN */ + union { + struct physdev_pci_device pci; + } dev; +}; +typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t); + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ /* @@ -573,7 +608,7 @@ struct xen_vnuma_topology_info { typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t; DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t); -/* Next available subop number is 27 */ +/* Next available subop number is 28 */ #endif /* __XEN_PUBLIC_MEMORY_H__ */ --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -125,6 +125,14 @@ int iommu_do_dt_domctl(struct xen_domctl struct page_info; +/* + * A negative value returned from callbacks of this type makes the function + * the callback was handed to end its iteration and return that value. A + * positive value means the region was consumed and need not be passed to + * the callback again for further devices sharing it; zero means it was not. 
+ */ +typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt); + struct iommu_ops { int (*init)(struct domain *d); void (*hwdom_init)(struct domain *d); @@ -156,12 +164,14 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + int (*get_reserved_device_memory)(iommu_grdm_t *, void *); void (*dump_p2m_table)(struct domain *d); }; void iommu_suspend(void); void iommu_resume(void); void iommu_crash_shutdown(void); +int iommu_get_reserved_device_memory(iommu_grdm_t *, void *); void iommu_share_p2m_table(struct domain *d); --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -26,6 +26,7 @@ * 7:3 = slot * 2:0 = function */ +#define PCI_SEG(sbdf) (((sbdf) >> 16) & 0xffff) #define PCI_BUS(bdf) (((bdf) >> 8) & 0xff) #define PCI_SLOT(bdf) (((bdf) >> 3) & 0x1f) #define PCI_FUNC(bdf) ((bdf) & 0x07) @@ -33,6 +34,9 @@ #define PCI_DEVFN2(bdf) ((bdf) & 0xff) #define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f)) #define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff)) +#define PCI_SBDF(s,b,d,f) ((((s) & 0xffff) << 16) | PCI_BDF(b,d,f)) +#define PCI_SBDF2(s,bdf) ((((s) & 0xffff) << 16) | ((bdf) & 0xffff)) +#define PCI_SBDF3(s,b,df) ((((s) & 0xffff) << 16) | PCI_BDF2(b, df)) struct pci_dev_info { bool_t is_extfn; --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -65,9 +65,10 @@ ! memory_exchange memory.h ! memory_map memory.h ! memory_reservation memory.h -? mem_access_op memory.h +? mem_access_op memory.h ! pod_target memory.h ! remove_from_physmap memory.h +! reserved_device_memory_map memory.h ? vmemrange memory.h ! vnuma_topology_info memory.h ? physdev_eoi physdev.h