[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [v7][RFC][PATCH 06/13] hvmloader/ram: check if guest memory is out of reserved device memory maps



On 2014/11/6 1:00, Jan Beulich wrote:
"Chen, Tiejun" <tiejun.chen@xxxxxxxxx> 11/05/14 3:59 AM >>>

Everything up to here sounded reasonable.

Next, we need a new domctl to provide these info, 'pci_rdmforce' and
'rdm_force' when parse the config file. Certainly we may need to
introduce and set two new fields in strcut domain, and since then we
just use these fields to modify our current existing iommu callback
inside to support our policy. I mean we just expose those associated
RMRR entry. I think its easy to implement this since inside Xen we can
know which entry is owned by which device. So this can benefit us to
avoid modifying any tools codes and most Xen codes we already addressed.

Whether a domctl is the right approach I can't really tell with this
somewhat vague description.


So based on our current patch, I generate a draft patch as follows to show my idea. Note I just tried to compile this.

diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 009e351..ff22228 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1642,6 +1642,21 @@ int xc_assign_device(
     return do_domctl(xch, &domctl);
 }

+int xc_domain_device_setrdm(xc_interface *xch,
+                            uint32_t domid,
+                            uint32_t pci_rdmforce,
+                            uint32_t pci_dev_bitmap)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_rdm;
+    domctl.domain = domid;
+    domctl.u.set_rdm.pci_rdmforce = pci_rdmforce;
+    domctl.u.set_rdm.pci_dev_bitmap = pci_dev_bitmap;
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_get_device_group(
     xc_interface *xch,
     uint32_t domid,
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 3e191c3..ca8b754 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -90,6 +90,19 @@ const char *libxl__domain_device_model(libxl__gc *gc,
     return dm;
 }

+int libxl__domain_device_setrdm(libxl__gc *gc,
+                                const libxl_domain_build_info *info,
+                                uint32_t dm_domid)
+{
+    int ret;
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+
+    ret = xc_domain_device_setrdm(ctx->xch, dm_domid, info->pci_rdmforce,
+                                  info->pci_dev_bitmap);
+
+    return ret;
+}
+
const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
 {
     const libxl_vnc_info *vnc = NULL;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 4361421..06938ee 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1471,6 +1471,11 @@ _hidden int libxl__domain_build(libxl__gc *gc,
 /* for device model creation */
 _hidden const char *libxl__domain_device_model(libxl__gc *gc,
const libxl_domain_build_info *info);
+
+_hidden int libxl__domain_device_setrdm(libxl__gc *gc,
+ const libxl_domain_build_info *info,
+                                        uint32_t domid);
+
 _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
         int nr_consoles, libxl__device_console *consoles,
         int nr_vfbs, libxl_device_vfb *vfbs,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index ca3f724..ed20823 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -398,6 +398,8 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("kernel",           string),
     ("cmdline",          string),
     ("ramdisk",          string),
+    ("pci_rdmforce",   uint32),
+    ("pci_dev_bitmap",   uint32),
     ("u", KeyedUnion(None, libxl_domain_type, "type",
                 [("hvm", Struct(None, [("firmware",         string),
("bios", libxl_bios_type),
@@ -518,6 +520,7 @@ libxl_device_pci = Struct("device_pci", [
     ("power_mgmt", bool),
     ("permissive", bool),
     ("seize", bool),
+    ("rdmforce", bool),
     ])

 libxl_device_vtpm = Struct("device_vtpm", [
diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c
index 26fb143..989eac8 100644
--- a/tools/libxl/libxlu_pci.c
+++ b/tools/libxl/libxlu_pci.c
@@ -143,6 +143,8 @@ int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci *pcidev, const char *str
                     pcidev->permissive = atoi(tok);
                 }else if ( !strcmp(optkey, "seize") ) {
                     pcidev->seize = atoi(tok);
+                }else if ( !strcmp(optkey, "rdmforce") ) {
+                    pcidev->rdmforce = atoi(tok);
                 }else{
XLU__PCI_ERR(cfg, "Unknown PCI BDF option: %s", optkey);
                 }
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 3c9f146..64a1e51 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -904,6 +904,7 @@ static void replace_string(char **str, const char *val)
     *str = xstrdup(val);
 }

+#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 static void parse_config_data(const char *config_source,
                               const char *config_data,
                               int config_len,
@@ -919,6 +920,7 @@ static void parse_config_data(const char *config_source,
     int pci_msitranslate = 0;
     int pci_permissive = 0;
     int pci_seize = 0;
+    int pci_rdmforce = 0;
     int i, e;

     libxl_domain_create_info *c_info = &d_config->c_info;
@@ -1699,6 +1701,9 @@ skip_vfb:
     if (!xlu_cfg_get_long (config, "pci_seize", &l, 0))
         pci_seize = l;

+    if (!xlu_cfg_get_long (config, "pci_rdmforce", &l, 0))
+        pci_rdmforce = l;
+
     /* To be reworked (automatically enabled) once the auto ballooning
      * after guest starts is done (with PCI devices passed in). */
     if (c_info->type == LIBXL_DOMAIN_TYPE_PV) {
@@ -1719,9 +1724,16 @@ skip_vfb:
             pcidev->power_mgmt = pci_power_mgmt;
             pcidev->permissive = pci_permissive;
             pcidev->seize = pci_seize;
+            pcidev->rdmforce = pci_rdmforce;
             if (!xlu_pci_parse_bdf(config, pcidev, buf))
+            {
+                /* Just fake this wit a bitmap. */
+ b_info.pci_dev_bitmap |= 1 << PCI_BDF(pcidev->bus, pcidev->dev,
+                                                       pcidev->func);
                 d_config->num_pcidevs++;
+            }
         }
+        b_info.pci_rdmforce = pci_rdmforce;
         if (d_config->num_pcidevs && c_info->type == LIBXL_DOMAIN_TYPE_PV)
             libxl_defbool_set(&b_info->u.pv.e820_host, true);
     }
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index dc18f1b..3bbd28f 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2434,6 +2434,7 @@ static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
      * handle such a scenario as its own logic.
      */
ret = iommu_get_reserved_device_memory(p2m_check_reserved_device_memory,
+                                           d,
                                            &gfn);
     if ( ret )
     {
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 113d996..ba489ce 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -691,6 +691,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
         if ( !is_hardware_domain(d) )
         {
rc = iommu_get_reserved_device_memory(p2m_check_reserved_device_memory,
+                                                  d,
                                                   &gfn);
             /* We always avoid populating reserved device memory. */
             if ( rc == 1 )
diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c
index af613b9..cd99ba7 100644
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -315,6 +315,7 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)

             grdm.used_entries = 0;
rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+                                                  current->domain,
                                                   &grdm);

             if ( !rc && grdm.map.nr_entries < grdm.used_entries )
diff --git a/xen/common/mem_access.c b/xen/common/mem_access.c
index 4c84f88..e7973ee 100644
--- a/xen/common/mem_access.c
+++ b/xen/common/mem_access.c
@@ -69,6 +69,7 @@ static int mem_access_check_rdm(struct domain *d, uint64_aligned_t start,
         {
             gfn = start + i;
rc = iommu_get_reserved_device_memory(p2m_check_reserved_device_memory,
+                                                  d,
                                                   &gfn);
             if ( rc < 0 )
                 printk(XENLOG_WARNING
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 2177c56..21db828 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1140,6 +1140,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)

         grdm.used_entries = 0;
         rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+                                              current->domain,
                                               &grdm);

         if ( !rc && grdm.map.nr_entries < grdm.used_entries )
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 7c17e8d..7a5c66d 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -344,14 +344,15 @@ void iommu_crash_shutdown(void)
     iommu_enabled = iommu_intremap = 0;
 }

-int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
+int iommu_get_reserved_device_memory(iommu_grdm_t *func, struct domain* d,
+                                     void *ctxt)
 {
     const struct iommu_ops *ops = iommu_get_ops();

     if ( !iommu_enabled || !ops->get_reserved_device_memory )
         return 0;

-    return ops->get_reserved_device_memory(func, ctxt);
+    return ops->get_reserved_device_memory(func, d, ctxt);
 }

 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 1eba833..ee689ff 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1540,6 +1540,18 @@ int iommu_do_pci_domctl(
         }
         break;

+    case XEN_DOMCTL_set_rdm:
+        if ( unlikely(d->is_dying) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        d->arch.hvm_domain.pci_rdmforce = domctl->u.set_rdm.pci_rdmforce;
+ d->arch.hvm_domain.pci_dev_bitmap = domctl->u.set_rdm.pci_dev_bitmap;
+
+        break;
+
     case XEN_DOMCTL_assign_device:
         if ( unlikely(d->is_dying) )
         {
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 546eca9..138840c 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -28,6 +28,7 @@
 #include <xen/xmalloc.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
+#include <xen/sched.h>
 #include <asm/string.h>
 #include "dmar.h"
 #include "iommu.h"
@@ -921,18 +922,28 @@ int platform_supports_x2apic(void)
return cpu_has_x2apic && ((dmar_flags & mask) == ACPI_DMAR_INTR_REMAP);
 }

-int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
+int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, struct domain *d,
+                                           void *ctxt)
 {
     struct acpi_rmrr_unit *rmrr;
     int rc = 0;
+    int i;
+    u16 bdf;

-    list_for_each_entry(rmrr, &acpi_rmrr_units, list)
+    for_each_rmrr_device ( rmrr, bdf, i )
     {
-        rc = func(PFN_DOWN(rmrr->base_address),
-                  PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address),
-                  ctxt);
-        if ( rc )
-            break;
+        if ( d->arch.hvm_domain.pci_rdmforce )
+        {
+            if ( d->arch.hvm_domain.pci_dev_bitmap & (uint32_t)(1 << bdf) )
+            {
+                rc = func(PFN_DOWN(rmrr->base_address),
+                                   PFN_UP(rmrr->end_address) -
+                                   PFN_DOWN(rmrr->base_address),
+                                   ctxt);
+                if ( rc )
+                    break;
+            }
+        }
     }

     return rc;
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index f9ee9b0..df9fed3 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -75,7 +75,8 @@ int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
                                u8 bus, u8 devfn, const struct pci_dev *);
 int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
                              u8 bus, u8 devfn);
-int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt);
+int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, struct domain *d,
+                                           void *ctxt);

 unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
 void io_apic_write_remap_rte(unsigned int apic,
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 2757c7f..5dab8dd 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -90,6 +90,9 @@ struct hvm_domain {
     /* Cached CF8 for guest PCI config cycles */
     uint32_t                pci_cf8;

+    uint32_t                pci_rdmforce;
+    uint32_t                pci_dev_bitmap;
+
     struct pl_time         pl_time;

     struct hvm_io_handler *io_handler;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 58b19e7..8b7cd5b 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -484,6 +484,14 @@ struct xen_domctl_assign_device {
 typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);

+/* Control whether/how we check and reserve device memory. */
+struct xen_domctl_set_rdm {
+    uint32_t  pci_rdmforce;
+    uint32_t  pci_dev_bitmap;
+};
+typedef struct xen_domctl_set_rdm xen_domctl_set_rdm_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_rdm_t);
+
 /* Retrieve sibling devices infomation of machine_sbdf */
 /* XEN_DOMCTL_get_device_group */
 struct xen_domctl_get_device_group {
@@ -1056,6 +1064,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_set_vcpu_msrs                 73
 #define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_psr_cmt_op                    75
+#define XEN_DOMCTL_set_rdm                       76
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1118,7 +1127,8 @@ struct xen_domctl {
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         struct xen_domctl_vnuma             vnuma;
         struct xen_domctl_psr_cmt_op        psr_cmt_op;
-        uint8_t                             pad[128];
+        struct xen_domctl_set_rdm           set_rdm;
+        uint8_t                             pad[120];
     } u;
 };
 typedef struct xen_domctl xen_domctl_t;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 409f6f8..adf3d52 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -158,14 +158,14 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
-    int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
+ int (*get_reserved_device_memory)(iommu_grdm_t *, struct domain *, void *);
     void (*dump_p2m_table)(struct domain *d);
 };

 void iommu_suspend(void);
 void iommu_resume(void);
 void iommu_crash_shutdown(void);
-int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);
+int iommu_get_reserved_device_memory(iommu_grdm_t *, struct domain *, void *);

 void iommu_share_p2m_table(struct domain *d);

Thanks
Tiejun

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.