[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 1/2] libxl: Implement the handler to handle unrecoverable AER errors



Implement the callback function to handle unrecoverable AER errors, and
also the public APIs that can be used to register/unregister the handler.
When an AER error occurs, the handler will forcibly remove the erring
PCIe device from the guest.

Signed-off-by: Venu Busireddy <venu.busireddy@xxxxxxxxxx>
---
 tools/libxl/libxl.h          |   7 +++
 tools/libxl/libxl_event.h    |   7 +++
 tools/libxl/libxl_internal.h |   8 +++
 tools/libxl/libxl_pci.c      | 123 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 145 insertions(+)

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index eca0ea2c50..99a3c8ae1f 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1120,6 +1120,13 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, const libxl_mac *src);
  */
 #define LIBXL_HAVE_PV_SHIM 1
 
+/* LIBXL_HAVE_AER_EVENTS_HANDLER
+ *
+ * If this is defined, libxl has the library functions called
+ * libxl_reg_aer_events_handler and libxl_unreg_aer_events_handler.
+ */
+#define LIBXL_HAVE_AER_EVENTS_HANDLER 1
+
 typedef char **libxl_string_list;
 void libxl_string_list_dispose(libxl_string_list *sl);
 int libxl_string_list_length(const libxl_string_list *sl);
diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
index 1ea789e231..63c29ae800 100644
--- a/tools/libxl/libxl_event.h
+++ b/tools/libxl/libxl_event.h
@@ -184,6 +184,13 @@ void libxl_evdisable_domain_death(libxl_ctx *ctx, libxl_evgen_domain_death*);
    * may generate only a DEATH event.
    */
 
+typedef struct libxl__aer_watch libxl_aer_watch;
+int libxl_reg_aer_events_handler(libxl_ctx *ctx, uint32_t domid);
+  /*
+   * Registers a handler for unrecoverable AER errors reported for PCI
+   * devices passed through to domain domid.  When such an error is
+   * reported, the failed device is forcibly removed from the guest.
+   * Returns 0 on success, nonzero on failure.
+   */
+void libxl_unreg_aer_events_handler(libxl_ctx *ctx, uint32_t domid);
+  /*
+   * Unregisters the handler previously registered for domid with
+   * libxl_reg_aer_events_handler.  Does nothing if no handler is
+   * registered for that domain.
+   */
+
 typedef struct libxl__evgen_disk_eject libxl_evgen_disk_eject;
 int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t domid, const char *vdev,
                         libxl_ev_user, libxl_evgen_disk_eject **evgen_out);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 506687fbe9..7972490050 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -356,6 +356,14 @@ struct libxl__ev_child {
     LIBXL_LIST_ENTRY(struct libxl__ev_child) entry;
 };
 
+/*
+ * Per-domain state for AER event handling; one node of a singly linked list.
+ */
+struct libxl__aer_watch {
+    uint32_t domid;                 /* guest domain the watch was set up for */
+    libxl__ev_xswatch watch;        /* xenstore watch on the aerFailedSBDF node */
+    struct libxl__aer_watch *next;  /* next list entry, or NULL at the tail */
+};
 
 /*
  * evgen structures, which are the state we use for generating
diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
index 4755a0c93c..c121c9f8cc 100644
--- a/tools/libxl/libxl_pci.c
+++ b/tools/libxl/libxl_pci.c
@@ -1686,6 +1686,129 @@ static int libxl_device_pci_compare(libxl_device_pci *d1,
     return COMPARE_PCI(d1, d2);
 }
 
+/*
+ * xenstore watch callback for unrecoverable AER errors.
+ *
+ * Events whose final path component is not "aerFailedSBDF" are ignored.
+ * Otherwise the node is read, parsed as "domain:bus:device.function"
+ * (all fields hexadecimal), and the named PCI device is forcibly removed
+ * from the guest recorded in the enclosing libxl_aer_watch.
+ *
+ * watch_path is unused; event_path is the xenstore path that fired.
+ */
+static void aer_backend_watch_callback(libxl__egc *egc,
+                                       libxl__ev_xswatch *watch,
+                                       const char *watch_path,
+                                       const char *event_path)
+{
+    EGC_GC;
+    libxl_aer_watch *aer_ws = CONTAINER_OF(watch, *aer_ws, watch);
+    int rc;
+    uint32_t dom, bus, dev, fn;
+    uint32_t domid = aer_ws->domid;
+    const char *p;
+    const char *aerFailedSBDF;
+    libxl_device_pci pcidev;
+
+    /* Only react to events on the aerFailedSBDF node itself. */
+    p = strrchr(event_path, '/');
+    if ((p == NULL) || (strcmp(p, "/aerFailedSBDF") != 0))
+        return;
+
+    /*
+     * Fetch the value of the failed PCI device.  event_path already names
+     * the aerFailedSBDF node, so it can be read directly.
+     */
+    rc = libxl__xs_read_checked(gc, XBT_NULL, event_path, &aerFailedSBDF);
+    if (rc || !aerFailedSBDF)
+        return;
+    LOGD(ERROR, domid, " aerFailedSBDF = %s", aerFailedSBDF);
+
+    /*
+     * Refuse to act on a malformed value: unless all four fields parse,
+     * the variables below would be used uninitialised and an arbitrary
+     * device could be removed.
+     */
+    if (sscanf(aerFailedSBDF, "%x:%x:%x.%x", &dom, &bus, &dev, &fn) != 4) {
+        LOGD(ERROR, domid, " malformed aerFailedSBDF value \"%s\"",
+                aerFailedSBDF);
+        return;
+    }
+
+    libxl_device_pci_init(&pcidev);
+    pcidev_struct_fill(&pcidev, dom, bus, dev, fn, 0);
+    /* Forcibly remove the device from the guest */
+    rc = libxl__device_pci_remove_common(gc, domid, &pcidev, 1);
+    if (rc)
+        LOGD(ERROR, domid, " libxl__device_pci_remove_common() failed, rc=x%x",
+                (unsigned int)rc);
+}
+
+/*
+ * Maintains the singly linked list of AER watch structures, whose head is
+ * kept in a function-local static.
+ *
+ * If in is non-NULL it is pushed onto the head of the list (domid is
+ * ignored) and returned.  If in is NULL, the first entry whose domid
+ * matches is unlinked from the list and returned, or NULL is returned if
+ * there is no such entry.  The list no longer references a returned
+ * entry.
+ *
+ * NOTE(review): no locking is visible here — confirm that callers
+ * serialise access to the list.
+ */
+static libxl_aer_watch *manage_aer_ws_list(libxl_aer_watch *in, uint32_t domid)
+{
+    static libxl_aer_watch *aer_ws = NULL;
+    libxl_aer_watch *iter, *prev = NULL;
+
+    if (in) {
+        /* Always overwrite next so a stale value cannot corrupt the list. */
+        in->next = aer_ws;
+        aer_ws = in;
+        return in;
+    }
+
+    for (iter = aer_ws; iter; prev = iter, iter = iter->next) {
+        if (iter->domid != domid)
+            continue;
+        if (prev)
+            prev->next = iter->next;
+        else
+            aer_ws = iter->next;
+        /* Detach fully so the removed node keeps no pointer into the list. */
+        iter->next = NULL;
+        break;
+    }
+    return iter;
+}
+
+/* Adds aer_ws to the AER watch list. */
+static void store_aer_ws(libxl_aer_watch *aer_ws)
+{
+    /* The domid argument is not consulted when inserting. */
+    (void)manage_aer_ws_list(aer_ws, 0);
+}
+
+/*
+ * Unlinks and returns the first AER watch list entry for domid, or NULL
+ * if the list holds no entry for that domain.
+ */
+static libxl_aer_watch *retrieve_aer_ws(uint32_t domid)
+{
+    libxl_aer_watch *found = manage_aer_ws_list(NULL, domid);
+
+    return found;
+}
+
+/*
+ * Registers a xenstore watch on the aerFailedSBDF node of the PCI backend
+ * for domid, with aer_backend_watch_callback as its handler.
+ *
+ * Returns 0 on success; otherwise the error returned by
+ * libxl__get_domid() or libxl__ev_xswatch_register().
+ */
+int libxl_reg_aer_events_handler(libxl_ctx *ctx, uint32_t domid)
+{
+    int rc = 0;
+    char *be_path;
+    uint32_t pciback_domid;
+    libxl_aer_watch *aer_ws;
+    GC_INIT(ctx);
+
+    rc = libxl__get_domid(gc, &pciback_domid);
+    if (rc) {
+        LOGD(ERROR, domid, " libxl__get_domid() failed, rc = %d", rc);
+        goto out;
+    }
+
+    /* NOGC: the node must outlive this call; it is freed on unregister. */
+    aer_ws = libxl__calloc(NOGC, 1, sizeof(*aer_ws));
+    aer_ws->domid = domid;
+    aer_ws->next = NULL;
+
+    /*
+     * NOTE(review): pciback_domid is also used as the device-id component
+     * of the backend path — confirm this is intended.
+     */
+    be_path = GCSPRINTF("/local/domain/%u/backend/pci/%u/%u/%s",
+            pciback_domid, domid, pciback_domid, "aerFailedSBDF");
+    rc = libxl__ev_xswatch_register(gc, &aer_ws->watch,
+            aer_backend_watch_callback, be_path);
+    if (rc) {
+        /* Do not leak the node or leave an unregistered watch on the list. */
+        free(aer_ws);
+        goto out;
+    }
+
+    /* Publish only once the watch is live, so unregistering is safe. */
+    store_aer_ws(aer_ws);
+
+out:
+    GC_FREE;
+    return rc;
+}
+
+/*
+ * Removes the AER watch entry for domid from the list, deregisters its
+ * xenstore watch and frees it.  Does nothing if no entry exists for
+ * domid.
+ */
+void libxl_unreg_aer_events_handler(libxl_ctx *ctx, uint32_t domid)
+{
+    GC_INIT(ctx);
+    libxl_aer_watch *entry = retrieve_aer_ws(domid);
+
+    if (entry != NULL) {
+        libxl__ev_xswatch_deregister(gc, &entry->watch);
+        free(entry);
+    }
+
+    GC_FREE;
+}
+
 #define libxl__device_pci_update_devid NULL
 
 DEFINE_DEVICE_TYPE_STRUCT_X(pcidev, pci, PCI);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.