[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH 21/26] x86/alternatives: Paravirt runtime selftest



Add a selftest that triggers paravirt_runtime_patch() which
toggles between the paravirt and native pv_lock_ops.

The selftest also registers an NMI handler, which exercises the
patched pv-ops by spin-lock operations. These are triggered via
artificially sent NMIs.

And last, introduce patch sites in the primary and secondary
patching code which are hit during the patching process.

Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
 arch/x86/Kconfig.debug        |  13 ++
 arch/x86/kernel/Makefile      |   1 +
 arch/x86/kernel/alternative.c |  20 +++
 arch/x86/kernel/kvm.c         |   4 +-
 arch/x86/kernel/pv_selftest.c | 264 ++++++++++++++++++++++++++++++++++
 arch/x86/kernel/pv_selftest.h |  15 ++
 6 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kernel/pv_selftest.c
 create mode 100644 arch/x86/kernel/pv_selftest.h

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e74690b028a..82a8e3fa68c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -252,6 +252,19 @@ config X86_DEBUG_FPU
 
          If unsure, say N.
 
+config DEBUG_PARAVIRT_SELFTEST
+       bool "Enable paravirt runtime selftest"
+       depends on PARAVIRT
+       depends on PARAVIRT_RUNTIME
+       depends on PARAVIRT_SPINLOCKS
+       depends on KVM_GUEST
+       help
+         This option enables sanity testing of the runtime paravirtualized
+         patching code. Triggered via debugfs.
+
+         Might help diagnose patching problems in different
+         configurations and loads.
+
 config PUNIT_ATOM_DEBUG
        tristate "ATOM Punit debug driver"
        depends on PCI
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ba89cabe5fcf..ed3c93681f12 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_APB_TIMER)             += apb_timer.o
 
 obj-$(CONFIG_AMD_NB)           += amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
+obj-$(CONFIG_DEBUG_PARAVIRT_SELFTEST) += pv_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)                += kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)         += paravirt.o paravirt_patch.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 385c3e6ea925..26407d7a54db 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -26,6 +26,7 @@
 #include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
+#include "pv_selftest.h"
 
 int __read_mostly alternatives_patched;
 
@@ -1549,6 +1550,12 @@ static void __maybe_unused text_poke_site(struct 
text_poke_state *tps,
         */
        poke_sync(tps, PATCH_SYNC_0, offset, &int3, INT3_INSN_SIZE);
 
+       /*
+        * We have an INT3 in place; execute a contrived selftest that
+        * has an insn sequence that is under patching.
+        */
+       pv_selftest_primary();
+
        /* Poke remaining */
        poke_sync(tps, PATCH_SYNC_1, offset + INT3_INSN_SIZE,
                  tp->text + INT3_INSN_SIZE, tp->native.len - INT3_INSN_SIZE);
@@ -1634,6 +1641,19 @@ static void text_poke_sync_site(struct text_poke_state 
*tps)
                smp_cond_load_acquire(&tps->state,
                                      prevstate != VAL);
 
+               /*
+                * Send an NMI to one of the other CPUs.
+                */
+               pv_selftest_send_nmi();
+
+               /*
+                * We have an INT3 in place; execute a contrived selftest that
+                * has an insn sequence that is under patching.
+                *
+                * Note that this function is also called from BP fixup but
+                * is just an NOP when called from there.
+                */
+               pv_selftest_secondary();
                prevstate = READ_ONCE(tps->state);
 
                /*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6efe0410fb72..e56d263159d7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -779,7 +779,7 @@ arch_initcall(kvm_alloc_cpumask);
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 
 /* Kick a cpu by its apicid. Used to wake up a halted vcpu */
-static void kvm_kick_cpu(int cpu)
+void kvm_kick_cpu(int cpu)
 {
        int apicid;
        unsigned long flags = 0;
@@ -790,7 +790,7 @@ static void kvm_kick_cpu(int cpu)
 
 #include <asm/qspinlock.h>
 
-static void kvm_wait(u8 *ptr, u8 val)
+void kvm_wait(u8 *ptr, u8 val)
 {
        unsigned long flags;
 
diff --git a/arch/x86/kernel/pv_selftest.c b/arch/x86/kernel/pv_selftest.c
new file mode 100644
index 000000000000..e522f444bd6e
--- /dev/null
+++ b/arch/x86/kernel/pv_selftest.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/memory.h>
+#include <linux/nmi.h>
+#include <linux/uaccess.h>
+#include <asm/apic.h>
+#include <asm/text-patching.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_types.h>
+#include "pv_selftest.h"
+
+static int nmi_selftest;
+static bool cond_state;
+
+#define SELFTEST_PARAVIRT      1
+static int test_mode;
+
+/*
+ * Mark this and the following functions __always_inline to ensure
+ * we generate multiple patch sites that can be hit independently
+ * in thread, NMI etc contexts.
+ */
+static __always_inline void selftest_pv(void)
+{
+       struct qspinlock test;
+
+       memset(&test, 0, sizeof(test));
+
+       test.locked = _Q_LOCKED_VAL;
+
+       /*
+        * Sits directly in the path of the test.
+        *
+        * The primary sets up an INT3 instruction at pv_queued_spin_unlock().
+        * Both the primary and secondary CPUs should hit that in both
+        * thread and NMI contexts.
+        *
+        * Additionally, this also gets inlined in nmi_pv_callback() so we
+        * should hit this with nmi_selftest.
+        *
+        * The fixup takes place in poke_int3_native().
+        */
+       pv_queued_spin_unlock(&test);
+}
+
+static __always_inline void patch_selftest(void)
+{
+       if (test_mode == SELFTEST_PARAVIRT)
+               selftest_pv();
+}
+
+static DEFINE_PER_CPU(int, selftest_count);
+void pv_selftest_secondary(void)
+{
+       /*
+        * On the secondary we execute the same code in both the
+        * thread-context and the BP-context and so would hit this
+        * recursively if we do inside the fixup context.
+        *
+        * So we trigger the selftest only if it's not ongoing already
+        * (thus allowing the thread or NMI context, but excluding
+        * the INT3 handling path.)
+        */
+       if (this_cpu_read(selftest_count))
+               return;
+
+       this_cpu_inc(selftest_count);
+
+       patch_selftest();
+
+       this_cpu_dec(selftest_count);
+}
+
+void pv_selftest_primary(void)
+{
+       patch_selftest();
+}
+
+/*
+ * NMI handler for the selftest; returns NMI_DONE when nmi_selftest == 0.
+ *  - nmi_selftest >= 1: run patch_selftest()
+ *  - nmi_selftest >= 2: also take and release a local spinlock
+ *  - nmi_selftest >= 3: add a udelay(1) inside that critical section
+ */
+static int nmi_callback(unsigned int val, struct pt_regs *regs)
+{
+       static DEFINE_SPINLOCK(nmi_spin);
+
+       if (!nmi_selftest)
+               goto out;
+
+       patch_selftest();
+
+       if (nmi_selftest >= 2) {
+               /*
+                * Depending on whether CONFIG_[UN]INLINE_SPIN_* are
+                * defined or not, these would either get patched or
+                * just create race conditions via NMIs.
+                */
+               spin_lock(&nmi_spin);
+
+               /* Dilate the critical section to force contention. */
+               if (nmi_selftest >= 3)
+                       udelay(1);
+
+               spin_unlock(&nmi_spin);
+       }
+
+       /*
+        * nmi_selftest > 0, but we should really have a bitmap where
+        * to check if this really was destined for us or not.
+        */
+       return NMI_HANDLED;
+out:
+       return NMI_DONE;
+}
+
+void pv_selftest_register(void)
+{
+       register_nmi_handler(NMI_LOCAL, nmi_callback,
+                            0, "paravirt_nmi_selftest");
+}
+
+void pv_selftest_unregister(void)
+{
+       unregister_nmi_handler(NMI_LOCAL, "paravirt_nmi_selftest");
+}
+
+void pv_selftest_send_nmi(void)
+{
+       int cpu = smp_processor_id();
+       /* Not from interrupt context (NMI or INT3); target next CPU, wrapping. */
+       if (nmi_selftest && !in_interrupt())
+               apic->send_IPI((cpu + 1) % num_online_cpus(), NMI_VECTOR);
+}
+
+/*
+ * Just declare these locally here instead of having them be
+ * exposed to the whole world.
+ */
+void kvm_wait(u8 *ptr, u8 val);
+void kvm_kick_cpu(int cpu);
+bool __raw_callee_save___kvm_vcpu_is_preempted(long cpu);
+static void pv_spinlocks(void)
+{
+       paravirt_stage_alt(cond_state,
+                          lock.queued_spin_lock_slowpath,
+                          __pv_queued_spin_lock_slowpath);
+       paravirt_stage_alt(cond_state, lock.queued_spin_unlock.func,
+                          PV_CALLEE_SAVE(__pv_queued_spin_unlock).func);
+       paravirt_stage_alt(cond_state, lock.wait, kvm_wait);
+       paravirt_stage_alt(cond_state, lock.kick, kvm_kick_cpu);
+
+       paravirt_stage_alt(cond_state,
+                          lock.vcpu_is_preempted.func,
+                          PV_CALLEE_SAVE(__kvm_vcpu_is_preempted).func);
+}
+
+void pv_trigger(void)
+{
+       bool nmi_mode = nmi_selftest ? true : false;
+       int ret;
+
+       pr_debug("%s: nmi=%d; NMI-mode=%d\n", __func__, nmi_selftest, nmi_mode);
+
+       mutex_lock(&text_mutex);
+
+       paravirt_stage_zero();
+       pv_spinlocks();
+
+       /*
+        * paravirt patching for pv_locks can potentially deadlock
+        * if we are running with nmi_mode=false and we get an NMI.
+        *
+        * For the sake of testing that path, we risk it. However, if
+        * we are generating synthetic NMIs (nmi_selftest > 0) then
+        * run with nmi_mode=true.
+        */
+       ret = paravirt_runtime_patch(nmi_mode);
+
+       /*
+        * Flip the state so we switch the pv_lock_ops on the next test.
+        */
+       cond_state = !cond_state;
+
+       mutex_unlock(&text_mutex);
+
+       pr_debug("%s: nmi=%d; NMI-mode=%d, ret=%d\n", __func__, nmi_selftest,
+                nmi_mode, ret);
+}
+
+static void pv_selftest_trigger(void)
+{
+       test_mode = SELFTEST_PARAVIRT;
+       pv_trigger();
+}
+
+static ssize_t pv_selftest_write(struct file *file, const char __user *ubuf,
+                                size_t count, loff_t *ppos)
+{
+       pv_selftest_register();
+       pv_selftest_trigger();
+       pv_selftest_unregister();
+
+       return count;
+}
+
+static ssize_t pv_nmi_read(struct file *file, char __user *ubuf,
+                          size_t count, loff_t *ppos)
+{
+       char buf[32];
+       unsigned int len;
+
+       len = snprintf(buf, sizeof(buf), "%d\n", nmi_selftest);
+       return simple_read_from_buffer(ubuf, count, ppos, buf, len);
+}
+
+static ssize_t pv_nmi_write(struct file *file, const char __user *ubuf,
+                           size_t count, loff_t *ppos)
+{
+       char buf[32];
+       unsigned int len;
+       unsigned int enabled;
+
+       len = min(sizeof(buf) - 1, count);
+       if (copy_from_user(buf, ubuf, len))
+               return -EFAULT;
+       /* Parse as unsigned to match 'enabled'; negative input is rejected. */
+       buf[len] = '\0';
+       if (kstrtouint(buf, 0, &enabled))
+               return -EINVAL;
+       /* Clamp to 3, the highest level nmi_callback() distinguishes. */
+       nmi_selftest = enabled > 3 ? 3 : enabled;
+
+       return count;
+}
+
+static const struct file_operations pv_selftest_fops = {
+       .read = NULL,
+       .write = pv_selftest_write,
+       .llseek = default_llseek,
+};
+
+static const struct file_operations pv_nmi_fops = {
+       .read = pv_nmi_read,
+       .write = pv_nmi_write,
+       .llseek = default_llseek,
+};
+
+static int __init pv_selftest_init(void)
+{
+       struct dentry *d = debugfs_create_dir("pv_selftest", NULL);
+
+       debugfs_create_file("toggle", 0600, d, NULL, &pv_selftest_fops);
+       debugfs_create_file("nmi", 0600, d, NULL, &pv_nmi_fops);
+
+       return 0;
+}
+
+late_initcall(pv_selftest_init);
diff --git a/arch/x86/kernel/pv_selftest.h b/arch/x86/kernel/pv_selftest.h
new file mode 100644
index 000000000000..5afa0f7db5cc
--- /dev/null
+++ b/arch/x86/kernel/pv_selftest.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PVR_SELFTEST_H
+#define _PVR_SELFTEST_H
+
+#ifdef CONFIG_DEBUG_PARAVIRT_SELFTEST
+void pv_selftest_send_nmi(void);
+void pv_selftest_primary(void);
+void pv_selftest_secondary(void);
+#else
+static inline void pv_selftest_send_nmi(void) { }
+static inline void pv_selftest_primary(void) { }
+static inline void pv_selftest_secondary(void) { }
+#endif /*! CONFIG_DEBUG_PARAVIRT_SELFTEST */
+
+#endif /* _PVR_SELFTEST_H */
-- 
2.20.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.