
Re: [Xen-devel] [PATCH 7/7] vm-event/arm: implement support for control-register write vm-events



On 6/16/2016 7:49 PM, Julien Grall wrote:
Hello Corneliu,

On 16/06/16 15:13, Corneliu ZUZU wrote:
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 8c50685..af61ac3 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -43,6 +43,7 @@
  #include <asm/mmio.h>
  #include <asm/cpufeature.h>
  #include <asm/flushtlb.h>
+#include <asm/traps.h>

  #include "decode.h"
  #include "vtimer.h"
@@ -124,7 +125,12 @@ void init_traps(void)
WRITE_SYSREG((HCPTR_CP_MASK & ~(HCPTR_CP(10) | HCPTR_CP(11))) | HCPTR_TTA,
                   CPTR_EL2);

-    /* Setup hypervisor traps */
+    /* Setup hypervisor traps
+     *
+     * Note: HCR_TVM bit is also set for system-register write monitoring
+     * purposes (see vm_event_monitor_cr), but (for performance reasons) that's
+     * done selectively (see vcpu_enter_adjust_traps).
+     */
WRITE_SYSREG(HCR_PTW|HCR_BSU_INNER|HCR_AMO|HCR_IMO|HCR_FMO|HCR_VM|
HCR_TWE|HCR_TWI|HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB,
                   HCR_EL2);
@@ -1720,6 +1726,7 @@ static void do_cp15_32(struct cpu_user_regs *regs,
      const struct hsr_cp32 cp32 = hsr.cp32;
      int regidx = cp32.reg;
      struct vcpu *v = current;
+    register_t r = get_user_reg(regs, regidx);

      if ( !check_conditional_instr(regs, hsr) )
      {
@@ -1730,6 +1737,61 @@ static void do_cp15_32(struct cpu_user_regs *regs,
      switch ( hsr.bits & HSR_CP32_REGS_MASK )
      {
      /*
+     * HCR_EL2.TVM / HCR.TVM
+     *
+     * ARMv7 (DDI 0406C.b): B1.14.13

Please try to mention the most recent spec. This was published in 2012, the latest release (C.c) was published in 2014.

Ack.


+     * ARMv8 (DDI 0487A.e): D1-1569 Table D1-34

Ditto. This was published in 2014, whilst the most recent was published in 2016.

+     */
+    case HSR_CPREG32(SCTLR):
+        TVM_EMUL_VMEVT(v, regs, hsr, r, SCTLR);
+        break;
+    case HSR_CPREG32(TTBR0_32):
+        TVM_EMUL_VMEVT(v, regs, hsr, r, TTBR0_32);
+        break;
+    case HSR_CPREG32(TTBR1_32):
+        TVM_EMUL_VMEVT(v, regs, hsr, r, TTBR1_32);
+        break;
+    case HSR_CPREG32(TTBCR):
+        TVM_EMUL_VMEVT(v, regs, hsr, r, TTBCR);
+        break;
+    case HSR_CPREG32(DACR):
+        TVM_EMUL(regs, hsr, r, DACR);
+        break;
+    case HSR_CPREG32(DFSR):
+        TVM_EMUL(regs, hsr, r, DFSR);
+        break;
+    case HSR_CPREG32(IFSR):
+        TVM_EMUL(regs, hsr, r, IFSR);
+        break;
+    case HSR_CPREG32(DFAR):
+        TVM_EMUL(regs, hsr, r, DFAR);
+        break;
+    case HSR_CPREG32(IFAR):
+        TVM_EMUL(regs, hsr, r, IFAR);
+        break;
[...]
+    case HSR_CPREG32(ADFSR):
+        TVM_EMUL(regs, hsr, r, ADFSR);
+        break;
+    case HSR_CPREG32(AIFSR):
+        TVM_EMUL(regs, hsr, r, AIFSR);
+        break;
+    case HSR_CPREG32(MAIR0):
+        TVM_EMUL(regs, hsr, r, MAIR0);
+        break;
+    case HSR_CPREG32(MAIR1):
+        TVM_EMUL(regs, hsr, r, MAIR1);
+        break;

Please mention that PRRR and NMRR are aliased to MAIR0 and MAIR1 respectively. This will avoid spending time trying to understand why the spec says they are trapped but you don't "handle" them.

I mentioned that in traps.h (see "AKA" comments). Will put the same comment here then.
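Something along these lines, perhaps (a sketch of the intended comment only):

    /*
     * Note: PRRR and NMRR are architecturally mapped (aliased) to MAIR0 and
     * MAIR1 respectively, which is why there are no separate cases for them
     * here even though the spec lists them among the TVM-trapped registers.
     */
    case HSR_CPREG32(MAIR0):
        TVM_EMUL(regs, hsr, r, MAIR0);
        break;
    case HSR_CPREG32(MAIR1):
        TVM_EMUL(regs, hsr, r, MAIR1);
        break;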


+    case HSR_CPREG32(AMAIR0):
+        TVM_EMUL(regs, hsr, r, AMAIR0);
+        break;
+    case HSR_CPREG32(AMAIR1):
+        TVM_EMUL(regs, hsr, r, AMAIR1);
+        break;
+    case HSR_CPREG32(CONTEXTIDR):
+        TVM_EMUL(regs, hsr, r, CONTEXTIDR);
+        break;
+
+    /*
       * !CNTHCTL_EL2.EL1PCEN / !CNTHCTL.PL1PCEN
       *
       * ARMv7 (DDI 0406C.b): B4.1.22
@@ -1853,6 +1915,13 @@ static void do_cp15_32(struct cpu_user_regs *regs,
  static void do_cp15_64(struct cpu_user_regs *regs,
                         const union hsr hsr)
  {
+    struct vcpu *v = current;
+    const struct hsr_cp64 cp64 = hsr.cp64;
+    sysreg64_t r = {
+        .low32 = (uint32_t) get_user_reg(regs, cp64.reg1),

The cast is not necessary.

Ack.


+        .high32 = (uint32_t) get_user_reg(regs, cp64.reg2)

Ditto.

+    };
+
      if ( !check_conditional_instr(regs, hsr) )
      {
          advance_pc(regs, hsr);
@@ -1862,6 +1931,19 @@ static void do_cp15_64(struct cpu_user_regs *regs,
      switch ( hsr.bits & HSR_CP64_REGS_MASK )
      {
      /*
+     * HCR_EL2.TVM / HCR.TVM
+     *
+     * ARMv7 (DDI 0406C.b): B1.14.13
+     * ARMv8 (DDI 0487A.e): D1-1569 Table D1-34

Same remarks as above for the spec version.

+     */
+    case HSR_CPREG64(TTBR0):
+        TVM_EMUL_VMEVT(v, regs, hsr, r.val64, TTBR0_64);
+        break;
+    case HSR_CPREG64(TTBR1):
+        TVM_EMUL_VMEVT(v, regs, hsr, r.val64, TTBR1_64);
+        break;
+
+    /*
       * !CNTHCTL_EL2.EL1PCEN / !CNTHCTL.PL1PCEN
       *
       * ARMv7 (DDI 0406C.b): B4.1.22
@@ -1891,8 +1973,6 @@ static void do_cp15_64(struct cpu_user_regs *regs,
       */
      default:
          {
-            const struct hsr_cp64 cp64 = hsr.cp64;
-
              gdprintk(XENLOG_ERR,
"%s p15, %d, r%d, r%d, cr%d @ 0x%"PRIregister"\n",
                       cp64.read ? "mrrc" : "mcrr",
@@ -2128,10 +2208,50 @@ static void do_sysreg(struct cpu_user_regs *regs,
  {
      int regidx = hsr.sysreg.reg;
      struct vcpu *v = current;
+    register_t r = get_user_reg(regs, regidx);

      switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
      {
      /*
+     * HCR_EL2.TVM
+     *
+     * ARMv8 (DDI 0487A.e): D1-1569 Table D1-34

Ditto for the spec version.

[...]

diff --git a/xen/arch/arm/vm_event.c b/xen/arch/arm/vm_event.c
new file mode 100644
index 0000000..3f23fec
--- /dev/null
+++ b/xen/arch/arm/vm_event.c
@@ -0,0 +1,112 @@

[...]

+#include <xen/vm_event.h>
+#include <asm/traps.h>
+
+#if CONFIG_ARM_64
+
+#define MWSINF_SCTLR    32,SCTLR_EL1
+#define MWSINF_TTBR0    64,TTBR0_EL1
+#define MWSINF_TTBR1    64,TTBR1_EL1
+#define MWSINF_TTBCR    64,TCR_EL1
+
+#elif CONFIG_ARM_32
+
+#define MWSINF_SCTLR    32,SCTLR
+#define MWSINF_TTBR0    64,TTBR0
+#define MWSINF_TTBR1    64,TTBR1

The values are the same as for arm64 (*_EL1 is aliased to * for arm32). Please avoid duplication.

(see comment below about later reply)
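For reference, this is the direction I'd take (a sketch, relying on the arm32
aliases in asm-arm/cpregs.h, where SCTLR_EL1/TTBR0_EL1/TTBR1_EL1 alias their
32-bit counterparts on arm32 builds):

    /* Shared between arm64 and arm32 builds via the cpregs.h aliases: */
    #define MWSINF_SCTLR    32,SCTLR_EL1
    #define MWSINF_TTBR0    64,TTBR0_EL1
    #define MWSINF_TTBR1    64,TTBR1_EL1

That would leave only MWSINF_TTBCR (32-bit TTBCR vs. 64-bit TCR_EL1) and the
TTBR{0,1}_32 variants under #ifdef.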


+#define MWSINF_TTBR0_32 32,TTBR0_32
+#define MWSINF_TTBR1_32 32,TTBR1_32
+#define MWSINF_TTBCR    32,TTBCR
+
+#endif
+
+#define MWS_EMUL_(val, sz, r...) WRITE_SYSREG##sz((uint##sz##_t) (val), r)

The cast is not necessary.

+#define MWS_EMUL(r) CALL_MACRO(MWS_EMUL_, w->value, MWSINF_##r)
+
+static inline void vcpu_enter_write_data(struct vcpu *v)
+{
+    struct monitor_write_data *w = &v->arch.vm_event.write_data;
+
+    if ( likely(MWS_NOWRITE == w->status) )
+        return;
+
+    switch ( w->status )
+    {
+    case MWS_SCTLR:
+        MWS_EMUL(SCTLR);
+        break;
+    case MWS_TTBR0:
+        MWS_EMUL(TTBR0);
+        break;
+    case MWS_TTBR1:
+        MWS_EMUL(TTBR1);
+        break;
+#if CONFIG_ARM_32
+    case MWS_TTBR0_32:
+        MWS_EMUL(TTBR0_32);
+        break;
+    case MWS_TTBR1_32:
+        MWS_EMUL(TTBR1_32);
+        break;
+#endif

An AArch32 kernel can run on an AArch64 Xen. This means that TTBR{0,1}_32 could be trapped there too, and the write should therefore be emulated properly.

MCR TTBRx writes from AArch32 guests appear as writes of the low 32-bits of AArch64 TTBRx_EL1 (architecturally mapped).
MCRR TTBRx writes from AArch32 guests appear as writes to AArch64 TTBRx_EL1.
Therefore, in the end the register we need to write is still TTBRx_EL1.
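So on arm64 the switch could gain something like this (a sketch; the exact
treatment of the upper bits to be double-checked against the spec):

    #ifdef CONFIG_ARM_64
        case MWS_TTBR0_32:
            /* An AArch32 MCR write replaces only the low 32 bits of
             * TTBR0_EL1 (the upper bits are RES0 when EL1 is AArch32). */
            WRITE_SYSREG64(w->value & 0xffffffffUL, TTBR0_EL1);
            break;
        case MWS_TTBR1_32:
            WRITE_SYSREG64(w->value & 0xffffffffUL, TTBR1_EL1);
            break;
    #endif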


+    case MWS_TTBCR:
+        MWS_EMUL(TTBCR);
+        break;
+    default:
+        break;
+    }
+
+    w->status = MWS_NOWRITE;
+}
+
+static inline void vcpu_enter_adjust_traps(struct vcpu *v)
+{
+    register_t old_hcr, hcr;
+
+    hcr = (old_hcr = READ_SYSREG(HCR_EL2));
+
+    if ( unlikely(0 != v->domain->arch.monitor.write_ctrlreg_enabled) )
+        hcr |= HCR_TVM;
+    else
+        hcr &= ~HCR_TVM;
+
+    if ( unlikely(hcr != old_hcr) )
+    {
+        WRITE_SYSREG(hcr, HCR_EL2);
+        isb();
+    }

The HCR register is also updated in p2m_restore_state to set HCR_EL2.RW. I would prefer to have a single place where HCR is updated.

Maybe we should store the HCR value per-domain?

Yes, that would be nice. Will look into that for v2.
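Something like this, perhaps (field and helper names hypothetical):

    /* In struct arch_domain: */
    register_t hcr_el2;    /* cached HCR_EL2 value; single point of update */

    static inline void domain_hcr_update(struct domain *d,
                                         register_t clear, register_t set)
    {
        d->arch.hcr_el2 = (d->arch.hcr_el2 & ~clear) | set;
        WRITE_SYSREG(d->arch.hcr_el2, HCR_EL2);
        isb();
    }

with both p2m_restore_state (HCR_EL2.RW) and vcpu_enter_adjust_traps (HCR_TVM)
going through the same helper.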


+}
+
+void arch_vm_event_vcpu_enter(struct vcpu *v)
+{
+    vcpu_enter_write_data(v);
+    vcpu_enter_adjust_traps(v);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/monitor.c b/xen/common/monitor.c
index 2366bae..c35a717 100644
--- a/xen/common/monitor.c
+++ b/xen/common/monitor.c
@@ -62,7 +62,6 @@ int monitor_domctl(struct domain *d, struct xen_domctl_monitor_op *mop)

      switch ( mop->event )
      {
-#if CONFIG_X86
      case XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG:
      {
          struct arch_domain *ad = &d->arch;
@@ -100,7 +99,6 @@ int monitor_domctl(struct domain *d, struct xen_domctl_monitor_op *mop)

          break;
      }
-#endif

      case XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST:
      {
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 53dc048..e0f999e 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -824,7 +824,6 @@ int vm_event_monitor_traps(struct vcpu *v, uint8_t sync,
      return 1;
  }

-#if CONFIG_X86
  bool_t vm_event_monitor_cr(unsigned int index, unsigned long value,
                             unsigned long old)
  {
@@ -852,7 +851,6 @@ bool_t vm_event_monitor_cr(unsigned int index, unsigned long value,

      return 0;
  }
-#endif

  void vm_event_monitor_guest_request(void)
  {

[...]

diff --git a/xen/include/asm-arm/traps.h b/xen/include/asm-arm/traps.h
new file mode 100644
index 0000000..9e246a7
--- /dev/null
+++ b/xen/include/asm-arm/traps.h

This file is very difficult to review. Can you explain why you need to duplicate most of the things between the AArch64 and AArch32 port?

Whilst I understand that some registers are accessible in a different way between AArch64 and Aarch32, we were able to get rid of most of them in the Xen port by aliasing registers (see asm-arm/cpregs.h).

From a quick look, I believe this is possible to do and would greatly reduce the size/complexity of this file.

I realize all this looks complex; heck, I've spent about 3-4 working days trying to find an acceptable macro combination that is readable, useful (shortens code) and also works. Since the cover letter isn't the right place for such detailed explanations, I will come back with a reply detailing this patch alone. I just need some time to reinspect it, since I did this work a few months ago (went on sick leave).


@@ -0,0 +1,253 @@

[...]

+/*
+ * Emulation of system-register trapped writes that do not cause
+ * VM_EVENT_REASON_WRITE_CTRLREG monitor vm-events.
+ * Such writes are collaterally trapped due to setting the HCR_EL2.TVM bit.
+ *
+ * Regarding aarch32 domains, note that from Xen's perspective system-registers
+ * of such domains are architecturally-mapped to aarch64 registers in one of
+ * three ways:
+ *  - low 32-bits mapping   (e.g. aarch32 DFAR -> aarch64 FAR_EL1[31:0])
+ *  - high 32-bits mapping  (e.g. aarch32 IFAR -> aarch64 FAR_EL1[63:32])
+ *  - full mapping          (e.g. aarch32 SCTLR -> aarch64 SCTLR_EL1)
+ *
+ * Hence we define 2 macro variants:
+ *  - TVM_EMUL_SZ variant, for full mappings
+ *  - TVM_EMUL_LH variant, for low/high 32-bits mappings
+ */
+#define TVM_EMUL_SZ(regs, hsr, val, sz, r...) \
+{ \
+    if ( psr_mode_is_user(regs) ) \

Those registers are not accessible at EL0.

Hmm, I have this question noted.
I remember finding from the manuals that a write from user-mode of those regs would still trap to EL2, but I didn't test that yet.
Will put that to test and come back with results for v2.
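If the test confirms Julien's point (i.e. these are PL1-only registers, so a
user-mode access UNDEFs in the guest rather than trapping to EL2), the check
would simply go away, e.g.:

    #define TVM_EMUL_SZ(regs, hsr, val, sz, r...) \
        WRITE_SYSREG##sz(val, r)    /* no user-mode check needed */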


+        return inject_undef_exception(regs, hsr); \
+    WRITE_SYSREG##sz((uint##sz##_t) (val), r);

[...]

diff --git a/xen/include/asm-arm/vm_event.h b/xen/include/asm-arm/vm_event.h
index 4e5a272..edf9654 100644
--- a/xen/include/asm-arm/vm_event.h
+++ b/xen/include/asm-arm/vm_event.h
@@ -30,6 +30,12 @@ static inline int vm_event_init_domain(struct domain *d)

  static inline void vm_event_cleanup_domain(struct domain *d)
  {
+    struct vcpu *v;
+
+    for_each_vcpu ( d, v )
+        memset(&v->arch.vm_event, 0, sizeof(v->arch.vm_event));
+
+    memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
      memset(&d->monitor, 0, sizeof(d->monitor));
  }

@@ -41,7 +47,13 @@ static inline void vm_event_toggle_singlestep(struct domain *d, struct vcpu *v)
  static inline
void vm_event_register_write_resume(struct vcpu *v, vm_event_response_t *rsp)
  {
-    /* Not supported on ARM. */
+    /* X86 VM_EVENT_REASON_MOV_TO_MSR could (but shouldn't) end up here too. */

This should be an ASSERT/BUG_ON then.

We can't ASSERT/BUG_ON here because rsp->reason is controlled by the toolstack user.
This has to do with the implementation of vm_event_resume.
What I think we should do instead is to surround "case VM_EVENT_REASON_MOV_TO_MSR" there with #ifdef CONFIG_X86.
Would that be suitable?
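I.e. in vm_event_resume, something like this (a sketch):

    switch ( rsp.reason )
    {
    ...
    #ifdef CONFIG_X86
        case VM_EVENT_REASON_MOV_TO_MSR:
    #endif
        case VM_EVENT_REASON_WRITE_CTRLREG:
            vm_event_register_write_resume(v, &rsp);
            break;
    ...
    }

which would guarantee this function is only reached for WRITE_CTRLREG on ARM.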


+    if ( unlikely(VM_EVENT_REASON_WRITE_CTRLREG != rsp->reason) )
+        return;
+
+    if ( (rsp->flags & VM_EVENT_FLAG_DENY) &&
+         (rsp->flags & VM_EVENT_FLAG_VCPU_PAUSED) )
+        v->arch.vm_event.write_data.status = MWS_NOWRITE;
  }

  static inline
@@ -55,10 +67,7 @@ static inline void vm_event_fill_regs(vm_event_request_t *req)
      /* Not supported on ARM. */
  }

-static inline void arch_vm_event_vcpu_enter(struct vcpu *v)
-{
-    /* Nothing to do. */
-}
+void arch_vm_event_vcpu_enter(struct vcpu *v);

  /*
   * Monitor vm-events.
@@ -67,7 +76,8 @@ static inline uint32_t vm_event_monitor_get_capabilities(struct domain *d)
  {
      uint32_t capabilities = 0;

-    capabilities = (1U << XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST);
+    capabilities = (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
+                   (1U << XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST);

      return capabilities;
  }
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index 8f94e20..ec3eaca 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h

[...]


Regards,
Corneliu.
