[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 05/10] x86/SVM: Add AVIC vmexit handlers



On 5/16/2018 10:56 AM, Jan Beulich wrote:
On 07.05.18 at 23:07, <Janakarajan.Natarajan@xxxxxxx> wrote:
@@ -180,6 +185,293 @@ int svm_avic_init_vmcb(struct vcpu *v)
  }
/*
+ * Note:
+ * This function handles the AVIC_INCOMP_IPI #vmexit when AVIC is enabled.
+ * The hardware generates this fault when an IPI could not be delivered
+ * to all targeted guest virtual processors because at least one guest
+ * virtual processor was not allocated to a physical core at the time.
+ */
+void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs)
+{
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
+    u32 icrh = vmcb->exitinfo1 >> 32;
+    u32 icrl = vmcb->exitinfo1;
+    u32 id = vmcb->exitinfo2 >> 32;
+    u32 index = vmcb->exitinfo2 & 0xFF;
+
+    switch ( id )
+    {
+    case AVIC_INCMP_IPI_ERR_INVALID_INT_TYPE:
+        /*
+         * AVIC hardware handles the delivery of
+         * IPIs when the specified Message Type is Fixed
+         * (also known as fixed delivery mode) and
+         * the Trigger Mode is edge-triggered. The hardware
+         * also supports self and broadcast delivery modes
+         * specified via the Destination Shorthand(DSH)
+         * field of the ICRL. Logical and physical APIC ID
+         * formats are supported. All other IPI types cause
+         * a #VMEXIT, which needs to be emulated.
Please utilize the permitted line length (also elsewhere).

+         */
+        vlapic_reg_write(curr, APIC_ICR2, icrh);
+        vlapic_reg_write(curr, APIC_ICR, icrl);
+        break;
+
+    case AVIC_INCMP_IPI_ERR_TARGET_NOT_RUN:
+    {
+        /*
+         * At this point, we expect that the AVIC HW has already
+         * set the appropriate IRR bits on the valid target
+         * vcpus. So, we just need to kick the appropriate vcpu.
+         */
+        struct vcpu *v;
+        uint32_t dest = GET_xAPIC_DEST_FIELD(icrh);
+        uint32_t short_hand = icrl & APIC_SHORT_MASK;
+        bool dest_mode = icrl & APIC_DEST_MASK;
+
+        for_each_vcpu ( currd,  v )
+        {
+            if ( v != curr &&
+                 vlapic_match_dest(vcpu_vlapic(v), vcpu_vlapic(curr),
+                                   short_hand, dest, dest_mode) )
+            {
+                vcpu_kick(v);
+                break;
Why do you break out of the loop here? With a shorthand more than
one vCPU might be the target.

+            }
+        }
+        break;
+    }
+
+    case AVIC_INCMP_IPI_ERR_INV_TARGET:
+        gprintk(XENLOG_ERR,
+                "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n",
%#08x produces something like 0x012345, rather than a full eight digits.
Preferably drop the #, or if you really think it's needed replace it by an
explicit 0x.

+                __func__, icrh, icrl, index);
Please use __func__ only when a log message really can't be disambiguated
another way.

For both of these - same further down.

+static avic_logical_id_entry_t *
+avic_get_logical_id_entry(struct svm_domain *d, u32 ldr, bool flat)
+{
+    unsigned int index;
+    unsigned int dest_id = GET_xAPIC_LOGICAL_ID(ldr);
+
+    if ( !dest_id )
+        return NULL;
+
+    if ( flat )
+    {
+        index = ffs(dest_id) - 1;
+        if ( index > 7 )
+            return NULL;
+    }
+    else
+    {
+        unsigned int cluster = (dest_id & 0xf0) >> 4;
+        int apic = ffs(dest_id & 0x0f) - 1;
+
+        if ( (apic < 0) || (apic > 7) || (cluster >= 0xf) )
I can't see a way for apic to be larger than 3 with the calculation above.

+            return NULL;
+        index = (cluster << 2) + apic;
+    }
+
+    ASSERT(index <= 255);
Which of the many possible meanings of 255 is this?

+static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid)
+{
+    avic_logical_id_entry_t *entry, new_entry;
+    u32 dfr = vlapic_reg_read(vcpu_vlapic(v), APIC_DFR);
Just to give another example - looks like this too could be vlapic_get_reg().

+static int avic_handle_ldr_update(struct vcpu *v)
+{
+    int ret = 0;
Pointless initializer.

+    u32 ldr = vlapic_reg_read(vcpu_vlapic(v), APIC_LDR);
+    u32 apic_id = vlapic_reg_read(vcpu_vlapic(v), APIC_ID);
+
+    if ( !ldr )
+        return -EINVAL;
+
+    ret = avic_ldr_write(v, GET_xAPIC_ID(apic_id), ldr, true);
+    if ( ret && v->arch.hvm_svm.avic_last_ldr )
+    {
+        /*
+         * Note:
+         * In case of failure to update LDR register,
+         * we set the guest physical APIC ID to 0,
+         * and set the logical APIC ID entry
+         * to invalid (false).
+         */
+        avic_ldr_write(v, 0, v->arch.hvm_svm.avic_last_ldr, false);
+        v->arch.hvm_svm.avic_last_ldr = 0;
+    }
+    else
+    {
+        /*
+         * Note:
+         * This saves the last valid LDR so that we
+         * know which entry in the local APIC ID
+         * to clean up when the LDR is updated.
+         */
+        v->arch.hvm_svm.avic_last_ldr = ldr;
The comment says "last valid", but you may get here also when the
first avic_ldr_write() failed. I think you mean

     if ( !ret )
     ...
     else if ( v->arch.hvm_svm.avic_last_ldr )
     ...

+static int avic_unaccel_trap_write(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    u32 offset = vmcb->exitinfo1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+    u32 reg = vlapic_reg_read(vcpu_vlapic(v), offset);
+
+    switch ( offset )
+    {
+    case APIC_ID:
+        /*
+         * Currently, we do not support APIC_ID update while
+         * the vcpus are running, which might require updating
+         * AVIC max APIC ID in all VMCBs. This would require
+         * synchronize update on all running VCPUs.
+         */
+        return X86EMUL_UNHANDLEABLE;
+
+    case APIC_LDR:
+        if ( avic_handle_ldr_update(v) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+
+    case APIC_DFR:
+        if ( avic_handle_dfr_update(v) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+
+    default:
+        break;
This default case is unnecessary.

+void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs)
+{
+    struct vcpu *curr = current;
+    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
+    u32 offset = vmcb->exitinfo1 & 0xFF0;
+    u32 rw = (vmcb->exitinfo1 >> 32) & 0x1;
bool?

+    if ( avic_is_trap(offset) )
+    {
+        /* Handling AVIC Trap (intercept right after the access). */
+        if ( !rw )
+        {
+            /*
+             * If a read trap happens, the CPU microcode does not
+             * implement the spec.
+             */
+            gprintk(XENLOG_ERR, "%s: Invalid #VMEXIT due to trap read (%#x)\n",
+                    __func__, offset);
+            domain_crash(curr->domain);
+        }
+
+        if ( avic_unaccel_trap_write(curr) != X86EMUL_OKAY )
ITYM "else if" here.

+        {
+            gprintk(XENLOG_ERR, "%s: Failed to handle trap write (%#x)\n",
+                    __func__, offset);
+            domain_crash(curr->domain);
+        }
+    }
+    else
+        /* Handling AVIC Fault (intercept before the access). */
+        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
+                                 X86_EVENT_NO_EC);
What's the rationale behind having chosen this function? I don't think it is
supposed to be called from outside the VM event code.

We wanted to handle the AVIC fault by emulating the instruction causing it.
Would this be better suited?

        struct hvm_emulate_ctxt ctxt;

        hvm_emulate_init_once(&ctxt, NULL, guest_cpu_user_regs());
        rc = hvm_emulate_one(&ctxt);

        switch ( rc )
        {
        case X86EMUL_UNHANDLEABLE:
        case X86EMUL_UNRECOGNIZED:
                hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
                break;

        case X86EMUL_EXCEPTION:
                hvm_inject_event(&ctxt.ctxt.event);
                break;
        }

        hvm_emulate_writeback(&ctxt);

Thanks,
Janak


+    return;
+}
Please omit such redundant return statements.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.