x86/vMSI-X: avoid missing first unmask of vectors

Recent changes to Linux result in there just being a single unmask
operation prior to expecting the first interrupts to arrive. However,
we've had a chicken-and-egg problem here: Qemu invokes
xc_domain_update_msi_irq(), ultimately leading to msixtbl_pt_register(),
upon seeing that first unmask operation. Yet for msixtbl_range() to
return true (in order for msixtbl_write() to get invoked at all)
msixtbl_pt_register() must have completed.

Deal with this by snooping suitable writes in msixtbl_range() and
triggering the invocation of msix_write_completion() from
msixtbl_pt_register() when that happens in the context of a still in
progress vector control field write.

Note that the seemingly unrelated deletion of the redundant
irq_desc->msi_desc checks in msixtbl_pt_register() is to make clear to
any compiler version used that the "msi_desc" local variable isn't
being used uninitialized. (Doing the same in msixtbl_pt_unregister() is
just for consistency reasons.)

Signed-off-by: Jan Beulich
---
TODO: How to deal with REP MOVS to the MSI-X table (in msixtbl_range())?

--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -341,7 +352,21 @@ static int msixtbl_range(struct vcpu *v,
     desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, addr), addr);
     rcu_read_unlock(&msixtbl_rcu_lock);
 
-    return !!desc;
+    if ( desc )
+        return 1;
+
+    if ( (addr & (PCI_MSIX_ENTRY_SIZE - 1)) ==
+         PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+    {
+        const ioreq_t *r = &v->arch.hvm_vcpu.hvm_io.io_req;
+
+        ASSERT(r->type == IOREQ_TYPE_COPY);
+        if ( r->dir == IOREQ_WRITE && r->size == 4 && !r->data_is_ptr &&
+             !(r->data & PCI_MSIX_VECTOR_BITMASK) )
+            v->arch.hvm_vcpu.hvm_io.msix_snoop_address = addr;
+    }
+
+    return 0;
 }
 
 static const struct hvm_mmio_ops msixtbl_mmio_ops = {
@@ -410,9 +434,6 @@ int msixtbl_pt_register(struct domain *d
         return r;
     }
 
-    if ( !irq_desc->msi_desc )
-        goto out;
-
     msi_desc = irq_desc->msi_desc;
     if ( !msi_desc )
         goto out;
@@ -437,6 +458,21 @@ found:
 out:
     spin_unlock_irq(&irq_desc->lock);
     xfree(new_entry);
+
+    if ( !r )
+    {
+        struct vcpu *v;
+
+        for_each_vcpu ( d, v )
+        {
+            if ( v->arch.hvm_vcpu.hvm_io.msix_snoop_address ==
+                 (gtable + msi_desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) )
+                v->arch.hvm_vcpu.hvm_io.msix_unmask_address =
+                    v->arch.hvm_vcpu.hvm_io.msix_snoop_address;
+        }
+    }
+
     return r;
 }
 
@@ -457,9 +496,6 @@ void msixtbl_pt_unregister(struct domain
     if ( !irq_desc )
         return;
 
-    if ( !irq_desc->msi_desc )
-        goto out;
-
     msi_desc = irq_desc->msi_desc;
     if ( !msi_desc )
         goto out;
@@ -522,6 +558,8 @@ void msix_write_completion(struct vcpu *
 {
     unsigned long ctrl_address = v->arch.hvm_vcpu.hvm_io.msix_unmask_address;
 
+    v->arch.hvm_vcpu.hvm_io.msix_snoop_address = 0;
+
     if ( !ctrl_address )
         return;
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -85,6 +85,7 @@ struct hvm_vcpu_io {
     bool_t mmio_retry;
 
     unsigned long msix_unmask_address;
+    unsigned long msix_snoop_address;
 
     const struct g2m_ioport *g2m_ioport;
 };