[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v7 6/7] x86: add xen_iommu_ops to modify IOMMU mappings



>>> On 15.10.18 at 12:35, <paul.durrant@xxxxxxxxxx> wrote:
> --- a/xen/common/iommu_op.c
> +++ b/xen/common/iommu_op.c
> @@ -78,7 +78,205 @@ static int iommu_op_query_reserved(struct 
> xen_iommu_op_query_reserved *op)
>      return 0;
>  }
>  
> -static void iommu_op(xen_iommu_op_t *op)
> +static int iommu_op_enable_modification(
> +    struct xen_iommu_op_enable_modification *op)
> +{
> +    struct domain *currd = current->domain;
> +    struct domain_iommu *iommu = dom_iommu(currd);
> +    const struct iommu_ops *ops = iommu->platform_ops;
> +    int rc;
> +
> +    if ( op->cap || op->pad )
> +        return -EINVAL;
> +
> +    spin_lock(&iommu->lock);
> +
> +    /* Has modification already been enabled? */
> +    rc = 0;
> +    if ( iommu->domain_control )
> +        goto unlock;
> +
> +    /*
> +     * Modificaton of IOMMU mappings cannot be put under domain control if:
> +     * - this domain does not have IOMMU page tables, or
> +     * - HAP is enabled for this domain and the IOMMU shares the tables.
> +     */
> +    rc = -EACCES;
> +    if ( !has_iommu_pt(currd) || iommu_use_hap_pt(currd) )
> +        goto unlock;

I understand the latter restriction, but why the former? Wouldn't
it be reasonable for a domain to request self management at boot
time even if a first pass-through device was assigned only at a
later point in time?

> +static int iommuop_map(struct xen_iommu_op_map *op)

const?

> +{
> +    struct domain *d, *currd = current->domain;
> +    struct domain_iommu *iommu = dom_iommu(currd);
> +    bool readonly = op->flags & XEN_IOMMUOP_map_readonly;
> +    dfn_t dfn = _dfn(op->dfn);
> +    p2m_type_t p2mt;
> +    struct page_info *page;
> +    mfn_t ignore;
> +    unsigned int flags;
> +    int rc;
> +
> +    if ( op->pad || (op->flags & ~(XEN_IOMMUOP_map_all |
> +                                   XEN_IOMMUOP_map_readonly)) )
> +        return -EINVAL;
> +
> +    if ( !iommu->domain_control )
> +        return -EOPNOTSUPP;

-EACCES or -EPERM?

> +    /* Per-device mapping not yet supported */
> +    if ( !(op->flags & XEN_IOMMUOP_map_all) )
> +        return -EINVAL;
> +
> +    /* Check whether the specified DFN falls in a reserved region */
> +    if ( rangeset_contains_singleton(iommu->reserved_ranges, dfn_x(dfn)) )
> +        return -EINVAL;
> +
> +    d = rcu_lock_domain_by_any_id(op->domid);
> +    if ( !d )
> +        return -ESRCH;
> +
> +    rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);
> +    if ( rc )
> +        goto unlock_domain;
> +
> +    rc = -EINVAL;
> +    if ( p2mt != p2m_ram_rw ||
> +         (!readonly && !get_page_type(page, PGT_writable_page)) )

Why would r/o mappings of p2m_ram_ro not be permitted?

> +    {
> +        put_page(page);
> +        goto unlock_domain;
> +    }
> +
> +    spin_lock(&iommu->lock);
> +
> +    rc = iommu_lookup_page(currd, dfn, &ignore, &flags);
> +
> +    /* Treat a non-reference-counted entry as non-existent */
> +    if ( !rc )
> +        rc = !(flags & IOMMUF_refcount) ? -ENOENT : -EEXIST;
> +
> +    if ( rc != -ENOENT )
> +        goto unlock_iommu;

Isn't the combination of the two if()s the wrong way round, allowing
non-ref-counted entries to get replaced? I.e. don't you need to
swap the two latter operands of the conditional expression above?
The comment also looks to be somewhat off: Aiui you mean to
permit mapping into holes (-ENOENT), i.e. what you call "non-
existent" above. But maybe I'm confused ...

> +static int iommuop_unmap(struct xen_iommu_op_unmap *op)
> +{
> +    struct domain *d, *currd = current->domain;
> +    struct domain_iommu *iommu = dom_iommu(currd);
> +    dfn_t dfn = _dfn(op->dfn);
> +    mfn_t mfn;
> +    unsigned int flags;
> +    bool readonly;
> +    p2m_type_t p2mt;
> +    struct page_info *page;
> +    int rc;
> +
> +    if ( op->pad ||
> +         (op->flags & ~XEN_IOMMUOP_unmap_all) )
> +        return -EINVAL;
> +
> +    if ( !iommu->domain_control )
> +        return -EOPNOTSUPP;
> +
> +    /* Per-device unmapping not yet supported */
> +    if ( !(op->flags & XEN_IOMMUOP_unmap_all) )
> +        return -EINVAL;
> +
> +    d = rcu_lock_domain_by_any_id(op->domid);
> +    if ( !d )
> +        return -ESRCH;
> +
> +    spin_lock(&iommu->lock);
> +
> +    rc = iommu_lookup_page(currd, dfn, &mfn, &flags);
> +
> +    /* Treat a non-reference-counted entry as non-existent */
> +    if ( !rc )
> +        rc = !(flags & IOMMUF_refcount) ? -ENOENT : 0;
> +
> +    if ( rc )
> +        goto unlock;
> +
> +    readonly = !(flags & IOMMUF_writable);
> +
> +    /* Make sure the mapped frame matches */
> +    rc = check_get_page_from_gfn(d, _gfn(op->gfn), readonly, &p2mt, &page);

I'm not convinced that P2M_UNSHARE is really intended or necessary
for unmaps of writable mappings. With this there's then little point
having the local variable.

> +    if ( rc )
> +        goto unlock;
> +
> +    rc = !mfn_eq(mfn, page_to_mfn(page)) ? -EINVAL : 0;
> +
> +    /* Release reference taken above */
> +    put_page(page);
> +
> +    if ( rc )
> +        goto unlock;
> +
> +    /* Release references taken in map */
> +    if ( !readonly )
> +        put_page_type(page);
> +    put_page(page);

You may not drop these references before ...

> +    /*
> +     * This really should not fail. If it does, there is an implicit
> +     * domain_crash() (except in the case of the hardware domain) since
> +     * there is not a lot else that can be done to ensure the released
> +     * page can be safely re-used.
> +     */
> +    rc = iommu_unmap_page(currd, dfn);

... having completed the unmap _and_ the associated flush.

> @@ -86,13 +284,30 @@ static void iommu_op(xen_iommu_op_t *op)
>          op->status = iommu_op_query_reserved(&op->u.query_reserved);
>          break;
>  
> +    case XEN_IOMMUOP_enable_modification:
> +        op->status =
> +            iommu_op_enable_modification(&op->u.enable_modification);
> +        break;
> +
> +    case XEN_IOMMUOP_map:
> +        op->status = iommuop_map(&op->u.map);
> +        if ( !op->status )
> +            *need_flush = true;

Aren't you going quite a bit too far here? Replacing a not-present
entry with another one should, in the common case, not require
a flush. There may be a hardware dependency here (different
IOMMU kinds may behave differently).

> @@ -177,7 +403,8 @@ long do_iommu_op(unsigned int nr_bufs,
>  
>          rc = hypercall_create_continuation(__HYPERVISOR_iommu_op,
>                                             "ih", nr_bufs, bufs);
> -    }
> +    } else if ( !rc && need_flush )
> +        rc = iommu_iotlb_flush_all(current->domain);

Seems rather heavy a flush for perhaps just a single page map/unmap.

> @@ -296,6 +537,13 @@ int compat_one_iommu_op(compat_iommu_op_buf_t *buf)
>          if ( __copy_field_to_compat(h, &cmp, u.query_reserved.nr_entries) )
>              return -EFAULT;
>      }
> +    else if ( cmp.op == XEN_IOMMUOP_enable_modification )

Convert to switch()?

> --- a/xen/include/public/iommu_op.h
> +++ b/xen/include/public/iommu_op.h
> @@ -61,6 +61,101 @@ struct xen_iommu_op_query_reserved {
>      XEN_GUEST_HANDLE(xen_iommu_reserved_range_t) ranges;
>  };
>  
> +/*
> + * XEN_IOMMUOP_enable_modification: Enable operations that modify IOMMU
> + *                                  mappings.
> + */
> +#define XEN_IOMMUOP_enable_modification 2
> +
> +struct xen_iommu_op_enable_modification {
> +    /*
> +     * OUT - On successful return this is set to the bitwise OR of 
> capabilities
> +     *       defined below. On entry this must be set to zero.
> +     */
> +    uint32_t cap;
> +    uint32_t pad;

What's the point of this padding field?

> +struct xen_iommu_op_map {
> +    /* IN - The domid of the guest */
> +    domid_t domid;
> +    /*
> +     * IN - flags controlling the mapping. This should be a bitwise OR of the
> +     *      flags defined below.
> +     */
> +    uint16_t flags;
> +
> +    /*
> +     * Should the mapping be created for all initiators?
> +     *
> +     * NOTE: This flag is currently required as the implementation does not 
> yet
> +     *       support pre-device mappings.
> +     */
> +#define _XEN_IOMMUOP_map_all 0
> +#define XEN_IOMMUOP_map_all (1 << (_XEN_IOMMUOP_map_all))
> +
> +    /* Should the mapping be read-only to the initiator(s)? */
> +#define _XEN_IOMMUOP_map_readonly 1
> +#define XEN_IOMMUOP_map_readonly (1 << (_XEN_IOMMUOP_map_readonly))
> +
> +    uint32_t pad;
> +    /*
> +     * IN - Segment/Bus/Device/Function of the initiator.
> +     *
> +     * NOTE: This is ignored if XEN_IOMMUOP_map_all is set.
> +     */
> +    uint64_t sbdf;

Does this need to be 64 bits wide? Iirc segment numbers are 16 bit,
bus ones 8 bit, devices ones 5, and function ones 3. Sums up to 32.
Another question is whether, for portability's sake, this shouldn't
be a union with a PCI sub-structure. In this case for PCI 32 bits
would suffice, but reserving 64 bits for the general case would
perhaps indeed be a good idea.

> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -79,7 +79,10 @@
>  ?    vcpu_hvm_x86_64                 hvm/hvm_vcpu.h
>  !    iommu_op                        iommu_op.h
>  !    iommu_op_buf                    iommu_op.h
> +!    iommu_op_enable_modification    iommu_op.h

Shouldn't this be ? instead?

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.