[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 07/28] x86/hvm: Introduce a emulated VTD for HVM



On Fri, Feb 09, 2018 at 04:27:54PM +0000, Roger Pau Monné wrote:
>On Fri, Nov 17, 2017 at 02:22:14PM +0800, Chao Gao wrote:
>> This patch adds create/destroy function for the emulated VTD
>> and adapts it to the common VIOMMU abstraction.
>> 
>> As the Makefile is changed here, put all files in alphabetic order
>> by this chance.
>> 
>> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
>> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
>> 
>> ---
>> v4:
>> - use REGISTER_VIOMMU
>> - shrink the size of hvm_hw_vvtd_regs
>> - make hvm_hw_vvtd_regs a field inside struct vvtd
>> ---
>>  xen/drivers/passthrough/vtd/Makefile |   7 +-
>>  xen/drivers/passthrough/vtd/iommu.h  |   9 +++
>>  xen/drivers/passthrough/vtd/vvtd.c   | 150 
>> +++++++++++++++++++++++++++++++++++
>>  3 files changed, 163 insertions(+), 3 deletions(-)
>>  create mode 100644 xen/drivers/passthrough/vtd/vvtd.c
>> 
>> diff --git a/xen/drivers/passthrough/vtd/Makefile 
>> b/xen/drivers/passthrough/vtd/Makefile
>> index f302653..163c7fe 100644
>> --- a/xen/drivers/passthrough/vtd/Makefile
>> +++ b/xen/drivers/passthrough/vtd/Makefile
>> @@ -1,8 +1,9 @@
>>  subdir-$(CONFIG_X86) += x86
>>  
>> -obj-y += iommu.o
>>  obj-y += dmar.o
>> -obj-y += utils.o
>> -obj-y += qinval.o
>>  obj-y += intremap.o
>> +obj-y += iommu.o
>> +obj-y += qinval.o
>>  obj-y += quirks.o
>> +obj-y += utils.o
>> +obj-$(CONFIG_VIOMMU) += vvtd.o
>> diff --git a/xen/drivers/passthrough/vtd/iommu.h 
>> b/xen/drivers/passthrough/vtd/iommu.h
>> index db80b31..f2ef3dd 100644
>> --- a/xen/drivers/passthrough/vtd/iommu.h
>> +++ b/xen/drivers/passthrough/vtd/iommu.h
>> @@ -47,6 +47,7 @@
>>  #define DMAR_IQH_REG            0x80 /* invalidation queue head */
>>  #define DMAR_IQT_REG            0x88 /* invalidation queue tail */
>>  #define DMAR_IQA_REG            0x90 /* invalidation queue addr */
>> +#define DMAR_IECTL_REG          0xa0 /* invalidation event control register 
>> */
>>  #define DMAR_IRTA_REG           0xb8 /* intr remap */
>>  
>>  #define OFFSET_STRIDE        (9)
>> @@ -89,6 +90,12 @@
>>  #define cap_afl(c)        (((c) >> 3) & 1)
>>  #define cap_ndoms(c)        (1 << (4 + 2 * ((c) & 0x7)))
>>  
>> +#define cap_set_num_fault_regs(c)   ((((c) - 1) & 0xff) << 40)
>> +#define cap_set_fault_reg_offset(c) ((((c) / 16) & 0x3ff) << 24)
>> +#define cap_set_mgaw(c)             ((((c) - 1) & 0x3f) << 16)
>> +#define cap_set_sagaw(c)            (((c) & 0x1f) << 8)
>> +#define cap_set_ndoms(c)            ((c) & 0x7)
>> +
>>  /*
>>   * Extended Capability Register
>>   */
>> @@ -114,6 +121,8 @@
>>  #define ecap_niotlb_iunits(e)    ((((e) >> 24) & 0xff) + 1)
>>  #define ecap_iotlb_offset(e)     ((((e) >> 8) & 0x3ff) * 16)
>>  
>> +#define ecap_set_mhmv(e)         (((e) & 0xf) << 20)
>> +
>>  /* IOTLB_REG */
>>  #define DMA_TLB_FLUSH_GRANU_OFFSET  60
>>  #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
>> diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
>> b/xen/drivers/passthrough/vtd/vvtd.c
>> new file mode 100644
>> index 0000000..9f76ccf
>> --- /dev/null
>> +++ b/xen/drivers/passthrough/vtd/vvtd.c
>> @@ -0,0 +1,150 @@
>> +/*
>> + * vvtd.c
>> + *
>> + * virtualize VTD for HVM.
>> + *
>> + * Copyright (C) 2017 Chao Gao, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms and conditions of the GNU General Public
>> + * License, version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public
>> + * License along with this program; If not, see 
>> <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <xen/sched.h>
>> +#include <xen/types.h>
>> +#include <xen/viommu.h>
>> +#include <xen/xmalloc.h>
>> +#include <asm/current.h>
>> +#include <asm/hvm/domain.h>
>> +
>> +#include "iommu.h"
>> +
>> +/* Supported capabilities by vvtd */
>> +#define VVTD_MAX_CAPS VIOMMU_CAP_IRQ_REMAPPING
>> +
>> +#define VVTD_FRCD_NUM   1ULL
>> +#define VVTD_FRCD_START (DMAR_IRTA_REG + 8)
>> +#define VVTD_FRCD_END   (VVTD_FRCD_START + VVTD_FRCD_NUM * 16)
>> +#define VVTD_MAX_OFFSET VVTD_FRCD_END
>> +
>> +struct hvm_hw_vvtd {
>> +    uint32_t regs[VVTD_MAX_OFFSET/sizeof(uint32_t)];
>
>Unless I'm mistaken this is 208bytes in size, yet you only seem to use
>28bytes (from the registers used in vvtd_reset). I guess this is going
>to change over the series so all this space is really needed.

Yes except I bump up the offset when introducing "queue invalidation"
and "fault reporting" which are two features of VT-d.

>
>Also I think this would be better as:
>
>union hw_vvtd {
>    uint32_t regs32[VVTD_MAX_OFFSET/sizeof(uint32_t)];
>    uint64_t regs64[VVTD_MAX_OFFSET/sizeof(uint64_t)];
>};
>

Will do.

>> +};
>> +
>> +struct vvtd {
>> +    /* Base address of remapping hardware register-set */
>> +    uint64_t base_addr;
>> +    /* Point back to the owner domain */
>> +    struct domain *domain;
>> +
>> +    struct hvm_hw_vvtd hw;
>> +};
>> +
>> +/* Setting viommu_verbose enables debugging messages of vIOMMU */
>> +bool __read_mostly viommu_verbose;
>
>static?
>
>> +boolean_runtime_param("viommu_verbose", viommu_verbose);
>> +
>> +#ifndef NDEBUG
>> +#define vvtd_info(fmt...) do {                    \
>> +    if ( viommu_verbose )                         \
>> +        gprintk(XENLOG_INFO, ## fmt);             \
>> +} while(0)
>> +/*
>> + * Use printk and '_G_' prefix because vvtd_debug() may be called
>> + * in the context of another domain's vCPU. Don't output 'current'
>> + * information to avoid confusion.
>> + */
>> +#define vvtd_debug(fmt...) do {                   \
>> +    if ( viommu_verbose && printk_ratelimit())    \
>> +        printk(XENLOG_G_DEBUG fmt);               \
>
>I think printk is already rate-limited if you use _G_, so no need for
>the ratelimit call.
>
>> +} while(0)
>> +#else
>> +#define vvtd_info(...) do {} while(0)
>> +#define vvtd_debug(...) do {} while(0)
>> +#endif
>> +
>> +#define VVTD_REG_POS(vvtd, offset) &(vvtd->hw.regs[offset/sizeof(uint32_t)])
>> +
>> +static inline void vvtd_set_reg(struct vvtd *vvtd, uint32_t reg, uint32_t 
>> value)
>
>I don't think you need the vvtd prefix here, and I would leave adding
>inline to the compiler discretion:
>
>static void set_reg32(struct vvtd *vvtd, unsigned long offset, uint32_t val)
>{
>    vvtd->hw.regs32[offset / 4] = val
>}
>
>But I think you can even get rid of the helper functions and just use
>macros directly, ie:
>
>#define GET_REG(vvtd, offset, size) \
>    ((vvtd)->hw.regs ## size [(offset) / size / 8 ])
>#define SET_REG(vvtd, offset, val, size) \
>    (GET_REG(vvtd, offset, val) = val)
>
>This is better IMHO, and I'm not really sure the SET_REG macro is
>really needed, you can just open code GET_REG(...) = val;

Got it.

>
>> +{
>> +    *VVTD_REG_POS(vvtd, reg) = value;
>> +}
>> +
>> +static inline uint32_t vvtd_get_reg(const struct vvtd *vvtd, uint32_t reg)
>> +{
>> +    return *VVTD_REG_POS(vvtd, reg);
>> +}
>> +
>> +static inline void vvtd_set_reg_quad(struct vvtd *vvtd, uint32_t reg,
>> +                                     uint64_t value)
>> +{
>> +    *(uint64_t*)VVTD_REG_POS(vvtd, reg) = value;
>> +}
>> +
>> +static inline uint64_t vvtd_get_reg_quad(const struct vvtd *vvtd, uint32_t 
>> reg)
>> +{
>> +    return *(uint64_t*)VVTD_REG_POS(vvtd, reg);
>> +}
>> +
>> +static void vvtd_reset(struct vvtd *vvtd)
>> +{
>> +    uint64_t cap = cap_set_num_fault_regs(VVTD_FRCD_NUM)
>> +                   | cap_set_fault_reg_offset(VVTD_FRCD_START)
>> +                   | cap_set_mgaw(39) /* maximum guest address width */
>> +                   | cap_set_sagaw(2) /* support 3-level page_table */
>> +                   | cap_set_ndoms(6); /* support 64K domains */
>> +    uint64_t ecap = DMA_ECAP_QUEUED_INVAL | DMA_ECAP_INTR_REMAP | 
>> DMA_ECAP_EIM |
>> +                    ecap_set_mhmv(0xf);
>> +
>> +    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
>> +    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
>> +    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
>> +    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
>> +    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
>> +}
>> +
>> +static int vvtd_create(struct domain *d, struct viommu *viommu)
>> +{
>> +    struct vvtd *vvtd;
>> +
>> +    if ( !is_hvm_domain(d) || (viommu->base_address & (PAGE_SIZE - 1)) ||
>> +         (~VVTD_MAX_CAPS & viommu->caps) )
>> +        return -EINVAL;
>> +
>> +    vvtd = xzalloc_bytes(sizeof(struct vvtd));
>
>vvtd = xzalloc(struct vvtd);
>
>> +    if ( !vvtd )
>> +        return ENOMEM;
>> +
>> +    vvtd_reset(vvtd);
>> +    vvtd->base_addr = viommu->base_address;
>
>I think it would be good to have some check here, so that the vIOMMU
>is not for example positioned on top of a RAM region. Ideally you
>should check that the gfns [base_address, base_address + size) are
>unpopulated.

Yes. Except some checks here, this page should be reserved in guest e820,
which implies some work in qemu or tool stack.

>
>> +    vvtd->domain = d;
>> +
>> +    viommu->priv = vvtd;
>> +
>> +    return 0;
>> +}
>> +
>> +static int vvtd_destroy(struct viommu *viommu)
>> +{
>> +    struct vvtd *vvtd = viommu->priv;
>> +
>> +    if ( vvtd )
>> +        xfree(vvtd);
>> +
>> +    return 0;
>> +}
>> +
>> +static const struct viommu_ops vvtd_hvm_vmx_ops = {
>
>Is the vmx needed? vvtd is already Intel specific AFAICT. You could
>probably omit the hvm also, so vvtd_ops.

Will do.

Thanks
Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.