[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v5 1/2] Differentiate IO/mem resources tracked by ioreq server



> -----Original Message-----
> From: Yu, Zhang [mailto:yu.c.zhang@xxxxxxxxxxxxxxx]
> Sent: 17 August 2015 22:34
> To: Paul Durrant; xen-devel@xxxxxxxxxxxxx; Ian Jackson; Stefano Stabellini; 
> Ian
> Campbell; Wei Liu; Keir (Xen.org); jbeulich@xxxxxxxx; Andrew Cooper
> Cc: Kevin Tian; zhiyuan.lv@xxxxxxxxx
> Subject: Re: [Xen-devel] [PATCH v5 1/2] Differentiate IO/mem resources
> tracked by ioreq server
> 
> 
> 
> On 8/14/2015 9:51 PM, Paul Durrant wrote:
> >> -----Original Message-----
> >> From: Yu Zhang [mailto:yu.c.zhang@xxxxxxxxxxxxxxx]
> >> Sent: 14 August 2015 13:03
> >> To: xen-devel@xxxxxxxxxxxxx; Paul Durrant; Ian Jackson; Stefano Stabellini;
> Ian
> >> Campbell; Wei Liu; Keir (Xen.org); jbeulich@xxxxxxxx; Andrew Cooper
> >> Cc: Kevin Tian; zhiyuan.lv@xxxxxxxxx
> >> Subject: [PATCH v5 1/2] Differentiate IO/mem resources tracked by ioreq
> >> server
> >>
> >> Currently in ioreq server, guest write-protected ram pages are
> >> tracked in the same rangeset with device mmio resources. Yet
> >> unlike device mmio, which can be in big chunks, the guest write-
> >> protected pages may be discrete ranges with 4K bytes each.
> >>
> >> This patch uses a seperate rangeset for the guest ram pages.
> >> And a new ioreq type, IOREQ_TYPE_WP_MEM, is defined.
> >>
> >> Note: Previously, a new hypercall or subop was suggested to map
> >> write-protected pages into ioreq server. However, it turned out
> >> handler of this new hypercall would be almost the same with the
> >> existing pair - HVMOP_[un]map_io_range_to_ioreq_server, and there's
> >> already a type parameter in this hypercall. So no new hypercall
> >> defined, only a new type is introduced.
> >>
> >> Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx>
> >> ---
> >>   tools/libxc/include/xenctrl.h    | 31 ++++++++++++++++++++++
> >>   tools/libxc/xc_domain.c          | 55
> >> ++++++++++++++++++++++++++++++++++++++++
> >>   xen/arch/x86/hvm/hvm.c           | 25 +++++++++++++++++-
> >>   xen/include/asm-x86/hvm/domain.h |  4 +--
> >>   xen/include/public/hvm/hvm_op.h  |  1 +
> >>   xen/include/public/hvm/ioreq.h   |  1 +
> >>   6 files changed, 114 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> >> index de3c0ad..3c276b7 100644
> >> --- a/tools/libxc/include/xenctrl.h
> >> +++ b/tools/libxc/include/xenctrl.h
> >> @@ -2010,6 +2010,37 @@ int
> >> xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch,
> >>                                               int is_mmio,
> >>                                               uint64_t start,
> >>                                               uint64_t end);
> >> +/**
> >> + * This function registers a range of write-protected memory for
> emulation.
> >> + *
> >> + * @parm xch a handle to an open hypervisor interface.
> >> + * @parm domid the domain id to be serviced
> >> + * @parm id the IOREQ Server id.
> >> + * @parm start start of range
> >> + * @parm end end of range (inclusive).
> >> + * @return 0 on success, -1 on failure.
> >> + */
> >> +int xc_hvm_map_mem_range_to_ioreq_server(xc_interface *xch,
> >> +                                        domid_t domid,
> >> +                                        ioservid_t id,
> >> +                                        xen_pfn_t start,
> >> +                                        xen_pfn_t end);
> >> +
> >> +/**
> >> + * This function deregisters a range of write-protected memory for
> >> emulation.
> >> + *
> >> + * @parm xch a handle to an open hypervisor interface.
> >> + * @parm domid the domain id to be serviced
> >> + * @parm id the IOREQ Server id.
> >> + * @parm start start of range
> >> + * @parm end end of range (inclusive).
> >> + * @return 0 on success, -1 on failure.
> >> + */
> >> +int xc_hvm_unmap_mem_range_from_ioreq_server(xc_interface *xch,
> >> +                                            domid_t domid,
> >> +                                            ioservid_t id,
> >> +                                            xen_pfn_t start,
> >> +                                            xen_pfn_t end);
> >>
> >>   /**
> >>    * This function registers a PCI device for config space emulation.
> >> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> >> index 2ee26fb..9db05b3 100644
> >> --- a/tools/libxc/xc_domain.c
> >> +++ b/tools/libxc/xc_domain.c
> >> @@ -1552,6 +1552,61 @@ int
> >> xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch,
> domid_t
> >> domid,
> >>       return rc;
> >>   }
> >>
> >> +int xc_hvm_map_mem_range_to_ioreq_server(xc_interface *xch,
> >> domid_t domid,
> >> +                                        ioservid_t id, xen_pfn_t start, 
> >> xen_pfn_t end)
> >> +{
> >> +    DECLARE_HYPERCALL;
> >> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_io_range_t, arg);
> >> +    int rc;
> >> +
> >> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> >> +    if ( arg == NULL )
> >> +        return -1;
> >> +
> >> +    hypercall.op     = __HYPERVISOR_hvm_op;
> >> +    hypercall.arg[0] = HVMOP_map_io_range_to_ioreq_server;
> >> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> >> +
> >> +    arg->domid = domid;
> >> +    arg->id = id;
> >> +    arg->type = HVMOP_IO_RANGE_WP_MEM;
> >> +    arg->start = start;
> >> +    arg->end = end;
> >> +
> >> +    rc = do_xen_hypercall(xch, &hypercall);
> >> +
> >> +    xc_hypercall_buffer_free(xch, arg);
> >> +    return rc;
> >> +}
> >> +
> >> +int xc_hvm_unmap_mem_range_from_ioreq_server(xc_interface *xch,
> >> domid_t domid,
> >> +                                            ioservid_t id, xen_pfn_t 
> >> start, xen_pfn_t end)
> >> +{
> >> +    DECLARE_HYPERCALL;
> >> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_io_range_t, arg);
> >> +    int rc;
> >> +
> >> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> >> +    if ( arg == NULL )
> >> +        return -1;
> >> +
> >> +    hypercall.op     = __HYPERVISOR_hvm_op;
> >> +    hypercall.arg[0] = HVMOP_unmap_io_range_from_ioreq_server;
> >> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> >> +
> >> +    arg->domid = domid;
> >> +    arg->id = id;
> >> +    arg->type = HVMOP_IO_RANGE_WP_MEM;
> >> +    arg->start = start;
> >> +    arg->end = end;
> >> +
> >> +    rc = do_xen_hypercall(xch, &hypercall);
> >> +
> >> +    xc_hypercall_buffer_free(xch, arg);
> >> +    return rc;
> >> +
> >> +}
> >> +
> >>   int xc_hvm_map_pcidev_to_ioreq_server(xc_interface *xch, domid_t
> >> domid,
> >>                                         ioservid_t id, uint16_t segment,
> >>                                         uint8_t bus, uint8_t device,
> >> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> >> index c957610..5eb7864 100644
> >> --- a/xen/arch/x86/hvm/hvm.c
> >> +++ b/xen/arch/x86/hvm/hvm.c
> >> @@ -938,8 +938,9 @@ static int
> hvm_ioreq_server_alloc_rangesets(struct
> >> hvm_ioreq_server *s,
> >>
> >>           rc = asprintf(&name, "ioreq_server %d %s", s->id,
> >>                         (i == HVMOP_IO_RANGE_PORT) ? "port" :
> >> -                      (i == HVMOP_IO_RANGE_MEMORY) ? "memory" :
> >> +                      (i == HVMOP_IO_RANGE_MEMORY) ? "mmio" :
> >>                         (i == HVMOP_IO_RANGE_PCI) ? "pci" :
> >> +                      (i == HVMOP_IO_RANGE_WP_MEM) ? "wp-ed memory" :
> >>                         "");
> >>           if ( rc )
> >>               goto fail;
> >> @@ -1258,6 +1259,7 @@ static int
> >> hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
> >>               case HVMOP_IO_RANGE_PORT:
> >>               case HVMOP_IO_RANGE_MEMORY:
> >>               case HVMOP_IO_RANGE_PCI:
> >> +            case HVMOP_IO_RANGE_WP_MEM:
> >>                   r = s->range[type];
> >>                   break;
> >>
> >> @@ -1309,6 +1311,7 @@ static int
> >> hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t
> id,
> >>               case HVMOP_IO_RANGE_PORT:
> >>               case HVMOP_IO_RANGE_MEMORY:
> >>               case HVMOP_IO_RANGE_PCI:
> >> +            case HVMOP_IO_RANGE_WP_MEM:
> >>                   r = s->range[type];
> >>                   break;
> >>
> >> @@ -2523,6 +2526,8 @@ struct hvm_ioreq_server
> >> *hvm_select_ioreq_server(struct domain *d,
> >>       uint32_t cf8;
> >>       uint8_t type;
> >>       uint64_t addr;
> >> +    p2m_type_t p2mt;
> >> +    struct page_info *ram_page;
> >>
> >>       if ( list_empty(&d->arch.hvm_domain.ioreq_server.list) )
> >>           return NULL;
> >> @@ -2565,6 +2570,17 @@ struct hvm_ioreq_server
> >> *hvm_select_ioreq_server(struct domain *d,
> >>       {
> >>           type = p->type;
> >>           addr = p->addr;
> >> +
> >> +        if ( p->type == IOREQ_TYPE_COPY )
> >> +        {
> >> +            ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT,
> >> +                                         &p2mt, P2M_UNSHARE);
> >> +            if ( p2mt == p2m_mmio_write_dm )
> >> +                type = IOREQ_TYPE_WP_MEM;
> >> +
> >> +            if ( ram_page )
> >> +                put_page(ram_page);
> >> +        }
> >>       }
> >>
> >>       list_for_each_entry ( s,
> >> @@ -2582,6 +2598,7 @@ struct hvm_ioreq_server
> >> *hvm_select_ioreq_server(struct domain *d,
> >>           BUILD_BUG_ON(IOREQ_TYPE_PIO != HVMOP_IO_RANGE_PORT);
> >>           BUILD_BUG_ON(IOREQ_TYPE_COPY !=
> HVMOP_IO_RANGE_MEMORY);
> >>           BUILD_BUG_ON(IOREQ_TYPE_PCI_CONFIG !=
> >> HVMOP_IO_RANGE_PCI);
> >> +        BUILD_BUG_ON(IOREQ_TYPE_WP_MEM !=
> >> HVMOP_IO_RANGE_WP_MEM);
> >
> > I wonder whether it's time to break this identity relationship. Is there 
> > really
> a need for the emulator to know the difference between MMIO and
> WP_MEM? If not then, IOREQ_TYPE_COPY would do for both but it does
> mean you'd need to index below using HVMOP_IO_RANGE_XXX values
> rather than the raw ioreq type itself.
> >
> >    Paul
> 
> Well, that sounds reasonable. The identity relationship is not so
> necessary as long as hvm_select_ioreq_server() can decide which
> rangeset to search.
> Maybe we can also remove the IOREQ_TYPE_PCI_CONFIG? With a separate
> patch as the first one in this patch series? :)

No, you can't remove that type. That is actually passed to emulators. See 
upstream QEMU for usage.

  Paul

> 
> Thanks
> Yu
> >
> >>           r = s->range[type];
> >>
> >>           switch ( type )
> >> @@ -2609,6 +2626,12 @@ struct hvm_ioreq_server
> >> *hvm_select_ioreq_server(struct domain *d,
> >>               }
> >>
> >>               break;
> >> +
> >> +        case IOREQ_TYPE_WP_MEM:
> >> +            if ( rangeset_contains_singleton(r, addr >> PAGE_SHIFT) )
> >> +                return s;
> >> +
> >> +            break;
> >>           }
> >>       }
> >>
> >> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-
> >> x86/hvm/domain.h
> >> index 992d5d1..b2e4234 100644
> >> --- a/xen/include/asm-x86/hvm/domain.h
> >> +++ b/xen/include/asm-x86/hvm/domain.h
> >> @@ -48,8 +48,8 @@ struct hvm_ioreq_vcpu {
> >>       bool_t           pending;
> >>   };
> >>
> >> -#define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
> >> -#define MAX_NR_IO_RANGES  256
> >> +#define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_WP_MEM + 1)
> >> +#define MAX_NR_IO_RANGES  8192
> >>
> >>   struct hvm_ioreq_server {
> >>       struct list_head       list_entry;
> >> diff --git a/xen/include/public/hvm/hvm_op.h
> >> b/xen/include/public/hvm/hvm_op.h
> >> index 014546a..c02c91a 100644
> >> --- a/xen/include/public/hvm/hvm_op.h
> >> +++ b/xen/include/public/hvm/hvm_op.h
> >> @@ -331,6 +331,7 @@ struct xen_hvm_io_range {
> >>   # define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
> >>   # define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
> >>   # define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func
> range */
> >> +# define HVMOP_IO_RANGE_WP_MEM 3 /* Write-protected memory
> >> range */
> >>       uint64_aligned_t start, end; /* IN - inclusive start and end of 
> >> range */
> >>   };
> >>   typedef struct xen_hvm_io_range xen_hvm_io_range_t;
> >> diff --git a/xen/include/public/hvm/ioreq.h
> >> b/xen/include/public/hvm/ioreq.h
> >> index 2e5809b..2b712ac 100644
> >> --- a/xen/include/public/hvm/ioreq.h
> >> +++ b/xen/include/public/hvm/ioreq.h
> >> @@ -35,6 +35,7 @@
> >>   #define IOREQ_TYPE_PIO          0 /* pio */
> >>   #define IOREQ_TYPE_COPY         1 /* mmio ops */
> >>   #define IOREQ_TYPE_PCI_CONFIG   2
> >> +#define IOREQ_TYPE_WP_MEM       3
> >>   #define IOREQ_TYPE_TIMEOFFSET   7
> >>   #define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
> >>
> >> --
> >> 1.9.1
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@xxxxxxxxxxxxx
> > http://lists.xen.org/xen-devel
> >
> >

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.