
Re: [Xen-devel] [PATCH v3 5/6] ioreq-server: add support for multiple servers



On Wed, 2014-03-05 at 14:48 +0000, Paul Durrant wrote:
> The legacy 'catch-all' server is always created with id 0. Secondary
> servers will have an id ranging from 1 to a limit set by the toolstack
> via the 'max_emulators' build info field. This defaults to 1 so ordinarily
> no extra special pages are reserved for secondary emulators. It may be
> increased using the secondary_device_emulators parameter in xl.cfg(5).
> There's no clear limit to apply to the number of emulators so I've not
> applied one.
> 
> Because of the re-arrangement of the special pages in a previous patch we
> only need the addition of parameter HVM_PARAM_NR_IOREQ_SERVERS to determine
> the layout of the shared pages for multiple emulators. Guests migrated in
> from hosts without this patch will be lacking the save record which stores
> the new parameter and so the guest is assumed to only have had a single
> emulator.
> 
> Added some more emacs boilerplate to xenctrl.h and xenguest.h
> 
> Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
> ---
>  docs/man/xl.cfg.pod.5            |    7 +
>  tools/libxc/xc_domain.c          |  175 +++++++
>  tools/libxc/xc_domain_restore.c  |   20 +
>  tools/libxc/xc_domain_save.c     |   12 +
>  tools/libxc/xc_hvm_build_x86.c   |   24 +-
>  tools/libxc/xenctrl.h            |   51 ++
>  tools/libxc/xenguest.h           |   12 +
>  tools/libxc/xg_save_restore.h    |    1 +
>  tools/libxl/libxl.h              |    8 +
>  tools/libxl/libxl_create.c       |    3 +
>  tools/libxl/libxl_dom.c          |    1 +
>  tools/libxl/libxl_types.idl      |    1 +
>  tools/libxl/xl_cmdimpl.c         |    3 +
>  xen/arch/x86/hvm/hvm.c           |  964 ++++++++++++++++++++++++++++++++++++--
>  xen/arch/x86/hvm/io.c            |    2 +-
>  xen/include/asm-x86/hvm/domain.h |   23 +-
>  xen/include/asm-x86/hvm/hvm.h    |    3 +-
>  xen/include/asm-x86/hvm/io.h     |    2 +-
>  xen/include/public/hvm/hvm_op.h  |   70 +++
>  xen/include/public/hvm/ioreq.h   |    1 +
>  xen/include/public/hvm/params.h  |    4 +-
>  21 files changed, 1324 insertions(+), 63 deletions(-)
> 
> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
> index e15a49f..0226c55 100644
> --- a/docs/man/xl.cfg.pod.5
> +++ b/docs/man/xl.cfg.pod.5
> @@ -1281,6 +1281,13 @@ specified, enabling the use of XenServer PV drivers in the guest.
>  This parameter only takes effect when device_model_version=qemu-xen.
>  See F<docs/misc/pci-device-reservations.txt> for more information.
>  
> +=item B<secondary_device_emulators=NUMBER>
> +
> +If a number of secondary device emulators (i.e. in addition to
> +qemu-xen or qemu-xen-traditional) are to be invoked to support the
> +guest then this parameter can be set with the count of how many are
> +to be used. The default value is zero.

This is an odd thing to expose to the user. Surely (lib)xl should be
launching these things while building the domain and therefore know how
many there are; the config options should be things like
"use_split_dm_for_foo=1" or device_emulators = ["/usr/bin/first-dm",
"/usr/local/bin/second-dm"] or something.

> +
>  =back
>  
>  =head2 Device-Model Options
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 369c3f3..dfa905b 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> +int xc_hvm_get_ioreq_server_info(xc_interface *xch,
> +                                 domid_t domid,
> +                                 ioservid_t id,
> +                                 xen_pfn_t *pfn,
> +                                 xen_pfn_t *buf_pfn,
> +                                 evtchn_port_t *buf_port)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_get_ioreq_server_info_t, arg);
> +    int rc;
> +
> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_get_ioreq_server_info;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +    arg->domid = domid;
> +    arg->id = id;
> +    rc = do_xen_hypercall(xch, &hypercall);
> +    if ( rc != 0 )
> +        goto done;
> +
> +    if ( pfn )
> +        *pfn = arg->pfn;
> +
> +    if ( buf_pfn )
> +        *buf_pfn = arg->buf_pfn;
> +
> +    if ( buf_port )
> +        *buf_port = arg->buf_port;

This looks a bit like this function should take a
xen_hvm_get_ioreq_server_info_t* and use the bounce buffering stuff.
Unless there is some desire to hide that struct from the callers
perhaps?
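
Something like this (completely untested) sketch of the usual bounce
pattern, say:

    int xc_hvm_get_ioreq_server_info(xc_interface *xch,
                                     xen_hvm_get_ioreq_server_info_t *info)
    {
        DECLARE_HYPERCALL;
        /* BOUNCE_BOTH: domid/id go in, pfns and port come back out */
        DECLARE_HYPERCALL_BOUNCE(info, sizeof(*info),
                                 XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
        int rc;

        if ( xc_hypercall_bounce_pre(xch, info) )
            return -1;

        hypercall.op     = __HYPERVISOR_hvm_op;
        hypercall.arg[0] = HVMOP_get_ioreq_server_info;
        hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(info);

        rc = do_xen_hypercall(xch, &hypercall);

        xc_hypercall_bounce_post(xch, info);
        return rc;
    }

The caller would fill in domid and id before the call. The same pattern
would apply to the map/unmap functions below.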

> +
> +done:
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
> +int xc_hvm_map_io_range_to_ioreq_server(xc_interface *xch, domid_t domid,
> +                                        ioservid_t id, int is_mmio,
> +                                        uint64_t start, uint64_t end)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_map_io_range_to_ioreq_server_t, arg);
> +    int rc;
> +
> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_map_io_range_to_ioreq_server;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +    arg->domid = domid;
> +    arg->id = id;
> +    arg->is_mmio = is_mmio;
> +    arg->start = start;
> +    arg->end = end;

Bounce a struct here instead?

> +    rc = do_xen_hypercall(xch, &hypercall);
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
> +int xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, domid_t domid,
> +                                            ioservid_t id, int is_mmio,
> +                                            uint64_t start)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_unmap_io_range_from_ioreq_server_t, arg);
> +    int rc;
> +
> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_unmap_io_range_from_ioreq_server;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +    arg->domid = domid;
> +    arg->id = id;
> +    arg->is_mmio = is_mmio;
> +    arg->start = start;

Again?

> +    rc = do_xen_hypercall(xch, &hypercall);
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
> +int xc_hvm_map_pcidev_to_ioreq_server(xc_interface *xch, domid_t domid,
> +                                      ioservid_t id, uint16_t bdf)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pcidev_to_ioreq_server_t, arg);
> +    int rc;
> +
> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_map_pcidev_to_ioreq_server;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +    arg->domid = domid;
> +    arg->id = id;
> +    arg->bdf = bdf;

Guess what ;-)

> +    rc = do_xen_hypercall(xch, &hypercall);
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
> +int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t domid,
> +                                          ioservid_t id, uint16_t bdf)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_unmap_pcidev_from_ioreq_server_t, arg);
> +    int rc;
> +
> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_unmap_pcidev_from_ioreq_server;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +    arg->domid = domid;
> +    arg->id = id;
> +    arg->bdf = bdf;

I'm going to stop suggesting it now ...

> +    rc = do_xen_hypercall(xch, &hypercall);
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
> +int xc_hvm_destroy_ioreq_server(xc_interface *xch,
> +                                domid_t domid,
> +                                ioservid_t id)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_destroy_ioreq_server_t, arg);
> +    int rc;
> +
> +    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_destroy_ioreq_server;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +    arg->domid = domid;
> +    arg->id = id;

OK, one more...

> +    rc = do_xen_hypercall(xch, &hypercall);
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
>  int xc_domain_setdebugging(xc_interface *xch,
>                             uint32_t domid,
>                             unsigned int enable)
> diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
> index 1f6ce50..3116653 100644
> --- a/tools/libxc/xc_domain_restore.c
> +++ b/tools/libxc/xc_domain_restore.c
> @@ -746,6 +746,7 @@ typedef struct {
>      uint64_t acpi_ioport_location;
>      uint64_t viridian;
>      uint64_t vm_generationid_addr;
> +    uint64_t nr_ioreq_servers;

This makes me wonder: what happens if the source and target hosts do
different amounts of disaggregation? Perhaps in Xen N+1 we split some
additional component out into its own process?

This is going to be complex with the allocation of space for special
pages, isn't it?

>  
>      struct toolstack_data_t tdata;
>  } pagebuf_t;
> @@ -996,6 +997,16 @@ static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx,
>          DPRINTF("read generation id buffer address");
>          return pagebuf_get_one(xch, ctx, buf, fd, dom);
>  
> +    case XC_SAVE_ID_HVM_NR_IOREQ_SERVERS:
> +        /* Skip padding 4 bytes then read the number of IOREQ servers. */
> +        if ( RDEXACT(fd, &buf->nr_ioreq_servers, sizeof(uint32_t)) ||
> +             RDEXACT(fd, &buf->nr_ioreq_servers, sizeof(uint64_t)) )
> +        {
> +            PERROR("error reading the number of IOREQ servers");
> +            return -1;
> +        }
> +        return pagebuf_get_one(xch, ctx, buf, fd, dom);
> +
>      default:
>          if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
>              ERROR("Max batch size exceeded (%d). Giving up.", count);
> @@ -1755,6 +1766,15 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
>      if (pagebuf.viridian != 0)
>          xc_set_hvm_param(xch, dom, HVM_PARAM_VIRIDIAN, 1);
>  
> +    if ( hvm ) {
> +        int nr_ioreq_servers = pagebuf.nr_ioreq_servers;
> +
> +        if ( nr_ioreq_servers == 0 )
> +            nr_ioreq_servers = 1;
> +
> +        xc_set_hvm_param(xch, dom, HVM_PARAM_NR_IOREQ_SERVERS, nr_ioreq_servers);
> +    }
> +
>      if (pagebuf.acpi_ioport_location == 1) {
>          DBGPRINTF("Use new firmware ioport from the checkpoint\n");
>          xc_set_hvm_param(xch, dom, HVM_PARAM_ACPI_IOPORTS_LOCATION, 1);
> diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
> index 42c4752..3293e29 100644
> --- a/tools/libxc/xc_domain_save.c
> +++ b/tools/libxc/xc_domain_save.c
> @@ -1731,6 +1731,18 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
>              PERROR("Error when writing the viridian flag");
>              goto out;
>          }
> +
> +        chunk.id = XC_SAVE_ID_HVM_NR_IOREQ_SERVERS;
> +        chunk.data = 0;
> +        xc_get_hvm_param(xch, dom, HVM_PARAM_NR_IOREQ_SERVERS,
> +                         (unsigned long *)&chunk.data);
> +
> +        if ( (chunk.data != 0) &&

Can this ever be 0 for an HVM guest?

> +             wrexact(io_fd, &chunk, sizeof(chunk)) )
> +        {
> +            PERROR("Error when writing the number of IOREQ servers");
> +            goto out;
> +        }
>      }
>  
>      if ( callbacks != NULL && callbacks->toolstack_save != NULL )

> diff --git a/tools/libxc/xg_save_restore.h b/tools/libxc/xg_save_restore.h
> index f859621..5170b7f 100644
> --- a/tools/libxc/xg_save_restore.h
> +++ b/tools/libxc/xg_save_restore.h
> @@ -259,6 +259,7 @@
>  #define XC_SAVE_ID_HVM_ACCESS_RING_PFN  -16
>  #define XC_SAVE_ID_HVM_SHARING_RING_PFN -17
> +#define XC_SAVE_ID_TOOLSTACK          -18 /* Optional toolstack specific info */
> +#define XC_SAVE_ID_HVM_NR_IOREQ_SERVERS -19

Absence of this chunk => assume 1 iff hvm?

> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 4fc46eb..cf9b67d 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -1750,6 +1750,9 @@ skip_vfb:
>  
>              b_info->u.hvm.vendor_device = d;
>          }
> + 
> +        if (!xlu_cfg_get_long (config, "secondary_device_emulators", &l, 0))
> +            b_info->u.hvm.max_emulators = l + 1;

+ 1 ? Oh secondary. Well I already objected to the concept of this
generally but even if going this way then max_/nr_device_emulators would
have been better.

>      }
>  
>      xlu_cfg_destroy(config);
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 22b2a2c..996c374 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c

[...]

Skipping the xen side, if you want review from those folks I suggest you
CC them. People may find this more wieldy if you made the Xen changes
first?

> diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
> index a9aab4b..6b31189 100644
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -23,6 +23,7 @@
>  
>  #include "../xen.h"
>  #include "../trace.h"
> +#include "../event_channel.h"
>  
>  /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
>  #define HVMOP_set_param           0
> @@ -270,6 +271,75 @@ struct xen_hvm_inject_msi {
>  typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
>  
> +typedef uint32_t ioservid_t;
> +
> +DEFINE_XEN_GUEST_HANDLE(ioservid_t);

I don't think you have any pointers to these in your interface, do you?
(if not then you don't need this)

> +#define HVMOP_get_ioreq_server_info 18
> +struct xen_hvm_get_ioreq_server_info {
> +    domid_t domid;          /* IN - domain to be serviced */
> +    ioservid_t id;          /* IN - server id */
> +    xen_pfn_t pfn;          /* OUT - ioreq pfn */
> +    xen_pfn_t buf_pfn;      /* OUT - buf ioreq pfn */

Are all servers required to have both buffered and unbuffered modes? I
could imagine a simple one requiring only one or the other.

> +    evtchn_port_t buf_port; /* OUT - buf ioreq port */
> +};
> +typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_info_t);
> +
> +#define HVMOP_map_io_range_to_ioreq_server 19
> +struct xen_hvm_map_io_range_to_ioreq_server {
> +    domid_t domid;                  /* IN - domain to be serviced */
> +    ioservid_t id;                  /* IN - handle from HVMOP_register_ioreq_server */
> +    int is_mmio;                    /* IN - MMIO or port IO? */

Do we normally make this distinction via two different hypercalls?
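
(If we did split it, I'd imagine something along these lines -- struct
names entirely made up, just to illustrate the shape:)

    /* Hypothetical alternative: one subop per address space rather
     * than an is_mmio flag. Names are illustrative only. */
    struct xen_hvm_map_mmio_range_to_ioreq_server {
        domid_t domid;       /* IN - domain to be serviced */
        ioservid_t id;       /* IN - server id */
        uint64_t start, end; /* IN - inclusive MMIO range */
    };

    struct xen_hvm_map_pio_range_to_ioreq_server {
        domid_t domid;       /* IN - domain to be serviced */
        ioservid_t id;       /* IN - server id */
        uint32_t start, end; /* IN - inclusive port range */
    };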

Ian.



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

