[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2 of 3] tools/libxl: suspend/postflush/commit callbacks



On Mon, 2012-01-23 at 22:46 +0000, rshriram@xxxxxxxxx wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@xxxxxxxxx>
> # Date 1327358640 28800
> # Node ID 0446591bee86eb4e767d75b70c885549c7a3cfef
> # Parent  11fb1dfda7de4d6759dec87d80cd16cf137f7369
> tools/libxl: suspend/postflush/commit callbacks
> 
>  * Add libxl callbacks for Remus checkpoint suspend, postflush (aka resume)
>    and checkpoint commit callbacks.
>  * suspend callback just bounces off libxl__domain_suspend_common_callback
>  * resume callback directly calls xc_domain_resume instead of re-using
>    libxl_domain_resume (as the latter does not set the fast suspend argument
>    to xc_domain_resume - aka suspend_cancel support).
>  * commit callback just sleeps for the checkpoint interval (for the moment).
> 
>  * Future patches will augument these callbacks with more functionalities like
>    issuing network buffer plug/unplug commands, disk checkpoint commands, etc.
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
> 

> diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl.h
> --- a/tools/libxl/libxl.h     Mon Jan 23 14:43:58 2012 -0800
> +++ b/tools/libxl/libxl.h     Mon Jan 23 14:44:00 2012 -0800
> @@ -212,6 +212,12 @@
>      int (*suspend_callback)(void *, int);
>  } libxl_domain_suspend_info;
>  
> +typedef struct {
> +    int interval;
> +    int blackhole;
> +    int compression;
> +} libxl_domain_remus_info;

This should be defined via the libxl IDL (see libxl_types.idl and
idl.txt)

> +
>  enum {
>      ERROR_NONSPECIFIC = -1,
>      ERROR_VERSION = -2,
> diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c Mon Jan 23 14:43:58 2012 -0800
> +++ b/tools/libxl/libxl_dom.c Mon Jan 23 14:44:00 2012 -0800
> @@ -395,6 +395,8 @@
>      int hvm;
>      unsigned int flags;
>      int guest_responded;
> +    int save_fd; /* Migration stream fd (for Remus) */
> +    int interval; /* remus checkpoint interval */
>  };
>  
>  static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid, 
> unsigned int enable, void *data)
> @@ -581,9 +583,69 @@
>      return 0;
>  }
>  
> +static int libxl__remus_domain_suspend_callback(void *data)
> +{
> +    struct suspendinfo *si = data;
> +
> +    if (libxl__domain_suspend_common_callback(data)) {

if (!libxl_domain_suspend_common_callback)
        return 1;
if (si->hvm && !libxl__remus_domain_suspend(...)
        return 1;
return 0;

Is would be clearer than the multiple nested if/elses and returns.

> +        if (si->hvm) {
> +            if (!libxl__remus_domain_suspend_qemu(si->gc, si->domid))

Perhaps libxl__domain_suspend_common_callback should do this in both the
Remus and non-Remus cases?

Likewise perhaps normal save should use the checkpoint hook to call
libxl__domain_save_device_model() as well.

I'm in favour of tweaking the normal suspend/resume case as necessary to
reduce the amount of Remus special casing that is required. IMHO a
normal suspend should look a lot like a Remus suspend which happened to
only do a single iteration.

> +                return 1;
> +            else
> +                return 0;
> +        }
> +        return 1;
> +    }
> +    return 0;
> +}
> +
> +static int libxl__remus_domain_resume_callback(void *data)
> +{
> +    struct suspendinfo *si = data;
> +    libxl_ctx *ctx = libxl__gc_owner(si->gc);
> +
> +    /* Assumes that SUSPEND_CANCEL is available - just like
> +     * xm version of Remus. Without that, remus wont work.
> +     * Since libxl_domain_resume calls xc_domain_resume with
> +     * fast_suspend = 0, we cant re-use that api.

You could modify that API which would be better than duplicating its
content. I think the "fast" flag is useful to expose to users of libxl
but if not then you could refactor into an internal function which takes
the flag and call it from both here and libxl_domain_resume.

> +     */
> +    if (xc_domain_resume(ctx->xch, si->domid, 1)) {
> +        LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> +                   "xc_domain_resume failed for domain %u",
> +                   si->domid);
> +        return 0;
> +    }
> +    if (!xs_resume_domain(ctx->xsh, si->domid)) {
> +        LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> +                   "xs_resume_domain failed for domain %u",
> +                   si->domid);
> +        return 0;
> +    }
> +
> +    if (si->hvm) {
> +        if (libxl__remus_domain_resume_qemu(si->gc, si->domid))
> +            return 0;
> +    }
> +    return 1;
> +}
> +
> +/* For now, just sleep. Later, we need to release disk/netbuf */
> +static int libxl__remus_domain_checkpoint_callback(void *data)
> +{
> +    struct suspendinfo *si = data;
> +
> +    if (si->hvm && libxl__domain_save_device_model(si->gc, si->domid,
> +                                                   si->save_fd, 1))
> +            return 0;
> +
> +    usleep(si->interval * 1000);
> +    return 1;
> +}
> +
>  int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
>                                   libxl_domain_type type,
> -                                 int live, int debug)
> +                                 int live, int debug,
> +                                 const libxl_domain_remus_info *r_info)
>  {
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      int flags;
> @@ -614,10 +676,20 @@

Please can you add :
        [diff]
        showfunc = True

to your ~/.hgrc.

>          return ERROR_INVAL;
>      }
>  
> +    memset(&si, 0, sizeof(si));
>      flags = (live) ? XCFLAGS_LIVE : 0
>            | (debug) ? XCFLAGS_DEBUG : 0
>            | (hvm) ? XCFLAGS_HVM : 0;
>  
> +    if (r_info != NULL) {
> +        si.interval = r_info->interval;
> +        if (r_info->compression)
> +            flags |= XCFLAGS_CHECKPOINT_COMPRESS;
> +        si.save_fd = fd;
> +    }
> +    else
> +        si.save_fd = -1;
> +
>      si.domid = domid;
>      si.flags = flags;
>      si.hvm = hvm;
> @@ -641,7 +713,13 @@
>      }
>  
>      memset(&callbacks, 0, sizeof(callbacks));
> -    callbacks.suspend = libxl__domain_suspend_common_callback;
> +    if (r_info != NULL) {
> +        callbacks.suspend = libxl__remus_domain_suspend_callback;
> +        callbacks.postcopy = libxl__remus_domain_resume_callback;
> +        callbacks.checkpoint = libxl__remus_domain_checkpoint_callback;
> +    } else
> +        callbacks.suspend = libxl__domain_suspend_common_callback;
> +
>      callbacks.switch_qemu_logdirty = 
> libxl__domain_suspend_common_switch_qemu_logdirty;
>      callbacks.data = &si;
>  
> diff -r 11fb1dfda7de -r 0446591bee86 tools/libxl/libxl_internal.h
> --- a/tools/libxl/libxl_internal.h    Mon Jan 23 14:43:58 2012 -0800
> +++ b/tools/libxl/libxl_internal.h    Mon Jan 23 14:44:00 2012 -0800
> @@ -272,7 +272,8 @@
>                                           int fd);
>  _hidden int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int 
> fd,
>                                           libxl_domain_type type,
> -                                         int live, int debug);
> +                                         int live, int debug,
> +                                         const libxl_domain_remus_info 
> *r_info);
>  _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t 
> domid);
>  _hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, 
> int fd,
>                                              int remus);
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.