WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] tmem: save/restore/migrate/livemigrate an

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] tmem: save/restore/migrate/livemigrate and shared pool authentication
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 06 Aug 2009 05:35:28 -0700
Delivery-date: Thu, 06 Aug 2009 05:37:46 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1249546795 -3600
# Node ID c98fd816db858917782dfaede518564bbbdd090d
# Parent  f57cc4a7c8537aab0fab5077d8d31e8879b472d2
tmem: save/restore/migrate/livemigrate and shared pool authentication

Attached patch implements save/restore/migration/livemigration
for transcendent memory ("tmem").  Without this patch, domains
using tmem may in some cases lose data when doing save/restore
or migrate/livemigrate.  Also included in this patch is
support for a new (privileged) hypercall for authorizing
domains to share pools; this provides the foundation to
accommodate upstream linux requests for security for shared
pools.

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
---
 tools/libxc/xc_domain_restore.c              |   21 +
 tools/libxc/xc_domain_save.c                 |   18 
 tools/libxc/xc_tmem.c                        |  374 ++++++++++++++++++
 tools/libxc/xenctrl.h                        |   15 
 tools/python/xen/lowlevel/xc/xc.c            |   42 +-
 tools/python/xen/xend/XendAPI.py             |   11 
 tools/python/xen/xend/XendNode.py            |   28 -
 tools/python/xen/xend/balloon.py             |    6 
 tools/python/xen/xend/server/XMLRPCServer.py |    3 
 tools/python/xen/xm/main.py                  |   48 ++
 xen/common/tmem.c                            |  540 +++++++++++++++++++++++----
 xen/common/tmem_xen.c                        |   64 +--
 xen/include/public/tmem.h                    |   62 ++-
 xen/include/xen/tmem_xen.h                   |   31 +
 14 files changed, 1116 insertions(+), 147 deletions(-)

diff -r f57cc4a7c853 -r c98fd816db85 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/libxc/xc_domain_restore.c   Thu Aug 06 09:19:55 2009 +0100
@@ -536,6 +536,27 @@ int xc_domain_restore(int xc_handle, int
             continue;
         }
 
+        if ( j == -5 )
+        {
+            DPRINTF("xc_domain_restore start tmem\n");
+            if ( xc_tmem_restore(xc_handle, dom, io_fd) )
+            {
+                ERROR("error reading/restoring tmem");
+                goto out;
+            }
+            continue;
+        }
+
+        if ( j == -6 )
+        {
+            if ( xc_tmem_restore_extra(xc_handle, dom, io_fd) )
+            {
+                ERROR("error reading/restoring tmem extra");
+                goto out;
+            }
+            continue;
+        }
+
         if ( j == 0 )
             break;  /* our work here is done */
 
diff -r f57cc4a7c853 -r c98fd816db85 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/libxc/xc_domain_save.c      Thu Aug 06 09:19:55 2009 +0100
@@ -758,6 +758,7 @@ int xc_domain_save(int xc_handle, int io
     int live  = (flags & XCFLAGS_LIVE);
     int debug = (flags & XCFLAGS_DEBUG);
     int race = 0, sent_last_iter, skip_this_iter;
+    int tmem_saved = 0;
 
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
@@ -995,6 +996,13 @@ int xc_domain_save(int xc_handle, int io
     }
 
     print_stats(xc_handle, dom, 0, &stats, 0);
+
+    tmem_saved = xc_tmem_save(xc_handle, dom, io_fd, live, -5);
+    if ( tmem_saved == -1 )
+    {
+        ERROR("Error when writing to state file (tmem)");
+        goto out;
+    }
 
     /* Now write out each data page, canonicalising page tables as we go... */
     for ( ; ; )
@@ -1316,6 +1324,13 @@ int xc_domain_save(int xc_handle, int io
                 }
 
                 DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
+                if ( (tmem_saved > 0) &&
+                     (xc_tmem_save_extra(xc_handle,dom,io_fd,-6) == -1) )
+                {
+                        ERROR("Error when writing to state file (tmem)");
+                        goto out;
+                }
+
             }
 
             if ( xc_shadow_control(xc_handle, dom, 
@@ -1605,6 +1620,9 @@ int xc_domain_save(int xc_handle, int io
 
  out:
 
+    if ( tmem_saved != 0 && live )
+        xc_tmem_save_done(xc_handle, dom);
+
     if ( live )
     {
         if ( xc_shadow_control(xc_handle, dom, 
diff -r f57cc4a7c853 -r c98fd816db85 tools/libxc/xc_tmem.c
--- a/tools/libxc/xc_tmem.c     Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/libxc/xc_tmem.c     Thu Aug 06 09:19:55 2009 +0100
@@ -36,6 +36,7 @@ int xc_tmem_control(int xc,
                     uint32_t cli_id,
                     uint32_t arg1,
                     uint32_t arg2,
+                    uint64_t arg3,
                     void *buf)
 {
     tmem_op_t op;
@@ -45,9 +46,10 @@ int xc_tmem_control(int xc,
     op.pool_id = pool_id;
     op.u.ctrl.subop = subop;
     op.u.ctrl.cli_id = cli_id;
+    set_xen_guest_handle(op.u.ctrl.buf,buf);
     op.u.ctrl.arg1 = arg1;
     op.u.ctrl.arg2 = arg2;
-    op.u.ctrl.buf.p = buf;
+    op.u.ctrl.arg3 = arg3;
 
     if (subop == TMEMC_LIST) {
         if ((arg1 != 0) && (lock_pages(buf, arg1) != 0))
@@ -70,6 +72,376 @@ int xc_tmem_control(int xc,
     }
 
     return rc;
+}
+
+static int xc_tmem_uuid_parse(char *uuid_str, uint64_t *uuid_lo, uint64_t 
*uuid_hi)
+{
+    char *p = uuid_str;
+    uint64_t *x = uuid_hi;
+    int i = 0, digit;
+
+    *uuid_lo = 0; *uuid_hi = 0;
+    for ( p = uuid_str, i = 0; i != 36 && *p != '\0'; p++, i++ )
+    {
+        if ( (i == 8 || i == 13 || i == 18 || i == 23) )
+        {
+            if ( *p != '-' )
+                return -1;
+            if ( i == 18 )
+                x = uuid_lo;
+            continue;
+        }
+        else if ( *p >= '0' && *p <= '9' )
+            digit = *p - '0';
+        else if ( *p >= 'A' && *p <= 'F' )
+            digit = *p - 'A';
+        else if ( *p >= 'a' && *p <= 'f' )
+            digit = *p - 'a';
+        else
+            return -1;
+        *x = (*x << 4) | digit;
+    }
+    if ( (i != 1 && i != 36) || *p != '\0' )
+        return -1;
+    return 0;
+}
+
+int xc_tmem_auth(int xc,
+                 int cli_id,
+                 char *uuid_str,
+                 int arg1)
+{
+    tmem_op_t op;
+
+    op.cmd = TMEM_AUTH;
+    op.pool_id = 0;
+    op.u.new.arg1 = cli_id;
+    op.u.new.flags = arg1;
+    if ( xc_tmem_uuid_parse(uuid_str, &op.u.new.uuid[0],
+                                      &op.u.new.uuid[1]) < 0 )
+    {
+        PERROR("Can't parse uuid, use xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+        return -1;
+    }
+
+    return do_tmem_op(xc, &op);
+}
+
+/* Save/restore/live migrate */
+
+/*
+   Note that live migration complicates the save/restore format in
+   multiple ways: Though saving/migration can only occur when all
+   tmem pools belonging to the domain-being-saved are frozen and
+   this ensures that new pools can't be created or existing pools
+   grown (in number of pages), it is possible during a live migration
+   that pools may be destroyed and pages invalidated while the migration
+   is in process.  As a result, (1) it is not safe to pre-specify counts
+   for these values precisely, but only as a "max", and (2) a "invalidation"
+   list (of pools, objects, pages) must be appended when the domain is truly
+   suspended.
+ */
+
+/* returns 0 if nothing to save, -1 if error saving, 1 if saved successfully */
+int xc_tmem_save(int xc, int dom, int io_fd, int live, int field_marker)
+{
+    int marker = field_marker;
+    int i, j;
+    uint32_t max_pools, version;
+    uint32_t weight, cap, flags;
+    uint32_t pool_id;
+    uint32_t minusone = -1;
+    struct tmem_handle *h;
+
+    if ( xc_tmem_control(xc,0,TMEMC_SAVE_BEGIN,dom,live,0,0,NULL) <= 0 )
+        return 0;
+
+    if ( write_exact(io_fd, &marker, sizeof(marker)) )
+        return -1;
+    version = xc_tmem_control(xc,0,TMEMC_SAVE_GET_VERSION,0,0,0,0,NULL);
+    if ( write_exact(io_fd, &version, sizeof(version)) )
+        return -1;
+    max_pools = xc_tmem_control(xc,0,TMEMC_SAVE_GET_MAXPOOLS,0,0,0,0,NULL);
+    if ( write_exact(io_fd, &max_pools, sizeof(max_pools)) )
+        return -1;
+    if ( version == -1 || max_pools == -1 )
+        return -1;
+    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+        return -1;
+    flags = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_FLAGS,dom,0,0,0,NULL);
+    if ( write_exact(io_fd, &flags, sizeof(flags)) )
+        return -1;
+    weight = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_WEIGHT,dom,0,0,0,NULL);
+    if ( write_exact(io_fd, &weight, sizeof(weight)) )
+        return -1;
+    cap = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_CAP,dom,0,0,0,NULL);
+    if ( write_exact(io_fd, &cap, sizeof(cap)) )
+        return -1;
+    if ( flags == -1 || weight == -1 || cap == -1 )
+        return -1;
+    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+        return -1;
+    for ( i = 0; i < max_pools; i++ )
+    {
+        uint64_t uuid[2];
+        uint32_t n_pages;
+        uint32_t pagesize;
+        char *buf = NULL;
+        int bufsize = 0;
+        int checksum = 0;
+
+        /* get pool id, flags, pagesize, n_pages, uuid */
+        flags = xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_FLAGS,dom,0,0,0,NULL);
+        if ( flags != -1 )
+        {
+            pool_id = i;
+            n_pages = 
xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_NPAGES,dom,0,0,0,NULL);
+            if ( !(flags & TMEM_POOL_PERSIST) )
+                n_pages = 0;
+            
(void)xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_UUID,dom,sizeof(uuid),0,0,&uuid);
+            if ( write_exact(io_fd, &pool_id, sizeof(pool_id)) )
+                return -1;
+            if ( write_exact(io_fd, &flags, sizeof(flags)) )
+                return -1;
+            if ( write_exact(io_fd, &n_pages, sizeof(n_pages)) )
+                return -1;
+            if ( write_exact(io_fd, &uuid, sizeof(uuid)) )
+                return -1;
+            if ( n_pages == 0 )
+                continue;
+
+            pagesize = 1 << (((flags >> TMEM_POOL_PAGESIZE_SHIFT) &
+                              TMEM_POOL_PAGESIZE_MASK) + 12);
+            if ( pagesize > bufsize )
+            {
+                bufsize = pagesize + sizeof(struct tmem_handle);
+                if ( (buf = realloc(buf,bufsize)) == NULL )
+                    return -1;
+            }
+            for ( j = n_pages; j > 0; j-- )
+            {
+                int ret;
+                if ( (ret = xc_tmem_control(xc, pool_id,
+                                            TMEMC_SAVE_GET_NEXT_PAGE, dom,
+                                            bufsize, 0, 0, buf)) > 0 )
+                {
+                    h = (struct tmem_handle *)buf;
+                    if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
+                        return -1;
+                    if ( write_exact(io_fd, &h->index, sizeof(h->index)) )
+                        return -1;
+                    h++;
+                    checksum += *(char *)h;
+                    if ( write_exact(io_fd, h, pagesize) )
+                        return -1;
+                } else if ( ret == 0 ) {
+                    continue;
+                } else {
+                    /* page list terminator */
+                    h = (struct tmem_handle *)buf;
+                    h->oid = -1;
+                    if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
+                        return -1;
+                    break;
+                }
+            }
+            DPRINTF("saved %d tmem pages for dom=%d pool=%d, checksum=%x\n",
+                         n_pages-j,dom,pool_id,checksum);
+        }
+    }
+    /* pool list terminator */
+    minusone = -1;
+    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+        return -1;
+
+    return 1;
+}
+
+/* only called for live migration */
+int xc_tmem_save_extra(int xc, int dom, int io_fd, int field_marker)
+{
+    struct tmem_handle handle;
+    int marker = field_marker;
+    uint32_t minusone;
+    int count = 0, checksum = 0;
+
+    if ( write_exact(io_fd, &marker, sizeof(marker)) )
+        return -1;
+    while ( xc_tmem_control(xc, 0, TMEMC_SAVE_GET_NEXT_INV, dom,
+                            sizeof(handle),0,0,&handle) > 0 ) {
+        if ( write_exact(io_fd, &handle.pool_id, sizeof(handle.pool_id)) )
+            return -1;
+        if ( write_exact(io_fd, &handle.oid, sizeof(handle.oid)) )
+            return -1;
+        if ( write_exact(io_fd, &handle.index, sizeof(handle.index)) )
+            return -1;
+        count++;
+        checksum += handle.pool_id + handle.oid + handle.index;
+    }
+    if ( count )
+            DPRINTF("needed %d tmem invalidates, check=%d\n",count,checksum);
+    minusone = -1;
+    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
+        return -1;
+    return 0;
+}
+
+/* only called for live migration */
+void xc_tmem_save_done(int xc, int dom)
+{
+    xc_tmem_control(xc,0,TMEMC_SAVE_END,dom,0,0,0,NULL);
+}
+
+/* restore routines */
+
+static int xc_tmem_restore_new_pool(
+                    int xc,
+                    int cli_id,
+                    uint32_t pool_id,
+                    uint32_t flags,
+                    uint64_t uuid_lo,
+                    uint64_t uuid_hi)
+{
+    tmem_op_t op;
+
+    op.cmd = TMEM_RESTORE_NEW;
+    op.pool_id = pool_id;
+    op.u.new.arg1 = cli_id;
+    op.u.new.flags = flags;
+    op.u.new.uuid[0] = uuid_lo;
+    op.u.new.uuid[1] = uuid_hi;
+
+    return do_tmem_op(xc, &op);
+}
+
+int xc_tmem_restore(int xc, int dom, int io_fd)
+{
+    uint32_t save_max_pools, save_version;
+    uint32_t this_max_pools, this_version;
+    uint32_t pool_id;
+    uint32_t minusone;
+    uint32_t weight, cap, flags;
+    int checksum = 0;
+
+    save_version = xc_tmem_control(xc,0,TMEMC_SAVE_GET_VERSION,dom,0,0,0,NULL);
+    if ( save_version == -1 )
+        return -1; /* domain doesn't exist */
+    save_max_pools = 
xc_tmem_control(xc,0,TMEMC_SAVE_GET_MAXPOOLS,0,0,0,0,NULL);
+    if ( read_exact(io_fd, &this_version, sizeof(this_version)) )
+        return -1;
+    if ( read_exact(io_fd, &this_max_pools, sizeof(this_max_pools)) )
+        return -1;
+    /* FIXME check here to ensure no version mismatch or maxpools mismatch */
+    if ( read_exact(io_fd, &minusone, sizeof(minusone)) )
+        return -1;
+    if ( minusone != -1 )
+        return -1;
+    if ( xc_tmem_control(xc,0,TMEMC_RESTORE_BEGIN,dom,0,0,0,NULL) < 0 )
+        return -1;
+    if ( read_exact(io_fd, &flags, sizeof(flags)) )
+        return -1;
+    if ( flags & TMEM_CLIENT_COMPRESS )
+        if ( xc_tmem_control(xc,0,TMEMC_SET_COMPRESS,dom,1,0,0,NULL) < 0 )
+            return -1;
+    if ( flags & TMEM_CLIENT_FROZEN )
+        if ( xc_tmem_control(xc,0,TMEMC_FREEZE,dom,0,0,0,NULL) < 0 )
+            return -1;
+    if ( read_exact(io_fd, &weight, sizeof(weight)) )
+        return -1;
+    if ( xc_tmem_control(xc,0,TMEMC_SET_WEIGHT,dom,0,0,0,NULL) < 0 )
+        return -1;
+    if ( read_exact(io_fd, &cap, sizeof(cap)) )
+        return -1;
+    if ( xc_tmem_control(xc,0,TMEMC_SET_CAP,dom,0,0,0,NULL) < 0 )
+        return -1;
+    if ( read_exact(io_fd, &minusone, sizeof(minusone)) )
+        return -1;
+    while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 
)
+    {
+        uint64_t uuid[2];
+        uint32_t n_pages;
+        char *buf = NULL;
+        int bufsize = 0, pagesize;
+        int j;
+
+        if ( read_exact(io_fd, &flags, sizeof(flags)) )
+            return -1;
+        if ( read_exact(io_fd, &n_pages, sizeof(n_pages)) )
+            return -1;
+        if ( read_exact(io_fd, &uuid, sizeof(uuid)) )
+            return -1;
+        if ( xc_tmem_restore_new_pool(xc, dom, pool_id,
+                                 flags, uuid[0], uuid[1]) < 0)
+            return -1;
+        if ( n_pages <= 0 )
+            continue;
+
+        pagesize = 1 << (((flags >> TMEM_POOL_PAGESIZE_SHIFT) &
+                              TMEM_POOL_PAGESIZE_MASK) + 12);
+        if ( pagesize > bufsize )
+        {
+            bufsize = pagesize;
+            if ( (buf = realloc(buf,bufsize)) == NULL )
+                return -1;
+        }
+        for ( j = n_pages; j > 0; j-- )
+        {
+            uint64_t oid;
+            uint32_t index;
+            int rc;
+            if ( read_exact(io_fd, &oid, sizeof(oid)) )
+                return -1;
+            if ( oid == -1 )
+                break;
+            if ( read_exact(io_fd, &index, sizeof(index)) )
+                return -1;
+            if ( read_exact(io_fd, buf, pagesize) )
+                return -1;
+            checksum += *buf;
+            if ( (rc = xc_tmem_control(xc, pool_id, TMEMC_RESTORE_PUT_PAGE,
+                                 dom, bufsize, index, oid, buf)) <= 0 )
+            {
+                DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n",rc);
+                return -1;
+            }
+        }
+        if ( n_pages )
+            DPRINTF("restored %d tmem pages for dom=%d pool=%d, check=%x\n",
+                    n_pages-j,dom,pool_id,checksum);
+    }
+    if ( pool_id != -1 )
+        return -1;
+
+    return 0;
+}
+
+/* only called for live migration, must be called after suspend */
+int xc_tmem_restore_extra(int xc, int dom, int io_fd)
+{
+    uint32_t pool_id;
+    uint64_t oid;
+    uint32_t index;
+    int count = 0;
+    int checksum = 0;
+
+    while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 
)
+    {
+        if ( read_exact(io_fd, &oid, sizeof(oid)) )
+            return -1;
+        if ( read_exact(io_fd, &index, sizeof(index)) )
+            return -1;
+        if ( xc_tmem_control(xc, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom,
+                             0,index,oid,NULL) <= 0 )
+            return -1;
+        count++;
+        checksum += pool_id + oid + index;
+    }
+    if ( pool_id != -1 )
+        return -1;
+    if ( count )
+            DPRINTF("invalidated %d tmem pages, check=%d\n",count,checksum);
+
+    return 0;
 }
 
 /*
diff -r f57cc4a7c853 -r c98fd816db85 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/libxc/xenctrl.h     Thu Aug 06 09:19:55 2009 +0100
@@ -1276,12 +1276,13 @@ int xc_set_cpuidle_max_cstate(int xc_han
 /**
  * tmem operations
  */
-int xc_tmem_control(int xc,
-                    int32_t pool_id,
-                    uint32_t subop,
-                    uint32_t cli_id,
-                    uint32_t arg1,
-                    uint32_t arg2,
-                    void *buf);
+int xc_tmem_control(int xc, int32_t pool_id, uint32_t subop, uint32_t cli_id,
+                    uint32_t arg1, uint32_t arg2, uint64_t arg3, void *buf);
+int xc_tmem_auth(int xc_handle, int cli_id, char *uuid_str, int arg1);
+int xc_tmem_save(int xc_handle, int dom, int live, int fd, int field_marker);
+int xc_tmem_save_extra(int xc_handle, int dom, int fd, int field_marker);
+void xc_tmem_save_done(int xc_handle, int dom);
+int xc_tmem_restore(int xc_handle, int dom, int fd);
+int xc_tmem_restore_extra(int xc_handle, int dom, int fd);
 
 #endif /* XENCTRL_H */
diff -r f57cc4a7c853 -r c98fd816db85 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Aug 06 09:19:55 2009 +0100
@@ -1523,20 +1523,21 @@ static PyObject *pyxc_tmem_control(XcObj
     uint32_t cli_id;
     uint32_t arg1;
     uint32_t arg2;
+    uint64_t arg3;
     char *buf;
     char _buffer[32768], *buffer = _buffer;
     int rc;
 
-    static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", 
"buf", NULL };
-
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiis", kwd_list,
-                                      &pool_id, &subop, &cli_id, &arg1, &arg2, 
&buf) )
+    static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", 
"arg3", "buf", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiiis", kwd_list,
+                        &pool_id, &subop, &cli_id, &arg1, &arg2, &arg3, &buf) )
         return NULL;
 
     if ( (subop == TMEMC_LIST) && (arg1 > 32768) )
         arg1 = 32768;
 
-    if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, 
arg2, buffer)) < 0 )
+    if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, 
arg2, arg3, buffer)) < 0 )
         return Py_BuildValue("i", rc);
 
     switch (subop) {
@@ -1553,6 +1554,28 @@ static PyObject *pyxc_tmem_control(XcObj
         default:
             break;
     }
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_tmem_shared_auth(XcObject *self,
+                                   PyObject *args,
+                                   PyObject *kwds)
+{
+    uint32_t cli_id;
+    uint32_t arg1;
+    char *uuid_str;
+    int rc;
+
+    static char *kwd_list[] = { "cli_id", "uuid_str", "arg1" };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "isi", kwd_list,
+                                   &cli_id, &uuid_str, &arg1) )
+        return NULL;
+
+    if ( (rc = xc_tmem_auth(self->xc_handle, cli_id, uuid_str, arg1)) < 0 )
+        return Py_BuildValue("i", rc);
 
     Py_INCREF(zero);
     return zero;
@@ -2028,6 +2051,15 @@ static PyMethodDef pyxc_methods[] = {
       " arg2 [int]: Argument.\n"
       " buf [str]: Buffer.\n\n"
       "Returns: [int] 0 or [str] tmem info on success; exception on error.\n" 
},
+
+    { "tmem_shared_auth",
+      (PyCFunction)pyxc_tmem_shared_auth,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "De/authenticate a shared tmem pool.\n"
+      " cli_id [int]: Client identifier (-1 == all).\n"
+      " uuid_str [str]: uuid.\n"
+      " auth [int]: 0|1 .\n"
+      "Returns: [int] 0 on success; exception on error.\n" },
 
     { NULL, NULL, 0, NULL }
 };
diff -r f57cc4a7c853 -r c98fd816db85 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py  Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/python/xen/xend/XendAPI.py  Thu Aug 06 09:19:55 2009 +0100
@@ -933,7 +933,8 @@ class XendAPI(object):
                     ('tmem_list', None),
                     ('tmem_set_weight', None),
                     ('tmem_set_cap', None),
-                    ('tmem_set_compress', None)]
+                    ('tmem_set_compress', None),
+                    ('tmem_shared_auth', None)]
     
     host_funcs = [('get_by_name_label', None),
                   ('list_methods', None)]
@@ -1129,6 +1130,14 @@ class XendAPI(object):
         node = XendNode.instance()
         try:
             node.tmem_set_compress(cli_id, value)
+        except Exception, e:
+            return xen_api_error(e)
+        return xen_api_success_void()
+
+    def host_tmem_shared_auth(self, _, host_ref, cli_id, uuid_str, auth):
+        node = XendNode.instance()
+        try:
+            node.tmem_shared_auth(cli_id, uuid_str, auth)
         except Exception, e:
             return xen_api_error(e)
         return xen_api_success_void()
diff -r f57cc4a7c853 -r c98fd816db85 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/python/xen/xend/XendNode.py Thu Aug 06 09:19:55 2009 +0100
@@ -948,62 +948,72 @@ class XendNode:
         subop = TMEMC_LIST
         arg1 = 32768
         arg2 = use_long
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_thaw(self, cli_id):
         pool_id = -1
         subop = TMEMC_THAW
         arg1 = 0
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_freeze(self, cli_id):
         pool_id = -1
         subop = TMEMC_FREEZE
         arg1 = 0
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_flush(self, cli_id, pages):
         pool_id = -1
         subop = TMEMC_FLUSH
         arg1 = pages
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_destroy(self, cli_id):
         pool_id = -1
         subop = TMEMC_DESTROY
         arg1 = 0
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_set_weight(self, cli_id, arg1):
         pool_id = -1
         subop = TMEMC_SET_WEIGHT
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_set_cap(self, cli_id, arg1):
         pool_id = -1
         subop = TMEMC_SET_CAP
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
 
     def tmem_set_compress(self, cli_id, arg1):
         pool_id = -1
         subop = TMEMC_SET_COMPRESS
         arg2 = 0
+        arg3 = 0
         buf = ''
-        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf)
-
+        return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, 
buf)
+
+    def tmem_shared_auth(self, cli_id, uuid_str, auth):
+        return self.xc.tmem_auth(cli_id, uuid_str, auth)
 
 def instance():
     global inst
diff -r f57cc4a7c853 -r c98fd816db85 tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/python/xen/xend/balloon.py  Thu Aug 06 09:19:55 2009 +0100
@@ -111,7 +111,7 @@ def free(need_mem, dominfo):
         rlimit = RETRY_LIMIT
 
         # stop tmem from absorbing any more memory (must THAW when done!)
-        xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, "")
+        xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, 0, "")
 
         # If unreasonable memory size is required, we give up waiting
         # for ballooning or scrubbing, as if had retried.
@@ -130,7 +130,7 @@ def free(need_mem, dominfo):
         if freeable_mem < need_mem and need_mem < max_free_mem:
             # flush memory from tmem to scrub_mem and reobtain physinfo
             need_tmem_kb = need_mem - freeable_mem
-            tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, "")
+            tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, 0, "")
             log.debug("Balloon: tmem relinquished %d KiB of %d KiB requested.",
                       tmem_kb, need_tmem_kb)
             physinfo = xc.physinfo()
@@ -232,5 +232,5 @@ def free(need_mem, dominfo):
 
     finally:
         # allow tmem to accept pages again
-        xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, "")
+        xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, 0, "")
         del xc
diff -r f57cc4a7c853 -r c98fd816db85 
tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py      Thu Aug 06 09:15:42 
2009 +0100
+++ b/tools/python/xen/xend/server/XMLRPCServer.py      Thu Aug 06 09:19:55 
2009 +0100
@@ -202,7 +202,8 @@ class XMLRPCServer:
                               ['info', 'pciinfo', 'send_debug_keys',
                                'tmem_list', 'tmem_freeze', 'tmem_thaw',
                                'tmem_flush', 'tmem_destroy', 'tmem_set_weight',
-                               'tmem_set_cap', 'tmem_set_compress'],
+                               'tmem_set_cap', 'tmem_set_compress',
+                               'tmem_shared_auth'],
                              'node'),
                              (XendDmesg, ['info', 'clear'], 'node.dmesg')]:
             inst = type.instance()
diff -r f57cc4a7c853 -r c98fd816db85 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Aug 06 09:15:42 2009 +0100
+++ b/tools/python/xen/xm/main.py       Thu Aug 06 09:19:55 2009 +0100
@@ -207,6 +207,7 @@ SUBCOMMAND_HELP = {
     'tmem-set'      :  ('[<Domain>|-a|--all] [weight=<weight>] [cap=<cap>] '
                         '[compress=<compress>]',
                         'Change tmem settings.'),
+    'tmem-shared-auth' :  ('[<Domain>|-a|--all] [--uuid=<uuid>] 
[--auth=<0|1>]', 'De/authenticate shared tmem pool.'),
 
     # security
 
@@ -307,6 +308,11 @@ SUBCOMMAND_OPTIONS = {
     'tmem-set':  (
        ('-a', '--all', 'Operate on all tmem.'),
     ),
+    'tmem-shared-auth':  (
+       ('-a', '--all', 'Authenticate for all tmem pools.'),
+       ('-u', '--uuid', 'Specify uuid 
(abcdef01-2345-6789-01234567890abcdef).'),
+       ('-A', '--auth', '0=auth,1=deauth'),
+    ),
 }
 
 common_commands = [
@@ -427,6 +433,7 @@ tmem_commands = [
     "tmem-freeze",
     "tmem-destroy",
     "tmem-set",
+    "tmem-shared-auth",
     ]
 
 all_commands = (domain_commands + host_commands + scheduler_commands +
@@ -3128,6 +3135,46 @@ def xm_tmem_set(args):
             server.xend.node.tmem_set_cap(domid, cap)
         if compress is not None:
             server.xend.node.tmem_set_compress(domid, compress)
+
+def xm_tmem_shared_auth(args):
+    try:
+        (options, params) = getopt.gnu_getopt(args, 'au:A:', 
['all','uuid=','auth='])
+    except getopt.GetoptError, opterr:
+        err(opterr)
+       usage('tmem-shared-auth')
+
+    all = False
+    for (k, v) in options:
+        if k in ['-a', '--all']:
+            all = True
+
+    if not all and len(params) == 0:
+        err('You must specify -a or --all or a domain id.')
+        usage('tmem-shared-auth')
+
+    if all:
+        domid = -1
+    else:
+        try: 
+            domid = int(params[0])
+            params = params[1:]
+        except:
+            err('Unrecognized domain id: %s' % params[0])
+            usage('tmem-shared-auth')
+
+    for (k, v) in options:
+        if k in ['-u', '--uuid']:
+             uuid_str = v
+
+    auth = 0
+    for (k, v) in options:
+        if k in ['-A', '--auth']:
+            auth = v
+
+    if serverType == SERVER_XEN_API:
+        return server.xenapi.host.tmem_shared_auth(domid,uuid_str,auth)
+    else:
+        return server.xend.node.tmem_shared_auth(domid,uuid_str,auth)
 
 
 commands = {
@@ -3211,6 +3258,7 @@ commands = {
     "tmem-destroy": xm_tmem_destroy,
     "tmem-list": xm_tmem_list,
     "tmem-set": xm_tmem_set,
+    "tmem-shared-auth": xm_tmem_shared_auth,
     }
 
 ## The commands supported by a separate argument parser in xend.xm.
diff -r f57cc4a7c853 -r c98fd816db85 xen/common/tmem.c
--- a/xen/common/tmem.c Thu Aug 06 09:15:42 2009 +0100
+++ b/xen/common/tmem.c Thu Aug 06 09:19:55 2009 +0100
@@ -26,6 +26,8 @@
 
 #define EXPORT /* indicates code other modules are dependent upon */
 #define FORWARD
+
+#define TMEM_SPEC_VERSION 0
 
 /************  INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE ************/
 
@@ -105,6 +107,7 @@ DECL_CYC_COUNTER(decompress);
 #define MAX_GLOBAL_SHARED_POOLS  16
 
 struct tm_pool;
+struct tmem_page_descriptor;
 struct client {
     struct list_head client_list;
     struct tm_pool *pools[MAX_POOLS_PER_DOMAIN];
@@ -116,11 +119,20 @@ struct client {
     uint32_t cap;
     bool_t compress;
     bool_t frozen;
+    bool_t shared_auth_required;
+    /* for save/restore/migration */
+    bool_t live_migrating;
+    bool_t was_frozen;
+    struct list_head persistent_invalidated_list;
+    struct tmem_page_descriptor *cur_pgp;
+    /* statistics collection */
     unsigned long compress_poor, compress_nomem;
     unsigned long compressed_pages;
     uint64_t compressed_sum_size;
     uint64_t total_cycles;
     unsigned long succ_pers_puts, succ_eph_gets, succ_pers_gets;
+    /* shared pool authentication */
+    uint64_t shared_auth_uuid[MAX_GLOBAL_SHARED_POOLS][2];
 };
 typedef struct client client_t;
 
@@ -137,6 +149,7 @@ struct tm_pool {
 struct tm_pool {
     bool_t shared;
     bool_t persistent;
+    int pageshift; /* 0 == 2**12 */
     struct list_head pool_list; /* FIXME do we need this anymore? */
     client_t *client;
     uint64_t uuid[2]; /* 0 for private, non-zero for shared */
@@ -144,8 +157,11 @@ struct tm_pool {
     rwlock_t pool_rwlock;
     struct rb_root obj_rb_root[OBJ_HASH_BUCKETS]; /* protected by pool_rwlock 
*/
     struct list_head share_list; /* valid if shared */
-    DECL_SENTINEL
     int shared_count; /* valid if shared */
+    /* for save/restore/migration */
+    struct list_head persistent_page_list;
+    struct tmem_page_descriptor *cur_pgp;
+    /* statistics collection */
     atomic_t pgp_count;
     int pgp_count_max;
     long obj_count;  /* atomicity depends on pool_rwlock held for write */
@@ -158,6 +174,7 @@ struct tm_pool {
     unsigned long gets, found_gets;
     unsigned long flushs, flushs_found;
     unsigned long flush_objs, flush_objs_found;
+    DECL_SENTINEL
 };
 typedef struct tm_pool pool_t;
 
@@ -189,16 +206,29 @@ typedef struct tmem_object_node objnode_
 typedef struct tmem_object_node objnode_t;
 
 struct tmem_page_descriptor {
-    struct list_head global_eph_pages;
-    struct list_head client_eph_pages;
-    obj_t *obj;
+    union {
+        struct list_head global_eph_pages;
+        struct list_head client_inv_pages;
+    };
+    union {
+        struct list_head client_eph_pages;
+        struct list_head pool_pers_pages;
+    };
+    union {
+        obj_t *obj;
+        uint64_t inv_oid;  /* used for invalid list only */
+    };
     uint32_t index;
-    size_t size; /* 0 == PAGE_SIZE (pfp), else compressed data (cdata) */
+    size_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid,
+                    else compressed data (cdata) */
     union {
         pfp_t *pfp;  /* page frame pointer */
         char *cdata; /* compressed data */
     };
-    uint64_t timestamp;
+    union {
+        uint64_t timestamp;
+        uint32_t pool_id;  /* used for invalid list only */
+    };
     DECL_SENTINEL
 };
 typedef struct tmem_page_descriptor pgp_t;
@@ -209,6 +239,7 @@ static LIST_HEAD(global_pool_list);
 static LIST_HEAD(global_pool_list);
 
 static pool_t *global_shared_pools[MAX_GLOBAL_SHARED_POOLS] = { 0 };
+static bool_t global_shared_auth = 0;
 static atomic_t client_weight_total = ATOMIC_INIT(0);
 static int tmem_initialized = 0;
 
@@ -217,6 +248,7 @@ EXPORT DEFINE_SPINLOCK(tmem_spinlock);  
 EXPORT DEFINE_SPINLOCK(tmem_spinlock);  /* used iff tmh_lock_all */
 EXPORT DEFINE_RWLOCK(tmem_rwlock);      /* used iff !tmh_lock_all */
 static DEFINE_SPINLOCK(eph_lists_spinlock); /* protects global AND clients */
+static DEFINE_SPINLOCK(pers_lists_spinlock);
 
 #define tmem_spin_lock(_l)  do {if (!tmh_lock_all) spin_lock(_l);}while(0)
 #define tmem_spin_unlock(_l)  do {if (!tmh_lock_all) spin_unlock(_l);}while(0)
@@ -366,36 +398,61 @@ static NOINLINE void pgp_free(pgp_t *pgp
     ASSERT(pgp->obj != NULL);
     ASSERT_SENTINEL(pgp->obj,OBJ);
     ASSERT_SENTINEL(pgp->obj->pool,POOL);
-    ASSERT(list_empty(&pgp->global_eph_pages));
-    ASSERT(list_empty(&pgp->client_eph_pages));
+    ASSERT(pgp->obj->pool->client != NULL);
     if ( from_delete )
         ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL);
     ASSERT(pgp->obj->pool != NULL);
     pool = pgp->obj->pool;
+    if ( is_ephemeral(pool) )
+    {
+        ASSERT(list_empty(&pgp->global_eph_pages));
+        ASSERT(list_empty(&pgp->client_eph_pages));
+    }
     pgp_free_data(pgp, pool);
+    atomic_dec_and_assert(global_pgp_count);
+    atomic_dec_and_assert(pool->pgp_count);
+    pgp->size = -1;
+    if ( is_persistent(pool) && pool->client->live_migrating )
+    {
+        pgp->inv_oid = pgp->obj->oid;
+        pgp->pool_id = pool->pool_id;
+        return;
+    }
     INVERT_SENTINEL(pgp,PGD);
     pgp->obj = NULL;
     pgp->index = -1;
-    pgp->size = -1;
-    atomic_dec_and_assert(global_pgp_count);
-    atomic_dec_and_assert(pool->pgp_count);
+    tmem_free(pgp,sizeof(pgp_t),pool);
+}
+
+static NOINLINE void pgp_free_from_inv_list(client_t *client, pgp_t *pgp)
+{
+    pool_t *pool = client->pools[pgp->pool_id];
+
+    ASSERT_SENTINEL(pool,POOL);
+    ASSERT_SENTINEL(pgp,PGD);
+    INVERT_SENTINEL(pgp,PGD);
+    pgp->obj = NULL;
+    pgp->index = -1;
     tmem_free(pgp,sizeof(pgp_t),pool);
 }
 
 /* remove the page from appropriate lists but not from parent object */
 static void pgp_delist(pgp_t *pgp, bool_t no_eph_lock)
 {
+    client_t *client;
+
     ASSERT(pgp != NULL);
     ASSERT(pgp->obj != NULL);
     ASSERT(pgp->obj->pool != NULL);
-    ASSERT(pgp->obj->pool->client != NULL);
+    client = pgp->obj->pool->client;
+    ASSERT(client != NULL);
     if ( is_ephemeral(pgp->obj->pool) )
     {
         if ( !no_eph_lock )
             tmem_spin_lock(&eph_lists_spinlock);
         if ( !list_empty(&pgp->client_eph_pages) )
-            pgp->obj->pool->client->eph_count--;
-        ASSERT(pgp->obj->pool->client->eph_count >= 0);
+            client->eph_count--;
+        ASSERT(client->eph_count >= 0);
         list_del_init(&pgp->client_eph_pages);
         if ( !list_empty(&pgp->global_eph_pages) )
             global_eph_count--;
@@ -403,6 +460,20 @@ static void pgp_delist(pgp_t *pgp, bool_
         list_del_init(&pgp->global_eph_pages);
         if ( !no_eph_lock )
             tmem_spin_unlock(&eph_lists_spinlock);
+    } else {
+        if ( client->live_migrating )
+        {
+            tmem_spin_lock(&pers_lists_spinlock);
+            list_add_tail(&pgp->client_inv_pages,
+                          &client->persistent_invalidated_list);
+            if ( pgp != pgp->obj->pool->cur_pgp )
+                list_del_init(&pgp->pool_pers_pages);
+            tmem_spin_unlock(&pers_lists_spinlock);
+        } else {
+            tmem_spin_lock(&pers_lists_spinlock);
+            list_del_init(&pgp->pool_pers_pages);
+            tmem_spin_unlock(&pers_lists_spinlock);
+        }
     }
 }
 
@@ -564,6 +635,7 @@ static NOINLINE void obj_free(obj_t *obj
     ASSERT(obj->pgp_count == 0);
     pool = obj->pool;
     ASSERT(pool != NULL);
+    ASSERT(pool->client != NULL);
     ASSERT_WRITELOCK(&pool->pool_rwlock);
     if ( obj->tree_root.rnode != NULL ) /* may be a "stump" with no leaves */
         radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
@@ -685,11 +757,14 @@ static pool_t * pool_alloc(void)
     for (i = 0; i < OBJ_HASH_BUCKETS; i++)
         pool->obj_rb_root[i] = RB_ROOT;
     INIT_LIST_HEAD(&pool->pool_list);
+    INIT_LIST_HEAD(&pool->persistent_page_list);
+    pool->cur_pgp = NULL;
     rwlock_init(&pool->pool_rwlock);
     pool->pgp_count_max = pool->obj_count_max = 0;
     pool->objnode_count = pool->objnode_count_max = 0;
     atomic_set(&pool->pgp_count,0);
-    pool->obj_count = 0;
+    pool->obj_count = 0; pool->shared_count = 0;
+    pool->pageshift = PAGE_SHIFT - 12;
     pool->good_puts = pool->puts = pool->dup_puts_flushed = 0;
     pool->dup_puts_replaced = pool->no_mem_puts = 0;
     pool->found_gets = pool->gets = 0;
@@ -805,6 +880,12 @@ static void pool_flush(pool_t *pool, cli
         is_persistent(pool) ? "persistent" : "ephemeral" ,
         is_shared(pool) ? "shared" : "private");
     printk("%s=%d pool_id=%d\n", 
cli_id_str,pool->client->cli_id,pool->pool_id);
+    if ( pool->client->live_migrating )
+    {
+        printk("can't %s pool while %s is live-migrating\n",
+               destroy?"destroy":"flush", client_str);
+        return;
+    }
     pool_destroy_objs(pool,0,CLI_ID_NULL);
     if ( destroy )
     {
@@ -815,10 +896,10 @@ static void pool_flush(pool_t *pool, cli
 
 /************ CLIENT MANIPULATION OPERATIONS **************************/
 
-static client_t *client_create(void)
+static client_t *client_create(cli_id_t cli_id)
 {
     client_t *client = tmem_malloc(client_t,NULL);
-    cli_id_t cli_id = tmh_get_cli_id_from_current();
+    int i;
 
     printk("tmem: initializing tmem capability for 
%s=%d...",cli_id_str,cli_id);
     if ( client == NULL )
@@ -834,15 +915,23 @@ static client_t *client_create(void)
             tmem_free(client,sizeof(client_t),NULL);
         return NULL;
     }
-    tmh_set_current_client(client);
+    tmh_set_client_from_id(client,cli_id);
     client->cli_id = cli_id;
 #ifdef __i386__
     client->compress = 0;
 #else
     client->compress = tmh_compression_enabled();
 #endif
+    client->shared_auth_required = tmh_shared_auth();
+    for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
+        client->shared_auth_uuid[i][0] =
+            client->shared_auth_uuid[i][1] = -1L;
+    client->frozen = 0; client->live_migrating = 0;
+    client->weight = 0; client->cap = 0;
     list_add_tail(&client->client_list, &global_client_list);
     INIT_LIST_HEAD(&client->ephemeral_page_list);
+    INIT_LIST_HEAD(&client->persistent_invalidated_list);
+    client->cur_pgp = NULL;
     client->eph_count = client->eph_count_max = 0;
     client->total_cycles = 0; client->succ_pers_puts = 0;
     client->succ_eph_gets = 0; client->succ_pers_gets = 0;
@@ -885,6 +974,11 @@ static bool_t client_over_quota(client_t
         return 0;
     return ( ((global_eph_count*100L) / client->eph_count ) >
              ((total*100L) / client->weight) );
+}
+
+static void client_freeze(client_t *client, int freeze)
+{
+    client->frozen = freeze;
 }
 
 /************ MEMORY REVOCATION ROUTINES *******************************/
@@ -993,7 +1087,8 @@ static unsigned long tmem_relinquish_npa
 
 /************ TMEM CORE OPERATIONS ************************************/
 
-static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn)
+static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn,
+                                         void *cva)
 {
     void *dst, *p;
     size_t size;
@@ -1011,7 +1106,7 @@ static NOINLINE int do_tmem_put_compress
     if ( pgp->pfp != NULL )
         pgp_free_data(pgp, pgp->obj->pool);  /* FIXME... is this right? */
     START_CYC_COUNTER(compress);
-    ret = tmh_compress_from_client(cmfn, &dst, &size);
+    ret = tmh_compress_from_client(cmfn, &dst, &size, cva);
     if ( (ret == -EFAULT) || (ret == 0) )
         goto out;
     else if ( (size == 0) || (size >= tmem_subpage_maxsize()) )
@@ -1034,7 +1129,7 @@ out:
 }
 
 static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn,
-              uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len)
+       uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva)
 {
     pool_t *pool;
     obj_t *obj;
@@ -1042,7 +1137,6 @@ static NOINLINE int do_tmem_dup_put(pgp_
     pgp_t *pgpfound = NULL;
     int ret;
 
-    /* if we can successfully manipulate pgp to change out the data, do so */
     ASSERT(pgp != NULL);
     ASSERT(pgp->pfp != NULL);
     ASSERT(pgp->size != -1);
@@ -1052,10 +1146,12 @@ static NOINLINE int do_tmem_dup_put(pgp_
     pool = obj->pool;
     ASSERT(pool != NULL);
     client = pool->client;
-    if ( len != 0 && tmh_compression_enabled() &&
-         client->compress && pgp->size != 0 )
-    {
-        ret = do_tmem_put_compress(pgp,cmfn);
+    if ( client->live_migrating )
+        goto failed_dup; /* no dups allowed when migrating */
+    /* can we successfully manipulate pgp to change out the data? */
+    if ( len != 0 && client->compress && pgp->size != 0 )
+    {
+        ret = do_tmem_put_compress(pgp,cmfn,cva);
         if ( ret == 1 )
             goto done;
         else if ( ret == 0 )
@@ -1072,7 +1168,7 @@ copy_uncompressed:
     if ( ( pgp->pfp = tmem_page_alloc(pool) ) == NULL )
         goto failed_dup;
     /* tmh_copy_from_client properly handles len==0 and offsets != 0 */
-    ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len);
+    ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,0);
     if ( ret == -EFAULT )
         goto bad_copy;
     pgp->size = 0;
@@ -1115,9 +1211,10 @@ failed_dup:
 }
 
 
-static NOINLINE int do_tmem_put(pool_t *pool, uint64_t oid, uint32_t index,
+static NOINLINE int do_tmem_put(pool_t *pool,
+              uint64_t oid, uint32_t index,
               tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
-              uint32_t pfn_offset, uint32_t len)
+              uint32_t pfn_offset, uint32_t len, void *cva)
 {
     obj_t *obj = NULL, *objfound = NULL, *objnew = NULL;
     pgp_t *pgp = NULL, *pgpdel = NULL;
@@ -1131,7 +1228,7 @@ static NOINLINE int do_tmem_put(pool_t *
     {
         ASSERT_SPINLOCK(&objfound->obj_spinlock);
         if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL)
-            return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len);
+            return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len,cva);
     }
 
     /* no puts allowed into a frozen pool (except dup puts) */
@@ -1162,10 +1259,10 @@ static NOINLINE int do_tmem_put(pool_t *
     ASSERT(ret != -EEXIST);
     pgp->index = index;
 
-    if ( len != 0 && tmh_compression_enabled() && client->compress )
+    if ( len != 0 && client->compress )
     {
         ASSERT(pgp->pfp == NULL);
-        ret = do_tmem_put_compress(pgp,cmfn);
+        ret = do_tmem_put_compress(pgp,cmfn,cva);
         if ( ret == 1 )
             goto insert_page;
         if ( ret == -ENOMEM )
@@ -1189,7 +1286,7 @@ copy_uncompressed:
         goto delete_and_free;
     }
     /* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */
-    ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len);
+    ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,cva);
     if ( ret == -EFAULT )
         goto bad_copy;
     pgp->size = 0;
@@ -1207,6 +1304,11 @@ insert_page:
         if (++client->eph_count > client->eph_count_max)
             client->eph_count_max = client->eph_count;
         tmem_spin_unlock(&eph_lists_spinlock);
+    } else { /* is_persistent */
+        tmem_spin_lock(&pers_lists_spinlock);
+        list_add_tail(&pgp->pool_pers_pages,
+            &pool->persistent_page_list);
+        tmem_spin_unlock(&pers_lists_spinlock);
     }
     ASSERT( ((objnew==obj)||(objfound==obj)) && (objnew!=objfound));
     if ( is_shared(pool) )
@@ -1249,7 +1351,7 @@ ASSERT(0);
 
 static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t index,
               tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
-              uint32_t pfn_offset, uint32_t len)
+              uint32_t pfn_offset, uint32_t len, void *cva)
 {
     obj_t *obj;
     pgp_t *pgp;
@@ -1279,12 +1381,13 @@ static NOINLINE int do_tmem_get(pool_t *
     if ( pgp->size != 0 )
     {
         START_CYC_COUNTER(decompress);
-        if ( tmh_decompress_to_client(cmfn, pgp->cdata, pgp->size) == -EFAULT )
+        if ( tmh_decompress_to_client(cmfn, pgp->cdata,
+                                      pgp->size, cva) == -EFAULT )
             goto bad_copy;
         END_CYC_COUNTER(decompress);
     }
     else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
-                                 pfn_offset, len) == -EFAULT)
+                                 pfn_offset, len, cva) == -EFAULT)
         goto bad_copy;
     if ( is_ephemeral(pool) )
     {
@@ -1398,10 +1501,12 @@ static NOINLINE int do_tmem_destroy_pool
     return 1;
 }
 
-static NOINLINE int do_tmem_new_pool(uint32_t flags, uint64_t uuid_lo, 
uint64_t uuid_hi)
-{
-    client_t *client = tmh_client_from_current();
-    cli_id_t cli_id = tmh_get_cli_id_from_current();
+static NOINLINE int do_tmem_new_pool(cli_id_t this_cli_id,
+                                     uint32_t this_pool_id, uint32_t flags,
+                                     uint64_t uuid_lo, uint64_t uuid_hi)
+{
+    client_t *client;
+    cli_id_t cli_id;
     int persistent = flags & TMEM_POOL_PERSIST;
     int shared = flags & TMEM_POOL_SHARED;
     int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT)
@@ -1410,12 +1515,22 @@ static NOINLINE int do_tmem_new_pool(uin
          & TMEM_POOL_VERSION_MASK;
     pool_t *pool, *shpool;
     int s_poolid, d_poolid, first_unused_s_poolid;
-
+    int i;
+
+    if ( this_cli_id == CLI_ID_NULL )
+    {
+        client = tmh_client_from_current();
+        cli_id = tmh_get_cli_id_from_current();
+    } else {
+        if ( (client = tmh_client_from_cli_id(this_cli_id)) == NULL)
+            return -EPERM;
+        cli_id = this_cli_id;
+    }
     ASSERT(client != NULL);
     printk("tmem: allocating %s-%s tmem pool for %s=%d...",
         persistent ? "persistent" : "ephemeral" ,
         shared ? "shared" : "private", cli_id_str, cli_id);
-    if ( specversion != 0 )
+    if ( specversion != TMEM_SPEC_VERSION )
     {
         printk("failed... unsupported spec version\n");
         return -EPERM;
@@ -1430,14 +1545,35 @@ static NOINLINE int do_tmem_new_pool(uin
         printk("failed... out of memory\n");
         return -ENOMEM;
     }
-    for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ )
+    if ( this_cli_id != CLI_ID_NULL )
+    {
+        d_poolid = this_pool_id;
+        if ( client->pools[d_poolid] != NULL )
+            return -EPERM;
+    }
+    else for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ )
         if ( client->pools[d_poolid] == NULL )
             break;
-    if ( d_poolid == MAX_POOLS_PER_DOMAIN )
+    if ( d_poolid >= MAX_POOLS_PER_DOMAIN )
     {
         printk("failed... no more pool slots available for this %s\n",
             client_str);
         goto fail;
+    }
+    if ( shared )
+    {
+        if ( uuid_lo == -1L && uuid_hi == -1L )
+            shared = 0;
+        if ( client->shared_auth_required && !global_shared_auth )
+        {
+            for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
+                if ( (client->shared_auth_uuid[i][0] == uuid_lo) &&
+                     (client->shared_auth_uuid[i][1] == uuid_hi) )
+                    break;
+            if ( i == MAX_GLOBAL_SHARED_POOLS )
+                shared = 0;
+        }
     }
     pool->shared = shared;
     pool->client = client;
@@ -1491,7 +1627,7 @@ fail:
 /************ TMEM CONTROL OPERATIONS ************************************/
 
 /* freeze/thaw all pools belonging to client cli_id (all domains if -1) */
-static int tmemc_freeze_pools(int cli_id, int arg)
+static int tmemc_freeze_pools(cli_id_t cli_id, int arg)
 {
     client_t *client;
     bool_t freeze = (arg == TMEMC_FREEZE) ? 1 : 0;
@@ -1502,20 +1638,20 @@ static int tmemc_freeze_pools(int cli_id
     if ( cli_id == CLI_ID_NULL )
     {
         list_for_each_entry(client,&global_client_list,client_list)
-            client->frozen = freeze;
+            client_freeze(client,freeze);
         printk("tmem: all pools %s for all %ss\n",s,client_str);
     }
     else
     {
         if ( (client = tmh_client_from_cli_id(cli_id)) == NULL)
             return -1;
-        client->frozen = freeze;
+        client_freeze(client,freeze);
         printk("tmem: all pools %s for %s=%d\n",s,cli_id_str,cli_id);
     }
     return 0;
 }
 
-static int tmemc_flush_mem(int cli_id, uint32_t kb)
+static int tmemc_flush_mem(cli_id_t cli_id, uint32_t kb)
 {
     uint32_t npages, flushed_pages, flushed_kb;
 
@@ -1699,7 +1835,7 @@ static int tmemc_list_global(tmem_cli_va
     return sum;
 }
 
-static int tmemc_list(int cli_id, tmem_cli_va_t buf, uint32_t len,
+static int tmemc_list(cli_id_t cli_id, tmem_cli_va_t buf, uint32_t len,
                                bool_t use_long)
 {
     client_t *client;
@@ -1716,7 +1852,6 @@ static int tmemc_list(int cli_id, tmem_c
         return -1;
     else
         off = tmemc_list_client(client, buf, 0, len, use_long);
-
 
     return 0;
 }
@@ -1740,6 +1875,9 @@ static int tmemc_set_var_one(client_t *c
         printk("tmem: cap set to %d for %s=%d\n",arg1,cli_id_str,cli_id);
         break;
     case TMEMC_SET_COMPRESS:
+#ifdef __i386__
+        return -1;
+#endif
         client->compress = arg1 ? 1 : 0;
         printk("tmem: compression %s for %s=%d\n",
             arg1 ? "enabled" : "disabled",cli_id_str,cli_id);
@@ -1751,7 +1889,7 @@ static int tmemc_set_var_one(client_t *c
     return 0;
 }
 
-static int tmemc_set_var(int cli_id, uint32_t subop, uint32_t arg1)
+static int tmemc_set_var(cli_id_t cli_id, uint32_t subop, uint32_t arg1)
 {
     client_t *client;
 
@@ -1765,11 +1903,229 @@ static int tmemc_set_var(int cli_id, uin
     return 0;
 }
 
-static int do_tmem_control(uint32_t subop, uint32_t cli_id32,
-   uint32_t arg1, uint32_t arg2, tmem_cli_va_t buf)
+static NOINLINE int tmemc_shared_pool_auth(cli_id_t cli_id, uint64_t uuid_lo,
+                                  uint64_t uuid_hi, bool_t auth)
+{
+    client_t *client;
+    int i, free = -1;
+
+    if ( cli_id == CLI_ID_NULL )
+    {
+        global_shared_auth = auth;
+        return 1;
+    }
+    client = tmh_client_from_cli_id(cli_id);
+    if ( client == NULL )
+        return -EINVAL;
+    for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
+    {
+        if ( (client->shared_auth_uuid[i][0] == uuid_lo) &&
+             (client->shared_auth_uuid[i][1] == uuid_hi) )
+        {
+            if ( auth == 0 )
+                client->shared_auth_uuid[i][0] =
+                    client->shared_auth_uuid[i][1] = -1L;
+            return 1;
+        }
+        if ( (auth == 1) && (client->shared_auth_uuid[i][0] == -1L) &&
+                 (client->shared_auth_uuid[i][1] == -1L) && (free == -1) )
+            free = i;
+    }
+    if ( auth == 0 )
+        return 0;
+    if ( auth == 1 && free == -1 )
+        return -ENOMEM;
+    client->shared_auth_uuid[free][0] = uuid_lo;
+    client->shared_auth_uuid[free][1] = uuid_hi;
+    return 1;
+}
+
+static NOINLINE int tmemc_save_subop(int cli_id, uint32_t pool_id,
+                        uint32_t subop, tmem_cli_va_t buf, uint32_t arg1)
+{
+    client_t *client = tmh_client_from_cli_id(cli_id);
+    pool_t *pool =  (client == NULL) ? NULL : client->pools[pool_id];
+    uint32_t p;
+    uint64_t *uuid;
+    pgp_t *pgp, *pgp2;
+
+    switch(subop)
+    {
+    case TMEMC_SAVE_BEGIN:
+        if ( client == NULL )
+            return 0;
+        for (p = 0; p < MAX_POOLS_PER_DOMAIN; p++)
+            if ( client->pools[p] != NULL )
+                break;
+        if ( p == MAX_POOLS_PER_DOMAIN )
+            return 0;
+        client->was_frozen = client->frozen;
+        client->frozen = 1;
+        if ( arg1 != 0 )
+            client->live_migrating = 1;
+        return 1;
+    case TMEMC_RESTORE_BEGIN:
+        ASSERT(client == NULL);
+        if ( (client = client_create(cli_id)) == NULL )
+            return -1;
+        return 1;
+    case TMEMC_SAVE_GET_VERSION:
+        return TMEM_SPEC_VERSION;
+    case TMEMC_SAVE_GET_MAXPOOLS:
+        return MAX_POOLS_PER_DOMAIN;
+    case TMEMC_SAVE_GET_CLIENT_WEIGHT:
+        if ( client == NULL )
+            return -1;
+        return client->weight == -1 ? -2 : client->weight;
+    case TMEMC_SAVE_GET_CLIENT_CAP:
+        if ( client == NULL )
+            return -1;
+        return client->cap == -1 ? -2 : client->cap;
+    case TMEMC_SAVE_GET_CLIENT_FLAGS:
+        if ( client == NULL )
+            return -1;
+        return (client->compress ? TMEM_CLIENT_COMPRESS : 0 ) |
+               (client->was_frozen ? TMEM_CLIENT_FROZEN : 0 );
+    case TMEMC_SAVE_GET_POOL_FLAGS:
+         if ( pool == NULL )
+             return -1;
+         return (pool->persistent ? TMEM_POOL_PERSIST : 0) |
+                (pool->shared ? TMEM_POOL_SHARED : 0) |
+                (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT);
+    case TMEMC_SAVE_GET_POOL_NPAGES:
+         if ( pool == NULL )
+             return -1;
+        return _atomic_read(pool->pgp_count);
+    case TMEMC_SAVE_GET_POOL_UUID:
+         if ( pool == NULL )
+             return -1;
+        uuid = (uint64_t *)buf.p;
+        *uuid++ = pool->uuid[0];
+        *uuid = pool->uuid[1];
+        return 0;
+    case TMEMC_SAVE_END:
+        if ( client == NULL )
+            return -1;
+        client->live_migrating = 0;
+        if ( !list_empty(&client->persistent_invalidated_list) )
+            list_for_each_entry_safe(pgp,pgp2,
+              &client->persistent_invalidated_list, client_inv_pages)
+                pgp_free_from_inv_list(client,pgp);
+        client->frozen = client->was_frozen;
+        return 0;
+    }
+    return -1;
+}
+
+static NOINLINE int tmemc_save_get_next_page(int cli_id, int pool_id,
+                        tmem_cli_va_t buf, uint32_t bufsize)
+{
+    client_t *client = tmh_client_from_cli_id(cli_id);
+    pool_t *pool =  (client == NULL) ? NULL : client->pools[pool_id];
+    pgp_t *pgp;
+    int ret = 0;
+    struct tmem_handle *h;
+    unsigned int pagesize;
+
+    if ( pool == NULL )
+        return -1;
+    pagesize = 1 << (pool->pageshift+12);
+    if ( is_ephemeral(pool) )
+        return -1;
+    if ( bufsize < pagesize + sizeof(struct tmem_handle) )
+        return -ENOMEM;
+
+    tmem_spin_lock(&pers_lists_spinlock);
+    if ( list_empty(&pool->persistent_page_list) )
+    {
+        ret = -1;
+        goto out;
+    }
+    /* note: pool->cur_pgp is the pgp last returned by get_next_page */
+    if ( pool->cur_pgp == NULL )
+    {
+        /* process the first one */
+        pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next,
+                         pgp_t,pool_pers_pages);
+    } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages, 
+                             &pool->persistent_page_list) )
+    {
+        /* already processed the last one in the list */
+        ret = -1;
+        goto out;
+    }
+    pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next,
+                         pgp_t,pool_pers_pages);
+    pool->cur_pgp = pgp;
+    h = (struct tmem_handle *)buf.p;
+    h->oid = pgp->obj->oid;
+    h->index = pgp->index;
+    buf.p = (void *)(h+1);
+    ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p);
+
+out:
+    tmem_spin_unlock(&pers_lists_spinlock);
+    return ret;
+}
+
+static NOINLINE int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_t buf,
+                        uint32_t bufsize)
+{
+    client_t *client = tmh_client_from_cli_id(cli_id);
+    pgp_t *pgp;
+    struct tmem_handle *h;
+    int ret = 0;
+
+    if ( client == NULL )
+        return 0;
+    if ( bufsize < sizeof(struct tmem_handle) )
+        return 0;
+    tmem_spin_lock(&pers_lists_spinlock);
+    if ( list_empty(&client->persistent_invalidated_list) )
+        goto out;
+    if ( client->cur_pgp == NULL )
+    {
+        pgp = list_entry((&client->persistent_invalidated_list)->next,
+                         pgp_t,client_inv_pages);
+        client->cur_pgp = pgp;
+    } else if ( list_is_last(&client->cur_pgp->client_inv_pages, 
+                             &client->persistent_invalidated_list) )
+    {
+        client->cur_pgp = NULL;
+        ret = 0;
+        goto out;
+    } else {
+        pgp = list_entry((&client->cur_pgp->client_inv_pages)->next,
+                         pgp_t,client_inv_pages);
+        client->cur_pgp = pgp;
+    }
+    h = (struct tmem_handle *)buf.p;
+    h->pool_id = pgp->pool_id;
+    h->oid = pgp->inv_oid;
+    h->index = pgp->index;
+    ret = 1;
+out:
+    tmem_spin_unlock(&pers_lists_spinlock);
+    return ret;
+}
+
+static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t oid,
+                      uint32_t index, tmem_cli_va_t buf, uint32_t bufsize)
+{
+    client_t *client = tmh_client_from_cli_id(cli_id);
+    pool_t *pool =  (client == NULL) ? NULL : client->pools[pool_id];
+
+    if ( pool == NULL )
+        return -1;
+    return do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p);
+}
+
+static int tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t oid,
+                        uint32_t index)
+{
+    client_t *client = tmh_client_from_cli_id(cli_id);
+    pool_t *pool =  (client == NULL) ? NULL : client->pools[pool_id];
+
+    if ( pool == NULL )
+        return -1;
+    return do_tmem_flush_page(pool, oid, index);
+}
+
+static NOINLINE int do_tmem_control(struct tmem_op *op)
 {
     int ret;
-    cli_id_t cli_id = (cli_id_t)cli_id32;
+    uint32_t pool_id = op->pool_id;
+    uint32_t subop = op->u.ctrl.subop;
 
     if (!tmh_current_is_privileged())
     {
@@ -1781,18 +2137,50 @@ static int do_tmem_control(uint32_t subo
     case TMEMC_THAW:
     case TMEMC_FREEZE:
     case TMEMC_DESTROY:
-        ret = tmemc_freeze_pools(cli_id,subop);
+        ret = tmemc_freeze_pools(op->u.ctrl.cli_id,subop);
         break;
     case TMEMC_FLUSH:
-        ret = tmemc_flush_mem(cli_id,arg1);
+        ret = tmemc_flush_mem(op->u.ctrl.cli_id,op->u.ctrl.arg1);
         break;
     case TMEMC_LIST:
-        ret = tmemc_list(cli_id,buf,arg1,arg2);
+        ret = tmemc_list(op->u.ctrl.cli_id,op->u.ctrl.buf,
+                         op->u.ctrl.arg1,op->u.ctrl.arg2);
         break;
     case TMEMC_SET_WEIGHT:
     case TMEMC_SET_CAP:
     case TMEMC_SET_COMPRESS:
-        ret = tmemc_set_var(cli_id,subop,arg1);
+        ret = tmemc_set_var(op->u.ctrl.cli_id,subop,op->u.ctrl.arg1);
+        break;
+    case TMEMC_SAVE_BEGIN:
+    case TMEMC_RESTORE_BEGIN:
+    case TMEMC_SAVE_GET_VERSION:
+    case TMEMC_SAVE_GET_MAXPOOLS:
+    case TMEMC_SAVE_GET_CLIENT_WEIGHT:
+    case TMEMC_SAVE_GET_CLIENT_CAP:
+    case TMEMC_SAVE_GET_CLIENT_FLAGS:
+    case TMEMC_SAVE_GET_POOL_FLAGS:
+    case TMEMC_SAVE_GET_POOL_NPAGES:
+    case TMEMC_SAVE_GET_POOL_UUID:
+    case TMEMC_SAVE_END:
+        ret = tmemc_save_subop(op->u.ctrl.cli_id,pool_id,subop,
+                        op->u.ctrl.buf,op->u.ctrl.arg1);
+        break;
+    case TMEMC_SAVE_GET_NEXT_PAGE:
+        ret = tmemc_save_get_next_page(op->u.ctrl.cli_id, pool_id,
+                                       op->u.ctrl.buf, op->u.ctrl.arg1);
+        break;
+    case TMEMC_SAVE_GET_NEXT_INV:
+        ret = tmemc_save_get_next_inv(op->u.ctrl.cli_id, op->u.ctrl.buf,
+                                      op->u.ctrl.arg1);
+        break;
+    case TMEMC_RESTORE_PUT_PAGE:
+        ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id,
+                                     op->u.ctrl.arg3, op->u.ctrl.arg2,
+                                     op->u.ctrl.buf, op->u.ctrl.arg1);
+        break;
+    case TMEMC_RESTORE_FLUSH_PAGE:
+        ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id,
+                                       op->u.ctrl.arg3, op->u.ctrl.arg2);
         break;
     default:
         ret = -1;
@@ -1850,8 +2238,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
     {
         tmem_write_lock(&tmem_rwlock);
         tmem_write_lock_set = 1;
-        rc = do_tmem_control(op.u.ctrl.subop, op.u.ctrl.cli_id,
-                             op.u.ctrl.arg1, op.u.ctrl.arg2, op.u.ctrl.buf);
+        rc = do_tmem_control(&op);
+        goto out;
+    } else if ( op.cmd == TMEM_AUTH ) {
+        tmem_write_lock(&tmem_rwlock);
+        tmem_write_lock_set = 1;
+        rc = tmemc_shared_pool_auth(op.u.new.arg1,op.u.new.uuid[0],
+                         op.u.new.uuid[1],op.u.new.flags);
+        goto out;
+    } else if ( op.cmd == TMEM_RESTORE_NEW ) {
+        tmem_write_lock(&tmem_rwlock);
+        tmem_write_lock_set = 1;
+        rc = do_tmem_new_pool(op.u.new.arg1, op.pool_id, op.u.new.flags,
+                         op.u.new.uuid[0], op.u.new.uuid[1]);
         goto out;
     }
 
@@ -1860,7 +2259,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
     {
         tmem_write_lock(&tmem_rwlock);
         tmem_write_lock_set = 1;
-        if ( (client = client_create()) == NULL )
+        if ( (client = client_create(tmh_get_cli_id_from_current())) == NULL )
         {
             printk("tmem: can't create tmem structure for %s\n",client_str);
             rc = -ENOMEM;
@@ -1896,22 +2295,22 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
     switch ( op.cmd )
     {
     case TMEM_NEW_POOL:
-        rc = do_tmem_new_pool(op.u.new.flags,
+        rc = do_tmem_new_pool(CLI_ID_NULL, 0, op.u.new.flags,
                               op.u.new.uuid[0], op.u.new.uuid[1]);
         break;
     case TMEM_NEW_PAGE:
-        rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
-                         0, 0, 0);
+        rc = do_tmem_put(pool, op.u.gen.object,
+                         op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL);
         break;
     case TMEM_PUT_PAGE:
-        rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
-                         0, 0, PAGE_SIZE);
+        rc = do_tmem_put(pool, op.u.gen.object,
+                    op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL);
         if (rc == 1) succ_put = 1;
         else non_succ_put = 1;
         break;
     case TMEM_GET_PAGE:
         rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
-                         0, 0, PAGE_SIZE);
+                         0, 0, PAGE_SIZE, 0);
         if (rc == 1) succ_get = 1;
         else non_succ_get = 1;
         break;
@@ -1930,12 +2329,13 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
     case TMEM_READ:
         rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
                          op.u.gen.tmem_offset, op.u.gen.pfn_offset,
-                         op.u.gen.len);
+                         op.u.gen.len,0);
         break;
     case TMEM_WRITE:
-        rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
+        rc = do_tmem_put(pool, op.u.gen.object,
+                         op.u.gen.index, op.u.gen.cmfn,
                          op.u.gen.tmem_offset, op.u.gen.pfn_offset,
-                         op.u.gen.len);
+                         op.u.gen.len, NULL);
         break;
     case TMEM_XCHG:
         /* need to hold global lock to ensure xchg is atomic */
diff -r f57cc4a7c853 -r c98fd816db85 xen/common/tmem_xen.c
--- a/xen/common/tmem_xen.c     Thu Aug 06 09:15:42 2009 +0100
+++ b/xen/common/tmem_xen.c     Thu Aug 06 09:19:55 2009 +0100
@@ -19,6 +19,9 @@ boolean_param("tmem", opt_tmem);
 
 EXPORT int opt_tmem_compress = 0;
 boolean_param("tmem_compress", opt_tmem_compress);
+
+EXPORT int opt_tmem_shared_auth = 0;
+boolean_param("tmem_shared_auth", opt_tmem_shared_auth);
 
 EXPORT int opt_tmem_lock = 0;
 integer_param("tmem_lock", opt_tmem_lock);
@@ -98,14 +101,14 @@ static inline void *cli_mfn_to_va(tmem_c
 
 EXPORT int tmh_copy_from_client(pfp_t *pfp,
     tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
-    uint32_t pfn_offset, uint32_t len)
+    uint32_t pfn_offset, uint32_t len, void *cli_va)
 {
     unsigned long tmem_mfn;
-    void *tmem_va, *cli_va = NULL;
+    void *tmem_va;
 
     ASSERT(pfp != NULL);
     if ( tmem_offset || pfn_offset || len )
-        if ( (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
+        if ( (cli_va == NULL) && ((cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) )
             return -EFAULT;
     tmem_mfn = page_to_mfn(pfp);
     tmem_va = map_domain_page(tmem_mfn);
@@ -123,14 +126,13 @@ EXPORT int tmh_copy_from_client(pfp_t *p
 }
 
 EXPORT int tmh_compress_from_client(tmem_cli_mfn_t cmfn,
-    void **out_va, size_t *out_len)
-{
-    void *cli_va;
+    void **out_va, size_t *out_len, void *cli_va)
+{
     int ret = 0;
     unsigned char *dmem = this_cpu(dstmem);
     unsigned char *wmem = this_cpu(workmem);
 
-    if ( (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
+    if ( (cli_va == NULL) && (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
         return -EFAULT;
     if ( dmem == NULL || wmem == NULL )
         return 0;  /* no buffer, so can't compress */
@@ -143,13 +145,16 @@ EXPORT int tmh_compress_from_client(tmem
 }
 
 EXPORT int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
-    uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len)
-{
-    unsigned long tmem_mfn, cli_mfn;
-    void *tmem_va, *cli_va;
+    uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cli_va)
+{
+    unsigned long tmem_mfn, cli_mfn = 0;
+    int mark_dirty = 1;
+    void *tmem_va;
 
     ASSERT(pfp != NULL);
-    if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
+    if ( cli_va != NULL )
+        mark_dirty = 0;
+    else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
         return -EFAULT;
     tmem_mfn = page_to_mfn(pfp);
     tmem_va = map_domain_page(tmem_mfn);
@@ -158,26 +163,35 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
     else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) )
         memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len);
     unmap_domain_page(tmem_va);
-    unmap_domain_page(cli_va);
-    paging_mark_dirty(current->domain,cli_mfn);
-    mb();
-    return 1;
-}
-
-EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, size_t size)
-{
-    unsigned long cli_mfn;
-    void *cli_va;
+    if ( mark_dirty )
+    {
+        unmap_domain_page(cli_va);
+        paging_mark_dirty(current->domain,cli_mfn);
+    }
+    mb();
+    return 1;
+}
+
+EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va,
+                                    size_t size, void *cli_va)
+{
+    unsigned long cli_mfn = 0;
+    int mark_dirty = 1;
     size_t out_len = PAGE_SIZE;
     int ret;
 
-    if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
+    if ( cli_va != NULL )
+        mark_dirty = 0;
+    else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
         return -EFAULT;
     ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len);
     ASSERT(ret == LZO_E_OK);
     ASSERT(out_len == PAGE_SIZE);
-    unmap_domain_page(cli_va);
-    paging_mark_dirty(current->domain,cli_mfn);
+    if ( mark_dirty )
+    {
+        unmap_domain_page(cli_va);
+        paging_mark_dirty(current->domain,cli_mfn);
+    }
     mb();
     return 1;
 }
diff -r f57cc4a7c853 -r c98fd816db85 xen/include/public/tmem.h
--- a/xen/include/public/tmem.h Thu Aug 06 09:15:42 2009 +0100
+++ b/xen/include/public/tmem.h Thu Aug 06 09:19:55 2009 +0100
@@ -42,15 +42,36 @@
 #define TMEM_WRITE                 9
 #define TMEM_XCHG                 10
 
+/* Privileged commands to HYPERVISOR_tmem_op() */
+#define TMEM_AUTH                 101 
+#define TMEM_RESTORE_NEW          102
+
 /* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */
-#define TMEMC_THAW                 0
-#define TMEMC_FREEZE               1
-#define TMEMC_FLUSH                2
-#define TMEMC_DESTROY              3
-#define TMEMC_LIST                 4
-#define TMEMC_SET_WEIGHT           5
-#define TMEMC_SET_CAP              6
-#define TMEMC_SET_COMPRESS         7
+#define TMEMC_THAW                   0
+#define TMEMC_FREEZE                 1
+#define TMEMC_FLUSH                  2
+#define TMEMC_DESTROY                3
+#define TMEMC_LIST                   4
+#define TMEMC_SET_WEIGHT             5
+#define TMEMC_SET_CAP                6
+#define TMEMC_SET_COMPRESS           7
+#define TMEMC_SHARED_POOL_AUTH       8
+#define TMEMC_SHARED_POOL_DEAUTH     9
+#define TMEMC_SAVE_BEGIN             10
+#define TMEMC_SAVE_GET_VERSION       11
+#define TMEMC_SAVE_GET_MAXPOOLS      12
+#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13
+#define TMEMC_SAVE_GET_CLIENT_CAP    14
+#define TMEMC_SAVE_GET_CLIENT_FLAGS  15
+#define TMEMC_SAVE_GET_POOL_FLAGS    16
+#define TMEMC_SAVE_GET_POOL_NPAGES   17
+#define TMEMC_SAVE_GET_POOL_UUID     18
+#define TMEMC_SAVE_GET_NEXT_PAGE     19
+#define TMEMC_SAVE_GET_NEXT_INV      20
+#define TMEMC_SAVE_END               21
+#define TMEMC_RESTORE_BEGIN          30
+#define TMEMC_RESTORE_PUT_PAGE       32
+#define TMEMC_RESTORE_FLUSH_PAGE     33
 
 /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
 #define TMEM_POOL_PERSIST          1
@@ -60,6 +81,10 @@
 #define TMEM_POOL_VERSION_SHIFT   24
 #define TMEM_POOL_VERSION_MASK  0xff
 
+/* Bits for client flags (save/restore) */
+#define TMEM_CLIENT_COMPRESS       1
+#define TMEM_CLIENT_FROZEN         2
+
 /* Special errno values */
 #define EFROZEN                 1000
 #define EEMPTY                  1001
@@ -70,31 +95,40 @@ typedef XEN_GUEST_HANDLE(char) tmem_cli_
 typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t;
 struct tmem_op {
     uint32_t cmd;
-    int32_t pool_id; /* private > 0; shared < 0; 0 is invalid */
+    int32_t pool_id;
     union {
-        struct {  /* for cmd == TMEM_NEW_POOL */
+        struct {
             uint64_t uuid[2];
             uint32_t flags;
-        } new;
-        struct {  /* for cmd == TMEM_CONTROL */
+            uint32_t arg1;
+        } new; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */
+        struct { 
             uint32_t subop;
             uint32_t cli_id;
             uint32_t arg1;
             uint32_t arg2;
+            uint64_t arg3;
             tmem_cli_va_t buf;
-        } ctrl;
+        } ctrl; /* for cmd == TMEM_CONTROL */
         struct {
+            
             uint64_t object;
             uint32_t index;
             uint32_t tmem_offset;
             uint32_t pfn_offset;
             uint32_t len;
             tmem_cli_mfn_t cmfn; /* client machine page frame */
-        } gen;
+        } gen; /* for all other cmd ("generic") */
     } u;
 };
 typedef struct tmem_op tmem_op_t;
 DEFINE_XEN_GUEST_HANDLE(tmem_op_t);
+
+struct tmem_handle {
+    uint32_t pool_id;
+    uint32_t index;
+    uint64_t oid;
+};
 
 #endif
 
diff -r f57cc4a7c853 -r c98fd816db85 xen/include/xen/tmem_xen.h
--- a/xen/include/xen/tmem_xen.h        Thu Aug 06 09:15:42 2009 +0100
+++ b/xen/include/xen/tmem_xen.h        Thu Aug 06 09:19:55 2009 +0100
@@ -53,6 +53,12 @@ static inline int tmh_compression_enable
 static inline int tmh_compression_enabled(void)
 {
     return opt_tmem_compress;
+}
+
+extern int opt_tmem_shared_auth;
+static inline int tmh_shared_auth(void)
+{
+    return opt_tmem_shared_auth;
 }
 
 extern int opt_tmem;
@@ -271,9 +277,10 @@ static inline tmh_cli_ptr_t *tmh_get_cli
     return current->domain;
 }
 
-static inline void tmh_set_current_client(struct client *client)
-{
-    current->domain->tmem = client;
+static inline void tmh_set_client_from_id(struct client *client,cli_id_t cli_id)
+{
+    struct domain *d = get_domain_by_id(cli_id);
+    d->tmem = client;
 }
 
 static inline bool_t tmh_current_is_privileged(void)
@@ -301,9 +308,11 @@ static inline int tmh_get_tmemop_from_cl
             return rc;
         switch ( cop.cmd )
         {
-        case TMEM_NEW_POOL: u = XLAT_tmem_op_u_new;  break;
-        case TMEM_CONTROL:  u = XLAT_tmem_op_u_ctrl; break;
-        default:            u = XLAT_tmem_op_u_gen;  break;
+        case TMEM_NEW_POOL:   u = XLAT_tmem_op_u_new;   break;
+        case TMEM_CONTROL:    u = XLAT_tmem_op_u_ctrl;  break;
+        case TMEM_AUTH:       u = XLAT_tmem_op_u_new;   break;
+        case TMEM_RESTORE_NEW:u = XLAT_tmem_op_u_new;   break;
+        default:              u = XLAT_tmem_op_u_gen ;  break;
         }
 #define XLAT_tmem_op_HNDL_u_ctrl_buf(_d_, _s_) \
         guest_from_compat_handle((_d_)->u.ctrl.buf, (_s_)->u.ctrl.buf)
@@ -326,16 +335,16 @@ static inline void tmh_copy_to_client_bu
 #define tmh_cli_id_str "domid"
 #define tmh_client_str "domain"
 
-extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t);
-
-extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *);
+extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t,void*);
+
+extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *,void*);
 
 extern int tmh_copy_from_client(pfp_t *pfp,
     tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
-    uint32_t pfn_offset, uint32_t len);
+    uint32_t pfn_offset, uint32_t len, void *cva);
 
 extern int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
-    uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len);
+    uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva);
 
 
 #define TMEM_PERF

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] tmem: save/restore/migrate/livemigrate and shared pool authentication, Xen patchbot-unstable <=