WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] Remove old blktap tools.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Remove old blktap tools.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Sat, 17 Jun 2006 03:10:34 +0000
Delivery-date: Fri, 16 Jun 2006 20:12:30 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User akw@xxxxxxxxxxxxxxxxxxxxx
# Node ID 840f33e54054270e3f4b9704111ed52bd381653b
# Parent  533bad7c0883189e26c2a7f43011801c417b01fe
Remove old blktap tools.

Signed-off-by: Andrew Warfield <andrew.warfield@xxxxxxxxxxxx>
---
 tools/blktap/Makefile                   |   93 --
 tools/blktap/README                     |  137 ---
 tools/blktap/README.sept05              |   33 
 tools/blktap/blkdump.c                  |   62 -
 tools/blktap/blkif.c                    |  212 -----
 tools/blktap/blktaplib.c                |  453 ----------
 tools/blktap/blktaplib.h                |  171 ----
 tools/blktap/list.h                     |   55 -
 tools/blktap/parallax/Makefile          |   62 -
 tools/blktap/parallax/README            |  171 ----
 tools/blktap/parallax/block-async.c     |  393 ---------
 tools/blktap/parallax/block-async.h     |   69 -
 tools/blktap/parallax/blockstore.c      | 1348 --------------------------------
 tools/blktap/parallax/blockstore.h      |  134 ---
 tools/blktap/parallax/blockstored.c     |  275 ------
 tools/blktap/parallax/bstest.c          |  191 ----
 tools/blktap/parallax/parallax.c        |  608 --------------
 tools/blktap/parallax/radix.c           |  631 --------------
 tools/blktap/parallax/radix.h           |   45 -
 tools/blktap/parallax/requests-async.c  |  762 ------------------
 tools/blktap/parallax/requests-async.h  |   29 
 tools/blktap/parallax/snaplog.c         |  238 -----
 tools/blktap/parallax/snaplog.h         |   61 -
 tools/blktap/parallax/vdi.c             |  367 --------
 tools/blktap/parallax/vdi.h             |   55 -
 tools/blktap/parallax/vdi_create.c      |   52 -
 tools/blktap/parallax/vdi_fill.c        |   81 -
 tools/blktap/parallax/vdi_list.c        |   47 -
 tools/blktap/parallax/vdi_snap.c        |   43 -
 tools/blktap/parallax/vdi_snap_delete.c |   48 -
 tools/blktap/parallax/vdi_snap_list.c   |   82 -
 tools/blktap/parallax/vdi_tree.c        |  132 ---
 tools/blktap/parallax/vdi_unittest.c    |  184 ----
 tools/blktap/parallax/vdi_validate.c    |   97 --
 tools/blktap/ublkback/Makefile          |   40 
 tools/blktap/ublkback/ublkback.c        |   18 
 tools/blktap/ublkback/ublkbacklib.c     |  473 -----------
 tools/blktap/ublkback/ublkbacklib.h     |   16 
 tools/blktap/xenbus.c                   |  568 -------------
 39 files changed, 8536 deletions(-)

diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/Makefile
--- a/tools/blktap/Makefile     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-MAJOR    = 3.0
-MINOR    = 0
-SONAME   = libblktap.so.$(MAJOR)
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-SUBDIRS :=
-SUBDIRS += ublkback
-#SUBDIRS += parallax
-
-BLKTAP_INSTALL_DIR = /usr/sbin
-
-INSTALL            = install
-INSTALL_PROG       = $(INSTALL) -m0755
-INSTALL_DIR        = $(INSTALL) -d -m0755
-
-INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
-
-LIBS     := -lpthread -lz
-
-SRCS     :=
-SRCS     += blktaplib.c xenbus.c blkif.c
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# get asprintf():
-CFLAGS   += -D _GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-CFLAGS   += $(INCLUDES) 
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS   :=
-#IBINS   += blkdump
-
-LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-
-.PHONY: all
-all: mk-symlinks libblktap.so #blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: install
-install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include
-       $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
-       #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: clean
-clean:
-       rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
-       @set -e; for subdir in $(SUBDIRS); do \
-               $(MAKE) -C $$subdir $@;       \
-       done
-
-.PHONY: rpm
-rpm: all
-       rm -rf staging
-       mkdir staging
-       mkdir staging/i386
-       rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
-               --define "_rpmdir$$PWD/staging" -bb rpm.spec
-       mv staging/i386/*.rpm .
-       rm -rf staging
-
-libblktap.so: $(OBJS) 
-       $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
-             -L$(XEN_XENSTORE) -l xenstore                       \
-             -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
-       ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
-       ln -sf libblktap.so.$(MAJOR) $@
-
-blkdump: libblktap.so
-       $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
-             -l blktap blkdump.c
-
-.PHONY: TAGS clean install mk-symlinks rpm
-
-.PHONY: TAGS
-TAGS:
-       etags -t $(SRCS) *.h
-
--include $(DEPS)
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/README
--- a/tools/blktap/README       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-Block Tap User-level Interfaces
-Andrew Warfield
-andrew.warfield@xxxxxxxxxxxx
-February 8, 2005
-
-NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
-mileage may vary.  Don't use it for anything important.  Please. ;)
-
-NOTE #2: All of the interfaces here are likely to change.  This is all
-early code, and I am checking it in because others want to play with
-it.  If you use it for anything, please let me know!
-
-Overview:
----------
-
-This directory contains a library and set of example applications for
-the block tap device.  The block tap hooks into the split block device
-interfaces above Xen allowing them to be extended.  This extension can
-be done in userspace with the help of a library.
-
-The tap can be installed either as an interposition domain in between
-a frontend and backend driver pair, or as a terminating backend, in
-which case it is responsible for serving all requests itself.
-
-There are two reasons that you might want to use the tap,
-corresponding to these configurations:
-
- 1. To examine or modify a stream of block requests while they are
-    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
-
- 2. To prototype a new backend driver, serving requests from the tap
-    rather than passing them along to the XenLinux blkback driver.
-    (e.g. to forward block requests to a remote host)
-
-
-Interface:
-----------
-
-At the moment, the tap interface is similar in spirit to that of the
-Linux netfilter.  Requests are messages from a client (frontend)
-domain to a disk (backend) domain.  Responses are messages travelling
-back, acknowledging the completion of a request.  the library allows
-chains of functions to be attached to these events.  In addition,
-hooks may be attached to handle control messages, which signify things
-like connections from new domains.
-
-At present the control messages especially expose a lot of the
-underlying driver interfaces.  This may change in the future in order
-to simplify writing hooks.
-
-Here are the public interfaces:
-
-These allow hook functions to be chained:
-
- void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
- void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
- void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-
-This allows a response to be injected, in the case where a request has
-been removed using BLKTAP_STOLEN.
-
- void blktap_inject_response(blkif_response_t *);
-
-These let you add file descriptors and handlers to the main poll loop:
-
- int  blktap_attach_poll(int fd, short events, int (*func)(int));
- void blktap_detach_poll(int fd);
-
-This starts the main poll loop:
-
- int  blktap_listen(void);
-
-Example:
---------
-
-blkimage.c uses an image on the local file system to serve requests to
-a domain.  Here's what it looks like:
-
----[blkimg.c]---
-
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
-    image_init();
-    
-    blktap_register_ctrl_hook("image_control", image_control);
-    blktap_register_request_hook("image_request", image_request);
-    blktap_listen();
-    
-    return 0;
-}
-
-----------------
-
-All of the real work is in blkimglib.c, but this illustrates the
-actual tap interface well enough.  image_control() will be called with
-all control messages.  image_request() handles requests.  As it reads
-from an on-disk image file, no requests are ever passed on to a
-backend, and so there will be no responses to process -- so there is
-nothing registered as a response hook.
-
-Other examples:
----------------
-
-Here is a list of other examples in the directory:
-
-Things that terminate a block request stream:
-
-  blkimg    - Use a image file/device to serve requests
-  blkgnbd   - Use a remote gnbd server to serve requests
-  blkaio    - Use libaio... (DOES NOT WORK)
-  
-Things that don't:
-
-  blkdump   - Print in-flight requests.
-  blkcow    - Really inefficient copy-on-write disks using libdb to store
-              writes.
-
-There are examples of plugging these things together, for instance
-blkcowgnbd is a read-only gnbd device with copy-on-write to a local
-file.
-
-TODO:
------
-
-- Make session tracking work.  At the moment these generally just handle a 
-  single front-end client at a time.
-
-- Integrate with Xend.  Need to cleanly pass a image identifier in the connect
-  message.
-
-- Make an asynchronous file-io terminator.  The libaio attempt is
-  tragically stalled because mapped foreign pages make pfn_valid fail
-  (they are VM_IO), and so cannot be passed to aio as targets.  A
-  better solution may be to tear the disk interfaces out of the real
-  backend and expose them somehow.
-
-- Make CoW suck less.
-
-- Do something more along the lines of dynamic linking for the
-  plugins, so thatthey don't all need a new main().
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/README.sept05
--- a/tools/blktap/README.sept05        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-The blktap has been rewritten substantially based on the current
-blkback driver.  I've removed passthrough support, as this is broken
-by the move to grant tables and the lack of transitive grants.  A
-blktap VM is now only capable of terminating block requests in
-userspace.
-
-ublkback/ contains a _very_ initial cut at a user-level version of the block
-backend driver.  It gives a working example of how the current tap
-interfaces are used, in particular w.r.t. the vbd directories in
-xenstore.
-
-parallax/ contains fairly recent parallax code.  This does not run on
-the changed blktap interface, but should only be a couple of hours
-work to get going again.
-
-All of the tricky bits are done, but there is plenty of cleaning to
-do, and the top-level functionality is not here yet.  At the moment,
-the daemon ignores the pdev requested by the tools and opens the file 
-or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
-
-TODO:
-1. Fix to allow pdev in the store to specify the device to open.
-2. Add support (to tools as well) to mount arbitrary files...
-   just write the filename to mount into the store, instead of pdev.
-3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
-   - creating a blkif should take a reference.
-   - each inflight request should take a reference on dequeue in blktaplib
-   - sending responses should drop refs.
-   - blkif should be implicitly freed when refcounts fall to 0.
-4. Modify the parallax req/rsp code as per ublkback to use the new tap 
-   interfaces. 
-5. Write a front end that allows parallax and normal mounts to coexist
-6. Allow blkback and blktap to run at the same time.
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/* blkdump.c
- *
- * show a running trace of block requests as they fly by.
- * 
- * (c) 2004 Andrew Warfield.
- */
- 
-#include <stdio.h>
-#include "blktaplib.h"
- 
-int request_print(blkif_request_t *req)
-{
-    int i;
-    
-    if ( (req->operation == BLKIF_OP_READ) ||
-         (req->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
-                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
-                blkif_op_name[req->operation], 
-                req->nr_segments, req->handle, 
-                req->sector_number);
-        
-        
-        for (i=0; i < req->nr_segments; i++) {
-            printf("              (gref: 0x%8x start: %u stop: %u)\n",
-                   req->seg[i].gref,
-                   req->seg[i].first_sect,
-                   req->seg[i].last_sect);
-        }
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    
-    return BLKTAP_PASS;
-}
-
-int response_print(blkif_response_t *rsp)
-{   
-    if ( (rsp->operation == BLKIF_OP_READ) ||
-         (rsp->operation == BLKIF_OP_WRITE) )
-    {
-        printf("[%2u:%2u>%5s] (status: %d)\n", 
-                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
-                blkif_op_name[rsp->operation], 
-                rsp->status);
-            
-    } else {
-        printf("Unknown request message type.\n");
-    }
-    return BLKTAP_PASS;
-}
-
-int main(int argc, char *argv[])
-{
-    blktap_register_request_hook("request_print", request_print);
-    blktap_register_response_hook("response_print", response_print);
-    blktap_listen();
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blkif.c
--- a/tools/blktap/blkif.c      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-/*
- * blkif.c
- * 
- * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <err.h>
-
-#include "blktaplib.h"
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-blkif_t *alloc_blkif(domid_t domid)
-{
-    blkif_t *blkif;
-
-    blkif = (blkif_t *)malloc(sizeof(blkif_t));
-    if (!blkif)
-        return NULL;
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid = domid;
-
-    return blkif;
-}
-
-static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
-{
-    new_blkif_hook = fn;
-}
-
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly)
-{
-    domid_t domid;
-    blkif_t **pblkif;
-    
-    if (blkif == NULL)
-        return -EINVAL;
-
-    domid = blkif->domid;
-    blkif->handle   = handle;
-    blkif->pdev     = pdev;
-    blkif->readonly = readonly;
-
-    /*
-     * Call out to the new_blkif_hook. The tap application should define this,
-     * and it should return having set blkif->ops
-     * 
-     */
-    if (new_blkif_hook == NULL)
-    {
-        warn("Probe detected a new blkif, but no new_blkif_hook!");
-        return -1;
-    }
-    new_blkif_hook(blkif);
-
-    /* Now wire it in. */
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists\n");
-            return -1;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-    blkif->hash_next = NULL;
-    *pblkif = blkif;
-
-    return 0;
-}
-
-void free_blkif(blkif_t *blkif)
-{
-    blkif_t **pblkif, *curs;
-    
-    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
-    while ( (curs = *pblkif) != NULL )
-    {
-        if ( blkif == curs )
-        {
-            *pblkif = curs->hash_next;
-        }
-        pblkif = &curs->hash_next;
-    }
-    free(blkif);
-}
-
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
-{
-    request_hook_t *rh_ent, **c;
-    
-    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
-    if (!rh_ent) 
-    {
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->request_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                                  int (*rh)(blkif_t *, blkif_response_t *, int)) 
-{
-    response_hook_t *rh_ent, **c;
-    
-    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
-    if (!rh_ent) 
-    { 
-        warn("couldn't allocate a new hook");
-        return;
-    }
-    
-    rh_ent->func  = rh;
-    rh_ent->next = NULL;
-    if (asprintf(&rh_ent->name, "%s", name) == -1)
-    {
-        free(rh_ent);
-        warn("couldn't allocate a new hook name");
-        return;
-    }
-    
-    c = &blkif->response_hook_chain;
-    while (*c != NULL) {
-        c = &(*c)->next;
-    }
-    *c = rh_ent;
-}
-
-void blkif_print_hooks(blkif_t *blkif)
-{
-    request_hook_t  *req_hook;
-    response_hook_t *rsp_hook;
-    
-    DPRINTF("Request Hooks:\n");
-    req_hook = blkif->request_hook_chain;
-    while (req_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
-        req_hook = req_hook->next;
-    }
-    
-    DPRINTF("Response Hooks:\n");
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-long int vbd_size(blkif_t *blkif)
-{
-    return 1000000000;
-}
-
-long int vbd_secsize(blkif_t *blkif)
-{
-    return 512;
-}
-
-unsigned vbd_info(blkif_t *blkif)
-{
-    return 0;
-}
-
-
-void __init_blkif(void)
-{    
-    memset(blkif_hash, 0, sizeof(blkif_hash));
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,453 +0,0 @@
-/*
- * blktaplib.c
- * 
- * userspace interface routines for the blktap driver.
- *
- * (threadsafe(r) version) 
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <err.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <xs.h>
-                                                                     
-#define __COMPILING_BLKTAP_LIB
-#include "blktaplib.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-#define DEBUG_RING_IDXS 0
-
-#define POLLRDNORM     0x040 
-
-#define BLKTAP_IOCTL_KICK 1
-
-
-void got_sig_bus();
-void got_sig_int();
-
-/* in kernel these are opposite, but we are a consumer now. */
-blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
-blkif_front_ring_t be_ring; 
-
-unsigned long mmap_vstart = 0;
-char *blktap_mem;
-int fd = 0;
-
-#define BLKTAP_RING_PAGES       1 /* Front */
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
-    
-int bad_count = 0;
-void bad(void)
-{
-    bad_count ++;
-    if (bad_count > 50) exit(0);
-}
-/*-----[ ID Manipulation from tap driver code ]--------------------------*/
-
-#define ACTIVE_RING_IDX unsigned short
-
-inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
-    return ( (fe_dom << 16) | idx );
-}
-
-inline unsigned int ID_TO_IDX(unsigned long id) 
-{ 
-        return ( id & 0x0000ffff );
-}
-
-inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-
-static int (*request_hook)(blkif_request_t *req) = NULL;
-static int (*response_hook)(blkif_response_t *req) = NULL;
-        
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-/*
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
-    blkif_request_t *req_d;
-    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&be_prod_mutex);
-    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
-    memcpy(req_d, req, sizeof(blkif_request_t));
-    wmb();
-    be_ring.req_prod_pvt++;
-    pthread_mutex_unlock(&be_prod_mutex);
-    
-    return 0;
-}
-*/
-
-inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *rsp_d;
-    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-    pthread_mutex_lock(&fe_prod_mutex);
-    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
-    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
-    wmb();
-    fe_ring.rsp_prod_pvt++;
-    pthread_mutex_unlock(&fe_prod_mutex);
-
-    return 0;
-}
-
-static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
-{
-    response_hook_t  *rsp_hook;
-    
-    rsp_hook = blkif->response_hook_chain;
-    while (rsp_hook != NULL)
-    {
-        switch(rsp_hook->func(blkif, rsp, 1))
-        {
-        case BLKTAP_PASS:
-            break;
-        default:
-            printf("Only PASS is supported for resp hooks!\n");
-        }
-        rsp_hook = rsp_hook->next;
-    }
-}
-
-
-static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
-{
-    
-    apply_rsp_hooks(blkif, rsp);
-  
-    write_rsp_to_fe_ring(rsp);
-}
-
-void blktap_kick_responses(void)
-{
-    pthread_mutex_lock(&push_mutex);
-    
-    RING_PUSH_RESPONSES(&fe_ring);
-    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-    
-    pthread_mutex_unlock(&push_mutex);
-}
-
-/*-----[ Polling fd listeners ]------------------------------------------*/
-
-#define MAX_POLLFDS 64
-
-typedef struct {
-    int (*func)(int fd);
-    struct pollfd *pfd;
-    int fd;
-    short events;
-    int active;
-} pollhook_t;
-
-static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
-static pollhook_t     pollhooks[MAX_POLLFDS];
-static unsigned int   ph_freelist[MAX_POLLFDS];
-static unsigned int   ph_cons, ph_prod;
-#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
-#define PH_IDX(x) (x % MAX_POLLFDS)
-
-int blktap_attach_poll(int fd, short events, int (*func)(int fd))
-{
-    pollhook_t *ph;
-    
-    if (nr_pollhooks() == MAX_POLLFDS) {
-        printf("Too many pollhooks!\n");
-        return -1;
-    }
-    
-    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
-    
-    ph->func        = func;
-    ph->fd          = fd;
-    ph->events      = events;
-    ph->active      = 1;
-    
-    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
-            nr_pollhooks());
-    
-    return 0;
-}
-
-void blktap_detach_poll(int fd)
-{
-    int i;
-    
-    for (i=0; i<MAX_POLLFDS; i++)
-        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
-            ph_freelist[PH_IDX(ph_prod++)] = i;
-            pollhooks[i].pfd->fd = -1;
-            pollhooks[i].active = 0;
-            break;
-        }
-        
-    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
-            nr_pollhooks());
-}
-
-void pollhook_init(void)
-{
-    int i;
-    
-    for (i=0; i < MAX_POLLFDS; i++) {
-        ph_freelist[i] = (i+1) % MAX_POLLFDS;
-        pollhooks[i].active = 0;
-    }
-    
-    ph_cons = 0;
-    ph_prod = MAX_POLLFDS;
-}
-
-void __attribute__ ((constructor)) blktaplib_init(void)
-{
-    pollhook_init();
-}
-
-/*-----[ The main listen loop ]------------------------------------------*/
-
-int blktap_listen(void)
-{
-    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
-    struct xs_handle *h;
-    blkif_t *blkif;
-
-    /* comms rings: */
-    blkif_request_t  *req;
-    blkif_response_t *rsp;
-    blkif_sring_t    *sring;
-    RING_IDX          rp, i, pfd_count; 
-    
-    /* pending rings */
-    blkif_request_t req_pending[BLK_RING_SIZE];
-    /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
-    
-    /* handler hooks: */
-    request_hook_t   *req_hook;
-    response_hook_t  *rsp_hook;
-    
-    signal (SIGBUS, got_sig_bus);
-    signal (SIGINT, got_sig_int);
-    
-    __init_blkif();
-
-    fd = open("/dev/blktap", O_RDWR);
-    if (fd == -1)
-        err(-1, "open failed!");
-
-    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
-             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-    if ((int)blktap_mem == -1) 
-        err(-1, "mmap failed!");
-
-    /* assign the rings to the mapped memory */
-/*
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
-    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-*/  
-    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
-    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
-
-    mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
-
-
-    /* Set up store connection and watch. */
-    h = xs_daemon_open();
-    if (h == NULL) 
-        err(-1, "xs_daemon_open");
-    
-    ret = add_blockdevice_probe_watch(h, "Domain-0");
-    if (ret != 0)
-        err(0, "adding device probewatch");
-    
-    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
-
-    while(1) {
-        int ret;
-        
-        /* build the poll list */
-        pfd_count = 0;
-        for ( i=0; i < MAX_POLLFDS; i++ ) {
-            pollhook_t *ph = &pollhooks[i];
-            
-            if (ph->active) {
-                pfd[pfd_count].fd     = ph->fd;
-                pfd[pfd_count].events = ph->events;
-                ph->pfd               = &pfd[pfd_count];
-                pfd_count++;
-            }
-        }
-
-        tap_pfd = pfd_count++;
-        pfd[tap_pfd].fd = fd;
-        pfd[tap_pfd].events = POLLIN;
-
-        store_pfd = pfd_count++;
-        pfd[store_pfd].fd = xs_fileno(h);
-        pfd[store_pfd].events = POLLIN;
-        
-        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
-            if (DEBUG_RING_IDXS)
-                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
-            continue;
-        }
-
-        for (i=0; i < MAX_POLLFDS; i++) {
-            if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
-                pollhooks[i].func(pollhooks[i].pfd->fd);
-        }
-        
-        if (pfd[store_pfd].revents) {
-            ret = xs_fire_next_watch(h);
-        }
-
-        if (pfd[tap_pfd].revents) 
-        {    
-            /* empty the fe_ring */
-            notify_fe = 0;
-            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
-            rp = fe_ring.sring->req_prod;
-            rmb();
-            for (i = fe_ring.req_cons; i != rp; i++)
-            {
-                int done = 0; 
-
-                req = RING_GET_REQUEST(&fe_ring, i);
-                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
-                req = &req_pending[ID_TO_IDX(req->id)];
-
-                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
-
-                if (blkif != NULL)
-                {
-                    req_hook = blkif->request_hook_chain;
-                    while (req_hook != NULL)
-                    {
-                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
-                        {
-                        case BLKTAP_RESPOND:
-                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
-                            write_rsp_to_fe_ring((blkif_response_t *)req);
-                            notify_fe = 1;
-                            done = 1;
-                            break;
-                        case BLKTAP_STOLEN:
-                            done = 1;
-                            break;
-                        case BLKTAP_PASS:
-                            break;
-                        default:
-                            printf("Unknown request hook return value!\n");
-                        }
-                        if (done) break;
-                        req_hook = req_hook->next;
-                    }
-                }
-
-                if (done == 0) 
-                {
-                    /* this was:  */
-                    /* write_req_to_be_ring(req); */
-
-                    unsigned long id = req->id;
-                    unsigned short operation = req->operation;
-                    printf("Unterminated request!\n");
-                    rsp = (blkif_response_t *)req;
-                    rsp->id = id;
-                    rsp->operation = operation;
-                    rsp->status = BLKIF_RSP_ERROR;
-                    write_rsp_to_fe_ring(rsp);
-                    notify_fe = 1;
-                    done = 1;
-                }
-
-            }
-            fe_ring.req_cons = i;
-
-            /* empty the be_ring */
-/*
-            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
-            rp = be_ring.sring->rsp_prod;
-            rmb();
-            for (i = be_ring.rsp_cons; i != rp; i++)
-            {
-
-                rsp = RING_GET_RESPONSE(&be_ring, i);
-                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
-                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
-
-                DPRINTF("copying a be request\n");
-
-                apply_rsp_hooks(rsp);
-                write_rsp_to_fe_ring(rsp);
-            }
-            be_ring.rsp_cons = i;
-*/
-            /* notify the domains */
-/*
-            if (notify_be) {
-                DPRINTF("notifying be\n");
-pthread_mutex_lock(&push_mutex);
-                RING_PUSH_REQUESTS(&be_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
-pthread_mutex_unlock(&push_mutex);
-            }
-*/
-            if (notify_fe) {
-                DPRINTF("notifying fe\n");
-                pthread_mutex_lock(&push_mutex);
-                RING_PUSH_RESPONSES(&fe_ring);
-                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-                pthread_mutex_unlock(&push_mutex);
-            }
-        }        
-    }
-
-
-    munmap(blktap_mem, PAGE_SIZE);
-
- mmap_failed:
-    close(fd);
-
- open_failed:
-    return 0;
-}
-
-void got_sig_bus() {
-    printf("Attempted to access a page that isn't.\n");
-    exit(-1);
-}
-
-void got_sig_int() {
-    DPRINTF("quitting -- returning to passthrough mode.\n");
-    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
-    close(fd);
-    fd = 0;
-    exit(0);
-} 
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-/* blktaplib.h
- *
- * userland accessors to the block tap.
- *
- * Sept 2/05 -- I'm scaling this back to only support block remappings
- * to user in a backend domain.  Passthrough and interposition can be readded
- * once transitive grants are available.
- */
- 
-#ifndef __BLKTAPLIB_H__
-#define __BLKTAPLIB_H__
-
-#include <xenctrl.h>
-#include <sys/user.h>
-#include <xen/xen.h>
-#include <xen/io/blkif.h>
-#include <xen/io/ring.h>
-#include <xen/io/domain_controller.h>
-#include <xs.h>
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLK_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-#define BLKTAP_IOCTL_PRINT_IDXS      100   
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) );
-/*
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
-*/
-}
-
-/* Return values for handling messages in hooks. */
-#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
-#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
-#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
-
-//#define domid_t unsigned short
-
-inline unsigned int ID_TO_IDX(unsigned long id);
-inline domid_t ID_TO_DOM(unsigned long id);
-
-int  blktap_attach_poll(int fd, short events, int (*func)(int));
-void blktap_detach_poll(int fd);
-int  blktap_listen(void);
-
-struct blkif;
-
-typedef struct request_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_request_t *, int);
-    struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
-    char *name;
-    int (*func)(struct blkif *, blkif_response_t *, int);
-    struct response_hook_st *next;
-} response_hook_t;
-
-struct blkif_ops {
-    long int (*get_size)(struct blkif *blkif);
-    long int (*get_secsize)(struct blkif *blkif);
-    unsigned (*get_info)(struct blkif *blkif);
-};
-
-typedef struct blkif {
-    domid_t domid;
-    long int handle;
-
-    long int pdev;
-    long int readonly;
-
-    enum { DISCONNECTED, CONNECTED } state;
-
-    struct blkif_ops *ops;
-    request_hook_t *request_hook_chain;
-    response_hook_t *response_hook_chain;
-
-    struct blkif *hash_next;
-
-    void *prv;  /* device-specific data */
-} blkif_t;
-
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-blkif_t *alloc_blkif(domid_t domid);
-int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
-               long int readonly);
-void free_blkif(blkif_t *blkif);
-void __init_blkif(void);
-
-
-/* xenstore/xenbus: */
-extern int add_blockdevice_probe_watch(struct xs_handle *h, 
-                                       const char *domname);
-int xs_fire_next_watch(struct xs_handle *h);
-
-
-void blkif_print_hooks(blkif_t *blkif);
-void blkif_register_request_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_request_t *, int));
-void blkif_register_response_hook(blkif_t *blkif, char *name, 
-                             int (*rh)(blkif_t *, blkif_response_t *, int));
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
-void blktap_kick_responses(void);
-
-/* this must match the underlying driver... */
-#define MAX_PENDING_REQS 64
-
-/* Accessing attached data page mappings */
-#define MMAP_PAGES                                              \
-    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                                   \
-    (mmap_vstart +                                              \
-     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
-     ((_seg) * PAGE_SIZE))
-
-extern unsigned long mmap_vstart;
-
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
-    [BLKIF_OP_READ]       = "READ",
-    [BLKIF_OP_WRITE]      = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
-    
-#endif /* __BLKTAPLIB_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/list.h
--- a/tools/blktap/list.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * list.h
- * 
- * This is a subset of linux's list.h intended to be used in user-space.
- * 
- */
-
-#ifndef __LIST_H__
-#define __LIST_H__
-
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-struct list_head {
-        struct list_head *next, *prev;
-};
- 
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
- 
-#define LIST_HEAD(name) \
-        struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void __list_add(struct list_head *new,
-                              struct list_head *prev,
-                              struct list_head *next)
-{
-        next->prev = new;
-        new->next = next;
-        new->prev = prev;
-        prev->next = new;
-}
-
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-        __list_add(new, head, head->next);
-}
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-        next->prev = prev;
-        prev->next = next;
-}
-static inline void list_del(struct list_head *entry)
-{
-        __list_del(entry->prev, entry->next);
-        entry->next = LIST_POISON1;
-        entry->prev = LIST_POISON2;
-}
-#define list_entry(ptr, type, member)                                   \
-        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each_entry(pos, head, member)                          \
-        for (pos = list_entry((head)->next, typeof(*pos), member);      \
-             &pos->member != (head);                                    \
-             pos = list_entry(pos->member.next, typeof(*pos), member))
-
-#endif /* __LIST_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/Makefile
--- a/tools/blktap/parallax/Makefile    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-PARALLAX_INSTALL_DIR   = /usr/sbin
-
-INSTALL         = install
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
-
-INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
-
-LDFLAGS = -L.. -lpthread -lz -lblktap
-
-#PLX_SRCS := 
-PLX_SRCS := vdi.c 
-PLX_SRCS += radix.c 
-PLX_SRCS += snaplog.c
-PLX_SRCS += blockstore.c 
-PLX_SRCS += block-async.c
-PLX_SRCS += requests-async.c
-VDI_SRCS := $(PLX_SRCS)
-PLX_SRCS += parallax.c
-
-#VDI_TOOLS :=
-VDI_TOOLS := vdi_create
-VDI_TOOLS += vdi_list
-VDI_TOOLS += vdi_snap
-VDI_TOOLS += vdi_snap_list
-VDI_TOOLS += vdi_snap_delete
-VDI_TOOLS += vdi_fill
-VDI_TOOLS += vdi_tree
-VDI_TOOLS += vdi_validate
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += $(INCLUDES)
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-IBINS    = parallax $(VDI_TOOLS)
-
-.PHONY: all
-all: $(VDI_TOOLS) parallax blockstored
-
-.PHONY: install
-install: all
-       $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
-
-.PHONY: clean
-clean:
-       rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
-
-parallax: $(PLX_SRCS)
-       $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
-
-${VDI_TOOLS}: %: %.c $(VDI_SRCS)
-       $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
-
--include $(DEPS)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/README
--- a/tools/blktap/parallax/README      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-Parallax Quick Overview
-March 3, 2005
-
-This is intended to provide a quick set of instructions to let you
-guys play with the current parallax source.  In it's current form, the
-code will let you run an arbitrary number of VMs off of a single disk
-image, doing copy-on-write as they make updates.  Each domain is
-assigned a virtual disk image (VDI), which may be based on a snapshot
-of an existing image.  All of the VDI and snapshot management should
-currently work.
-
-The current implementation uses a single file as a blockstore for
-_everything_ this will soon be replaced by the fancier backend code
-and the local cache.  As it stands, Parallax will create
-"blockstore.dat" in the directory that you run it from, and use
-largefile support to make this grow to unfathomable girth.  So, you
-probably want to run the daemon off of a local disk, with a lot of
-free space.
-
-Here's how to get going:
-
-0. Setup:
----------
-
-Pick a local directory on a disk with lots of room.  You should be
-running from a privileged domain (e.g. dom0) with the blocktap
-configured in and block backend NOT.
-
-For convenience (for the moment) copy all of the vdi tools (vdi_*) and
-the parallax daemon from tools/blktap into this directory.
-
-1. Populate the blockstore:
----------------------------
-
-First you need to put at least one image into the blockstore.  You
-will need a disk image, either as a file or local partition.  My
-general approach has been to
-
-(a) make a really big sparse file with 
-
-        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
-
-(b) put a filesystem into it
-
-        mkfs.ext3 ./image
-
-(c) mount it using loopback
-
-        mkdir ./mnt
-        mount -o loop ./image
-
-(d) cd into it and untar one of the image files from srg-roots.
-
-        cd mnt
-        tar ...
-
-NOTE: Beware if your system is FC3.  mkfs is not compatible with old
-versions of fedora, and so you don't have much choice but to install
-further fc3 images if you have used the fc3 version of mkfs.
-
-(e) unmount the image
-
-        cd ..
-        umount mnt
-
-(f) now, create a new VDI to hold the image 
-
-        ./vdi_create "My new FC3 VDI"
-
-(g) get the id of the new VDI.
-
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-
-(0 is the VDI id... create a few more if you want.)
-
-(h) hoover your image into the new VDI.
-
-        ./vdi_fill 0 ./image
-
-This will pull the entire image into the blockstore and set up a
-mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
-should also work, but vdi_fill has NO notion of sparseness yet, so you
-are going to pump a block into the store for each block you read.
-
-vdi_fill will count up until it is done, and you should be ready to
-go.  If you want to be anal, you can use vdi_validate to test the VDI
-against the original image.
-
-2. Create some extra VDIs
--------------------------
-
-VDIs are actually a list of snapshots, and each snapshot is a full
-image of mappings.  So, to preserve an immutable copy of a current
-VDI, do this:
-
-(a) Snapshot your new VDI.
-
-        ./vdi_snap 0
-
-Snapshotting writes the current radix root to the VDI's snapshot log,
-and assigns it a new writable root.
-
-(b) look at the VDI's snapshot log.
-
-        ./vdi_snap_list 0
-
-        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
-
-The first two columns constitute a snapshot id and represent the
-(block, offset) of the snapshot record.  The Date tells you when the
-snapshot was made, and 31 is the radix root node of the snapshot.
-
-(c) Create a new VDI, based on that snapshot, and look at the list.
-
-        ./vdi_create "FC3 - Copy 1" 16 0
-        ./vdi_list
-
-        |      0                     My new FC3 VDI
-        |      1                       FC3 - Copy 1
-
-NOTE: If you have Graphviz installed on your system, you can use
-vdi_tree to generate a postscript of your current set of VDIs and
-snapshots.
-
-
-Create as many VDIs as you need for the VMs that you want to run.
-
-3. Boot some VMs:
------------------
-
-Parallax currently uses a hack in xend to pass the VDI id, you need to
-modify the disk line of the VM config that is going to mount it.
-
-(a) set up your vm config, by using the following disk line:
-
-        disk = ['parallax:1,sda1,w,0' ]
-
-This example uses VDI 1 (from vdi_list above), presents it as sda1
-(writable), and uses dom 0 as the backend.  If you were running the
-daemon (and tap driver) in some domain other than 0, you would change
-this last parameter.
-
-NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
-
-(b) Run parallax in the backend domain.
-
-        ./parallax
-
-(c) create your new domain.
-
-        xm create ...
-
----
-
-That's pretty much all there is to it at the moment.  Hope this is
-clear enough to get you going.  Now, a few serious caveats that will
-be sorted out in the almost immediate future:
-
-WARNINGS:
----------
-
-1. There is NO locking in the VDI tools at the moment, so I'd avoid
-running them in parallel, or more importantly, running them while the
-daemon is running.
-
-2. I doubt that xend will be very happy about restarting if you have
-parallax-using domains.  So if it dies while there are active parallax
-doms, you may need to reboot.
-
-3. I've turned off write-in-place.  So at the moment, EVERY block
-write is a log append on the blockstore.  I've been having some probs
-with the radix tree's marking of writable blocks after snapshots and
-will sort this out very soon.
-
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/block-async.c
--- a/tools/blktap/parallax/block-async.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* block-async.c
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "block-async.h"
-#include "blockstore.h"
-#include "vdi.h"
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* We have a queue of outstanding I/O requests implemented as a 
- * circular producer-consumer ring with free-running buffers.
- * to allow reordering, this ring indirects to indexes in an 
- * ring of io_structs.
- * 
- * the block_* calls may either add an entry to this ring and return, 
- * or satisfy the request immediately and call the callback directly.
- * None of the io calls in parallax should be nested enough to worry 
- * about stack problems with this approach.
- */
-
-struct read_args {
-    uint64_t addr;
-};
-
-struct write_args {
-    uint64_t   addr;
-    char *block;
-};
-
-struct alloc_args {
-    char *block;
-};
- 
-struct pending_io_req {
-    enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
-    union {
-        struct read_args  r;
-        struct write_args w;
-        struct alloc_args a;
-    } u;
-    io_cb_t cb;
-    void *param;
-};
-
-void radix_lock_init(struct radix_lock *r)
-{
-    int i;
-    
-    pthread_mutex_init(&r->lock, NULL);
-    for (i=0; i < 1024; i++) {
-        r->lines[i] = 0;
-        r->waiters[i] = NULL;
-        r->state[i] = ANY;
-    }
-}
-
-/* maximum outstanding I/O requests issued asynchronously */
-/* must be a power of 2.*/
-#define MAX_PENDING_IO 1024
-
-/* how many threads to concurrently issue I/O to the disk. */
-#define IO_POOL_SIZE   10
-
-static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
-static int pending_io_list[MAX_PENDING_IO];
-static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
-#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
-#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
-#define PENDING_IO_ENT(_x) \
-       (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
-#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
-#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
-static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
-
-static void init_pending_io(void)
-{
-    int i;
-       
-    for (i=0; i<MAX_PENDING_IO; i++)
-        pending_io_list[i] = i;
-               
-} 
-
-void block_read(uint64_t addr, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_READ;
-    req->u.r.addr = addr;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-    
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
-    req->op = IO_WRITE;
-    req->u.w.addr  = addr;
-    req->u.w.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-
-void block_alloc(char *block, io_cb_t cb, void *param)
-{
-    struct pending_io_req *req;
-       
-    pthread_mutex_lock(&pending_io_lock);
-    assert(CAN_PRODUCE_PENDING_IO);
-    
-    req = PENDING_IO_ENT(io_prod++);
-    req->op = IO_ALLOC;
-    req->u.a.block = block;
-    req->cb = cb;
-    req->param = param;
-    
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);    
-}
-
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
-        r->lines[row]++;
-        r->state[row] = READ;
-        DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = RLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-}
-
-
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    pthread_mutex_lock(&r->lock);
-    
-    /* the second check here is redundant -- just here for debugging now. */
-    if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
-        r->state[row] = STOP;
-        r->lines[row] = -1;
-        DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
-        pthread_mutex_unlock(&r->lock);
-        ret.type = IO_INT_T;
-        ret.u.i = 0;
-        cb(ret, param);
-    } else {
-        struct radix_wait **rwc;
-        struct radix_wait *rw = 
-            (struct radix_wait *) malloc (sizeof(struct radix_wait));
-        DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
-        rw->type  = WLOCK;
-        rw->param = param;
-        rw->cb    = cb;
-        rw->next  = NULL;
-        /* append to waiters list. */
-        rwc = &r->waiters[row];
-        while (*rwc != NULL) rwc = &(*rwc)->next;
-        *rwc = rw;
-        pthread_mutex_unlock(&r->lock);
-        return;
-    }
-       
-}
-
-/* called with radix_lock locked and lock count of zero. */
-static void wake_waiters(struct radix_lock *r, int row)
-{
-    struct pending_io_req *req;
-    struct radix_wait *rw;
-    
-    if (r->lines[row] != 0) return;
-    if (r->waiters[row] == NULL) return; 
-    
-    if (r->waiters[row]->type == WLOCK) {
-
-        rw = r->waiters[row];
-        pthread_mutex_lock(&pending_io_lock);
-        assert(CAN_PRODUCE_PENDING_IO);
-        
-        req = PENDING_IO_ENT(io_prod++);
-        req->op    = IO_WWAKE;
-        req->cb    = rw->cb;
-        req->param = rw->param;
-        r->lines[row] = -1; /* write lock the row. */
-        r->state[row] = STOP;
-        r->waiters[row] = rw->next;
-        free(rw);
-        pthread_mutex_unlock(&pending_io_lock);
-    
-    } else /* RLOCK */ {
-
-        while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
-            rw = r->waiters[row];
-            pthread_mutex_lock(&pending_io_lock);
-            assert(CAN_PRODUCE_PENDING_IO);
-            
-            req = PENDING_IO_ENT(io_prod++);
-            req->op    = IO_RWAKE;
-            req->cb    = rw->cb;
-            req->param = rw->param;
-            r->lines[row]++; /* read lock the row. */
-            r->state[row] = READ; 
-            r->waiters[row] = rw->next;
-            free(rw);
-            pthread_mutex_unlock(&pending_io_lock);
-        }
-
-        if (r->waiters[row] != NULL) /* There is a write queued still */
-            r->state[row] = STOP;
-    }  
-    
-    pthread_mutex_lock(&pending_io_lock);
-    pthread_cond_signal(&pending_io_cond);
-    pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-       
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] > 0); /* try to catch misuse. */
-    r->lines[row]--;
-    if (r->lines[row] == 0) {
-        r->state[row] = ANY;
-        wake_waiters(r, row);
-    }
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
-    struct io_ret ret;
-    
-    pthread_mutex_lock(&r->lock);
-    assert(r->lines[row] == -1); /* try to catch misuse. */
-    r->lines[row] = 0;
-    r->state[row] = ANY;
-    wake_waiters(r, row);
-    pthread_mutex_unlock(&r->lock);
-    cb(ret, param);
-}
-
-/* consumer calls */
-static void do_next_io_req(struct pending_io_req *req)
-{
-    struct io_ret          ret;
-    void  *param;
-    
-    switch (req->op) {
-    case IO_READ:
-        ret.type = IO_BLOCK_T;
-        ret.u.b  = readblock(req->u.r.addr);
-        break;
-    case IO_WRITE:
-        ret.type = IO_INT_T;
-        ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
-        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
-        break;
-    case IO_ALLOC:
-        ret.type = IO_ADDR_T;
-        ret.u.a  = allocblock(req->u.a.block);
-        break;
-    case IO_RWAKE:
-        DPRINTF("WAKE DEFERRED RLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    case IO_WWAKE:
-        DPRINTF("WAKE DEFERRED WLOCK!\n");
-        ret.type = IO_INT_T;
-        ret.u.i  = 0;
-        break;
-    default:
-        DPRINTF("Unknown IO operation on pending list!\n");
-        return;
-    }
-    
-    param = req->param;
-    pthread_mutex_lock(&pending_io_lock);
-    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    assert(req->cb != NULL);
-    req->cb(ret, param);
-    
-}
-
-void *io_thread(void *param) 
-{
-    int tid;
-    struct pending_io_req *req;
-    
-    /* Set this thread's tid. */
-    tid = *(int *)param;
-    free(param);
-    
-start:
-    pthread_mutex_lock(&pending_io_lock);
-    while (io_prod == io_cons) {
-        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
-    }
-    
-    if (io_prod == io_cons) {
-        /* unnecessary wakeup. */
-        pthread_mutex_unlock(&pending_io_lock);
-        goto start;
-    }
-    
-    req = PENDING_IO_ENT(io_cons++);
-    pthread_mutex_unlock(&pending_io_lock);
-       
-    do_next_io_req(req);
-    
-    goto start;
-       
-}
-
-static pthread_t io_pool[IO_POOL_SIZE];
-void start_io_threads(void)
-
-{      
-    int i, tid=0;
-    
-    for (i=0; i < IO_POOL_SIZE; i++) {
-        int ret, *t;
-        t = (int *)malloc(sizeof(int));
-        *t = tid++;
-        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
-        if (ret != 0) printf("Error starting thread %d\n", i);
-    }
-       
-}
-
-void init_block_async(void)
-{
-    init_pending_io();
-    start_io_threads();
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-/* block-async.h
- * 
- * Asynchronous block wrappers for parallax.
- */
- 
-#ifndef _BLOCKASYNC_H_
-#define _BLOCKASYNC_H_
-
-#include <assert.h>
-#include <xenctrl.h>
-#include "vdi.h"
-
-struct io_ret
-{
-    enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
-    union {
-        uint64_t   a;
-        char *b;
-        int   i;
-    } u;
-};
-
-typedef void (*io_cb_t)(struct io_ret r, void *param);
-
-/* per-vdi lock structures to make sure requests run in a safe order. */
-struct radix_wait {
-    enum {RLOCK, WLOCK} type;
-    io_cb_t  cb;
-    void    *param;
-    struct radix_wait *next;
-};
-
-struct radix_lock {
-    pthread_mutex_t lock;
-    int                    lines[1024];
-    struct radix_wait     *waiters[1024];
-    enum {ANY, READ, STOP} state[1024];
-};
-void radix_lock_init(struct radix_lock *r);
-
-void block_read(uint64_t addr, io_cb_t cb, void *param);
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
-void block_alloc(char *block, io_cb_t cb, void *param);
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void init_block_async(void);
-
-static inline uint64_t IO_ADDR(struct io_ret r)
-{
-    assert(r.type == IO_ADDR_T);
-    return r.u.a;
-}
-
-static inline char *IO_BLOCK(struct io_ret r)
-{
-    assert(r.type == IO_BLOCK_T);
-    return r.u.b;
-}
-
-static inline int IO_INT(struct io_ret r)
-{
-    assert(r.type == IO_INT_T);
-    return r.u.i;
-}
-
-
-#endif //_BLOCKASYNC_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstore.c
--- a/tools/blktap/parallax/blockstore.c        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1348 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.c
- *
- * Simple block store interface
- *
- */
- 
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdarg.h>
-#include "blockstore.h"
-#include <pthread.h>
-
-//#define BLOCKSTORE_REMOTE /* define to build the networked backend below instead of the local blockstore.dat one */
-//#define BSDEBUG /* define to compile in the DB() trace output and the queue dumps */
-
-#define RETRY_TIMEOUT 1000000 /* microseconds a queued request may go unanswered before queue_runner() resends it */
-
-/*****************************************************************************
- * Debugging
- */
-#ifdef BSDEBUG
-/* printf-style trace to stderr.  Every line is prefixed with the calling
- * thread's id, read from thread-specific storage under tid_key.  When
- * BSDEBUG is not defined DB() expands to a no-op.
- */
-void DB(char *format, ...)
-{
-    va_list args;
-
-    /* The thread-specific value is an integer carried in a void *.  Go
-     * through unsigned long so the conversion is well defined where
-     * pointers are wider than int, and hand %u the unsigned it expects. */
-    fprintf(stderr, "[%05u] ",
-            (unsigned int)(unsigned long)pthread_getspecific(tid_key));
-    va_start(args, format);
-    vfprintf(stderr, format, args);
-    va_end(args);
-}
-#else
-#define DB(format, ...) (void)0
-#endif
-
-#ifdef BLOCKSTORE_REMOTE
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-/*****************************************************************************
- * Network state                                                             *
- *****************************************************************************/
-
-/* The individual disk servers we talk to.  The rest of the file refers
- * to them by their integer index into bsservers[].
- */
-bsserver_t bsservers[MAX_SERVERS];
-
-/* The cluster map, indexed by integer cluster number.
- */
-bscluster_t bsclusters[MAX_CLUSTERS];
-
-/* Local socket: its address, and the descriptor that every
- * sendmsg()/recvmsg() call in this file uses.
- */
-struct sockaddr_in sin_local;
-int bssock = 0;
-
-/*****************************************************************************
- * Notification                                                              *
- *****************************************************************************/
-
-/* Per-thread wake-up slot used by RECV_NOTIFY()/RECV_AWAIT(): `newdata`
- * is the flag, `ptcv` the condition variable, `ptmutex` guards both. */
-typedef struct pool_thread_t_struct {
-    pthread_mutex_t ptmutex;
-    pthread_cond_t  ptcv;
-    int             newdata;
-} pool_thread_t;
-
-/* One slot per thread id, indices 0..READ_POOL_SIZE inclusive. */
-pool_thread_t pool_thread[READ_POOL_SIZE+1];
-
-#define RECV_NOTIFY(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    pool_thread[tid].newdata = 1; \
-    DB("CV Waking %u", tid); \
-    pthread_cond_signal(&(pool_thread[tid].ptcv)); \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-#define RECV_AWAIT(tid) { \
-    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
-    if (pool_thread[tid].newdata) { \
-        pool_thread[tid].newdata = 0; \
-        DB("CV Woken %u", tid); \
-    } \
-    else { \
-        DB("CV Waiting %u", tid); \
-        pthread_cond_wait(&(pool_thread[tid].ptcv), \
-                          &(pool_thread[tid].ptmutex)); \
-    } \
-    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-
-/*****************************************************************************
- * Message queue management                                                  *
- *****************************************************************************/
-
-/* Protects the critical regions that manipulate the request queue
- * (bs_head / bs_tail / bs_qlen and the links of queued entries).
- */
-pthread_mutex_t ptmutex_queue;
-#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
-#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
-
-/* Receive-side lock.  Its only use in this file is in the commented-out
- * inline receive path inside wait_recv().
- */
-pthread_mutex_t ptmutex_recv;
-#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
-#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
-
-/* A message queue entry.  One of these is allocated for every request we
- * send; asynchronous reply reception also uses one (the static rx_qe).
- */
-typedef struct bsq_t_struct {
-    struct bsq_t_struct *prev;   /* queue links; NULL at either end */
-    struct bsq_t_struct *next;
-    int status;                  /* BSQ_STATUS_* bits */
-    int server;                  /* index into bsservers[] */
-    int length;                  /* message length in bytes */
-    struct msghdr msghdr;        /* descriptor passed to sendmsg()/recvmsg() */
-    struct iovec iov[2];         /* [0] = message header, [1] = block if any */
-    int tid;                     /* id of the thread that issued the request */
-    struct timeval tv_sent;      /* time of the most recent transmission */
-    bshdr_t message;             /* header: operation, flags, id, luid */
-    void *block;                 /* block payload, or NULL */
-} bsq_t;
-
-/* Set in bsq_t.status once a reply has been paired with the request. */
-#define BSQ_STATUS_MATCHED 1
-
-/* Guards luid_cnt. */
-pthread_mutex_t ptmutex_luid;
-#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
-#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
-
-/* Next luid to hand out; numbering starts at 0x1000. */
-static uint64_t luid_cnt = 0x1000ULL;
-/* Return the next value of the luid counter, advancing it under
- * ptmutex_luid so concurrent callers never get the same value. */
-uint64_t new_luid(void) {
-    uint64_t issued;
-
-    ENTER_LUID_CR;
-    issued = luid_cnt;
-    luid_cnt = issued + 1;
-    LEAVE_LUID_CR;
-
-    return issued;
-}
-
-/* Queue of outstanding requests: a doubly linked list of bsq_t from
- * bs_head to bs_tail, with bs_qlen counting its entries.
- */
-bsq_t *bs_head = NULL;
-bsq_t *bs_tail = NULL;
-int    bs_qlen = 0;
-
-/* Dump the outstanding-request queue to stderr, tagged with `msg`.
- * Takes the queue lock itself, so the caller must not be holding it.
- */
-void queuedebug(char *msg) {
-    bsq_t *q;
-    ENTER_QUEUE_CR;
-    /* bs_qlen and server are plain ints: print them with %d. */
-    fprintf(stderr, "Q: %s len=%d\n", msg, bs_qlen);
-    for (q = bs_head; q; q = q->next) {
-        /* %llx needs an unsigned long long; a 64-bit luid is not
-         * necessarily that type, so convert explicitly. */
-        fprintf(stderr, "  luid=%016llx server=%d\n",
-                (unsigned long long)q->message.luid, q->server);
-    }
-    LEAVE_QUEUE_CR;
-}
-
-/* Append `qe` to the tail of the outstanding-request queue.
- * Always returns 0.
- */
-int enqueue(bsq_t *qe) {
-    ENTER_QUEUE_CR;
-    qe->prev = bs_tail;
-    qe->next = NULL;
-    if (bs_head == NULL) {
-        /* empty queue: the new entry is also the head */
-        bs_head = qe;
-    } else {
-        bs_tail->next = qe;
-    }
-    bs_tail = qe;
-    bs_qlen += 1;
-    LEAVE_QUEUE_CR;
-
-#ifdef BSDEBUG
-    queuedebug("enqueue");
-#endif
-
-    return 0;
-}
-
-/* Remove `qe` from the outstanding-request queue if it is on it.
- *
- * The queue is searched for the pointer rather than trusting qe's own
- * links, so calling this for an entry that was already unlinked (or never
- * queued) is harmless.
- *
- * Returns 1 if the entry was found and removed, 0 otherwise.
- */
-int dequeue(bsq_t *qe) {
-    bsq_t *q;
-    int found = 0;
-
-    ENTER_QUEUE_CR;
-    for (q = bs_head; q; q = q->next) {
-        if (q != qe)
-            continue;
-
-        if (q->prev)
-            q->prev->next = q->next;
-        else
-            bs_head = q->next;
-        if (q->next)
-            q->next->prev = q->prev;
-        else
-            bs_tail = q->prev;
-        /* Clear the links, as queuesearch() does when it unlinks. */
-        q->next = NULL;
-        q->prev = NULL;
-        bs_qlen--;
-        found = 1;
-        break;
-    }
-    LEAVE_QUEUE_CR;
-
-#ifdef BSDEBUG
-    /* The success path used to log "dequeue not found" as well. */
-    queuedebug(found ? "dequeue found" : "dequeue not found");
-#endif
-    return found;
-}
-
-/* Pair a received message `qe` with the queued request it answers.
- *
- * A queued entry matches when server, operation and luid are all equal.
- * On a match the reply's length, flags and id are copied into the queued
- * entry, it is marked BSQ_STATUS_MATCHED and unlinked from the queue, and
- * (for read requests) the reply's block buffer is moved across.
- *
- * Returns the matched request, or NULL if nothing matched.
- */
-bsq_t *queuesearch(bsq_t *qe) {
-    bsq_t *hit;
-
-    ENTER_QUEUE_CR;
-
-    for (hit = bs_head; hit != NULL; hit = hit->next) {
-        if (qe->server != hit->server)
-            continue;
-        if (qe->message.operation != hit->message.operation)
-            continue;
-        if (qe->message.luid != hit->message.luid)
-            continue;
-        break;
-    }
-
-    if (hit != NULL) {
-        /* NOTE(review): the error bit is tested on the queued request's
-         * flags, before the reply's flags are copied in below -- confirm
-         * this is the intended flag word. */
-        if ((hit->message.operation == BSOP_READBLOCK) &&
-            ((hit->message.flags & BSOP_FLAG_ERROR) == 0)) {
-            hit->block = qe->block;
-            qe->block = NULL;
-        }
-        hit->length = qe->length;
-        hit->message.flags = qe->message.flags;
-        hit->message.id = qe->message.id;
-        hit->status |= BSQ_STATUS_MATCHED;
-
-        /* unlink the matched entry */
-        if (hit->prev)
-            hit->prev->next = hit->next;
-        else
-            bs_head = hit->next;
-        if (hit->next)
-            hit->next->prev = hit->prev;
-        else
-            bs_tail = hit->prev;
-        hit->next = NULL;
-        hit->prev = NULL;
-        bs_qlen--;
-    }
-
-    LEAVE_QUEUE_CR;
-
-#ifdef BSDEBUG
-    queuedebug(hit ? "queuesearch found" : "queuesearch not found");
-#endif
-    return hit;
-}
-
-/*****************************************************************************
- * Network communication                                                     *
- *****************************************************************************/
-
-/* Prepare the msghdr/iovec of `qe`, stamp it with a fresh luid, put it on
- * the outstanding-request queue and transmit it to bsservers[qe->server].
- *
- * Returns the sendmsg() result on success and a negative value on failure.
- * On failure the entry has been taken off the queue again, so the caller
- * is free to release it.
- */
-int send_message(bsq_t *qe) {
-    int rc;
-
-    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    qe->msghdr.msg_iovlen = qe->block ? 2 : 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    qe->message.luid = new_luid();
-
-    qe->status = 0;
-    /* The thread id is an integer stored in a void *; convert through long
-     * so the cast is well defined where pointers are wider than int. */
-    qe->tid = (int)(long)pthread_getspecific(tid_key);
-
-    /* Timestamp BEFORE the entry becomes visible on the queue:
-     * queue_runner() reads tv_sent of every queued entry and must never
-     * see an uninitialised value. */
-    gettimeofday(&(qe->tv_sent), NULL);
-
-    if (enqueue(qe) < 0) {
-        fprintf(stderr, "Error enqueuing request.\n");
-        return -1;
-    }
-
-    DB("send_message to %d luid=%016llx\n", qe->server,
-       (unsigned long long)qe->message.luid);
-    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    if (rc < 0) {
-        /* Callers free qe when we report failure; leaving it queued would
-         * hand queue_runner() a dangling pointer. */
-        dequeue(qe);
-    }
-
-    return rc;
-}
-
-/* Receive one message from bssock into `qe`: the header goes to
- * qe->message and, when qe->block is set, up to BLOCK_SIZE further bytes
- * go to that buffer.
- *
- * Returns the recvmsg() result (bytes received, or negative on error).
- */
-int recv_message(bsq_t *qe) {
-    struct sockaddr_in from;
-    int rc;
-
-    qe->msghdr.msg_name = &from;
-    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    qe->msghdr.msg_iov = qe->iov;
-    qe->msghdr.msg_iovlen = qe->block ? 2 : 1;
-    qe->msghdr.msg_control = NULL;
-    qe->msghdr.msg_controllen = 0;
-    qe->msghdr.msg_flags = 0;
-
-    qe->iov[0].iov_base = (void *)&(qe->message);
-    qe->iov[0].iov_len = MSGBUFSIZE_ID;
-    if (qe->block) {
-        qe->iov[1].iov_base = qe->block;
-        qe->iov[1].iov_len = BLOCK_SIZE;
-    }
-
-    rc = recvmsg(bssock, &(qe->msghdr), 0);
-
-    /* `from` lives on this stack frame; do not leave the caller's msghdr
-     * pointing at it once we return. */
-    qe->msghdr.msg_name = NULL;
-    qe->msghdr.msg_namelen = 0;
-
-    return rc;
-}
-
-/* Map a peer address to its index in bsservers[].
- *
- * Only slots with a non-NULL hostname are considered; a slot matches when
- * family, port and IPv4 address are all equal.
- *
- * Returns the index, or -1 if no configured server has that address.
- */
-int get_server_number(struct sockaddr_in *sin) {
-    int i;
-
-#ifdef BSDEBUG2
-    {
-        /* sin_addr/sin_port are in network byte order; convert before
-         * splitting into octets so the output is right on any host. */
-        unsigned long a = ntohl(sin->sin_addr.s_addr);
-        fprintf(stderr,
-                "get_server_number(%lu.%lu.%lu.%lu/%u)\n",
-                (a >> 24) & 0xff, (a >> 16) & 0xff,
-                (a >> 8) & 0xff, a & 0xff,
-                (unsigned int)ntohs(sin->sin_port));
-    }
-#endif
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (!bsservers[i].hostname)
-            continue;
-#ifdef BSDEBUG2
-        {
-            unsigned long a = ntohl(bsservers[i].sin.sin_addr.s_addr);
-            fprintf(stderr,
-                    "get_server_number check %lu.%lu.%lu.%lu/%u\n",
-                    (a >> 24) & 0xff, (a >> 16) & 0xff,
-                    (a >> 8) & 0xff, a & 0xff,
-                    (unsigned int)ntohs(bsservers[i].sin.sin_port));
-        }
-#endif
-        if ((sin->sin_family == bsservers[i].sin.sin_family) &&
-            (sin->sin_port == bsservers[i].sin.sin_port) &&
-            (sin->sin_addr.s_addr == bsservers[i].sin.sin_addr.s_addr)) {
-            return i;
-        }
-    }
-
-    return -1;
-}
-
-/* Spare BLOCK_SIZE receive buffer, or NULL when recv_any() must allocate
- * one.  recv_recycle_buffer() refills it. */
-void *rx_buffer = NULL;
-/* The single, statically allocated entry that recv_any() receives into. */
-bsq_t rx_qe;
-
-/* Receive the next message from bssock, whoever sent it.
- *
- * The header lands in rx_qe.message and any payload in rx_qe.block;
- * rx_qe.length is the byte count and rx_qe.server the sender's index in
- * bsservers[] (-1 if the sender is not a configured server).
- *
- * Returns &rx_qe, or NULL on allocation or receive failure.  The result is
- * overwritten by the next call.
- */
-bsq_t *recv_any(void) {
-    struct sockaddr_in from;
-    int rc;
-
-    DB("ENTER recv_any\n");
-
-    if (!rx_buffer) {
-        rx_buffer = malloc(BLOCK_SIZE);
-        if (!rx_buffer) {
-            perror("recv_any malloc");
-            return NULL;
-        }
-    }
-    rx_qe.block = rx_buffer;
-    rx_buffer = NULL;
-
-    rx_qe.msghdr.msg_name = &from;
-    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
-    rx_qe.msghdr.msg_iov = rx_qe.iov;
-    rx_qe.msghdr.msg_iovlen = 2;
-    rx_qe.msghdr.msg_control = NULL;
-    rx_qe.msghdr.msg_controllen = 0;
-    rx_qe.msghdr.msg_flags = 0;
-
-    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
-    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
-    rx_qe.iov[1].iov_base = rx_qe.block;
-    rx_qe.iov[1].iov_len = BLOCK_SIZE;
-
-    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
-    if (rc < 0) {
-        perror("recv_any");
-        /* Keep the buffer for the next attempt.  Previously it stayed in
-         * rx_qe.block and was overwritten (leaked) by the next call. */
-        rx_buffer = rx_qe.block;
-        rx_qe.block = NULL;
-        rx_qe.msghdr.msg_name = NULL;
-        rx_qe.msghdr.msg_namelen = 0;
-        return NULL;
-    }
-
-    rx_qe.length = rc;
-    rx_qe.server = get_server_number(&from);
-
-    /* `from` is a local; do not leave the static rx_qe pointing at it. */
-    rx_qe.msghdr.msg_name = NULL;
-    rx_qe.msghdr.msg_namelen = 0;
-
-    DB("recv_any from %d luid=%016llx len=%u\n",
-       rx_qe.server, (unsigned long long)rx_qe.message.luid,
-       (unsigned int)rx_qe.length);
-
-    return &rx_qe;
-}
-
-/* If `q` still owns a block buffer, move it back into rx_buffer so that
- * recv_any() can reuse it; otherwise do nothing. */
-void recv_recycle_buffer(bsq_t *q) {
-    if (q->block == NULL)
-        return;
-
-    rx_buffer = q->block;
-    q->block = NULL;
-}
-
-/* Block the calling thread until every request in reqs[0..numreqs-1] has
- * been marked BSQ_STATUS_MATCHED.
- *
- * Between checks the thread sleeps in RECV_AWAIT() on its own slot (the
- * thread id stored under tid_key).
- *
- * Returns numreqs.  There is currently no failure path, so callers'
- * `< 0` checks never fire.
- */
-int wait_recv(bsq_t **reqs, int numreqs) {
-    int i;
-    int tid = (int)(long)pthread_getspecific(tid_key);
-
-    DB("ENTER wait_recv %u\n", numreqs);
-
-    for (;;) {
-        int all_matched = 1;
-
-        for (i = 0; i < numreqs; i++) {
-            if (!(reqs[i]->status & BSQ_STATUS_MATCHED)) {
-                all_matched = 0;
-                break;
-            }
-        }
-        if (all_matched) {
-            DB("LEAVE wait_recv\n");
-            return numreqs;
-        }
-
-        RECV_AWAIT(tid);
-    }
-}
-
-/* retry: resend an already prepared request.
- *
- * Refreshes qe->tv_sent, bumps the global retry counter and retransmits
- * the stored msghdr.  Returns 0 on success, or the negative sendmsg()
- * result on failure.
- */
-static int retry_count = 0;
-int retry(bsq_t *qe)
-{
-    int sent;
-
-    gettimeofday(&(qe->tv_sent), NULL);
-    DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
-    retry_count++;
-
-    sent = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
-    return (sent < 0) ? sent : 0;
-}
-
-/* queue runner: thread body that resends stale requests.
- *
- * Once a second it walks the outstanding-request queue under the queue
- * lock and calls retry() on every entry whose last transmission is more
- * than RETRY_TIMEOUT microseconds old.  Never returns.
- */
-void *queue_runner(void *arg)
-{
-    for (;;) {
-        struct timeval now;
-        long long nowus, sus;
-        bsq_t *q;
-        int r;
-
-        sleep(1);
-
-        gettimeofday(&now, NULL);
-        /* Widen BEFORE multiplying: tv_sec * 1000000 evaluated in a
-         * 32-bit long overflows. */
-        nowus = (long long)now.tv_sec * 1000000LL + now.tv_usec;
-        ENTER_QUEUE_CR;
-        r = retry_count;
-        for (q = bs_head; q; q = q->next) {
-            sus = (long long)q->tv_sent.tv_sec * 1000000LL
-                  + q->tv_sent.tv_usec;
-            if ((nowus - sus) > RETRY_TIMEOUT) {
-                if (retry(q) < 0) {
-                    fprintf(stderr, "Error on sendmsg retry.\n");
-                }
-            }
-        }
-        if (r != retry_count) {
-            /* both values are ints, so print them with %d */
-            fprintf(stderr, "RETRIES: %d %d\n", retry_count - r, retry_count);
-        }
-        LEAVE_QUEUE_CR;
-    }
-}
-
-/* receive loop: thread body that turns incoming messages into wake-ups.
- *
- * Each message from recv_any() is matched against the outstanding queue
- * with queuesearch(); the receive buffer is recycled and, on a match, the
- * thread that issued the request is notified.  Never returns.
- */
-void *receive_loop(void *arg)
-{
-    for (;;) {
-        bsq_t *rx = recv_any();
-        bsq_t *match;
-
-        if (rx == NULL) {
-            fprintf(stderr, "recv_any error\n");
-            continue;
-        }
-
-        match = queuesearch(rx);
-        recv_recycle_buffer(rx);
-        if (match == NULL) {
-            fprintf(stderr, "Unmatched RX\n");
-            continue;
-        }
-
-        DB("RX MATCH");
-        RECV_NOTIFY(match->tid);
-    }
-}
-pthread_t pthread_recv;
-
-/*****************************************************************************
- * Reading                                                                   *
- *****************************************************************************/
-
-/* Read block `id` from a single server and wait for the reply.
- *
- *   @server: index into bsservers[]
- *   @id:     block id as understood by that server
- *
- *   @return: the received block buffer (now owned by the caller), or NULL
- *            on allocation, send or server error, or a short reply
- */
-void *readblock_indiv(int server, uint64_t id) {
-    void *block;
-    bsq_t *qe;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("readblock qe malloc");
-        return NULL;
-    }
-    /* No buffer is sent with a read; the reply's buffer is attached to
-     * qe->block when the request is matched. */
-    qe->block = NULL;
-    qe->server = server;
-
-    qe->message.operation = BSOP_READBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    qe->length = MSGBUFSIZE_ID;
-
-    if (send_message(qe) < 0) {
-        perror("readblock sendto");
-        goto err;
-    }
-
-    if (wait_recv(&qe, 1) < 0) {
-        perror("readblock recv");
-        goto err;
-    }
-
-    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
-        fprintf(stderr, "readblock server error\n");
-        goto err;
-    }
-    if (qe->length < MSGBUFSIZE_BLOCK) {
-        /* Report the length that was actually received; the old code
-         * printed an uninitialised local. */
-        fprintf(stderr, "readblock recv short (%u)\n",
-                (unsigned int)qe->length);
-        goto err;
-    }
-
-    block = qe->block;
-    free((void *)qe);
-    return block;
-
-    err:
-    /* The entry may still be on the outstanding queue (e.g. the send
-     * failed); unlink it before freeing so queue_runner() never touches
-     * freed memory.  dequeue() does nothing if it is not queued. */
-    dequeue(qe);
-    free(qe->block);
-    free((void *)qe);
-    return NULL;
-}
-
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *
- *   Reads rotate round-robin over the replicas of the block's cluster.
- *
- *   @return: pointer to block, NULL on error
- */
-void *readblock(uint64_t id) {
-    int map = (int)BSID_MAP(id);
-    uint64_t xid;
-    /* Replica used by the previous call.  Shared between threads without
-     * locking: a race can only skew the rotation, because each call works
-     * from its own snapshot (`replica`) below. */
-    static int last_replica = CLUSTER_MAX_REPLICAS - 1;
-    int replica;
-    void *block = NULL;
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        block = readblock_indiv(bsclusters[map].servers[0], id);
-        goto out;
-    }
-
-    replica = last_replica + 1;
-    if (replica >= CLUSTER_MAX_REPLICAS)
-        replica = 0;
-    last_replica = replica;
-
-    /* The per-replica id and the server are both chosen from the same
-     * local `replica`; previously both read the shared static, which
-     * another thread could change in between. */
-    switch (replica) {
-    case 0:
-        xid = BSID_REPLICA0(id);
-        break;
-    case 1:
-        xid = BSID_REPLICA1(id);
-        break;
-    case 2:
-        xid = BSID_REPLICA2(id);
-        break;
-    default:
-        /* Only three replica ids are encoded; fall back to replica 0
-         * rather than using an uninitialised xid. */
-        replica = 0;
-        xid = BSID_REPLICA0(id);
-        break;
-    }
-
-    block = readblock_indiv(bsclusters[map].servers[replica], xid);
-
-    out:
-#ifdef BSDEBUG
-    if (block)
-        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-                (unsigned long long)id,
-                (unsigned int)((unsigned char *)block)[0],
-                (unsigned int)((unsigned char *)block)[1],
-                (unsigned int)((unsigned char *)block)[2],
-                (unsigned int)((unsigned char *)block)[3],
-                (unsigned int)((unsigned char *)block)[4],
-                (unsigned int)((unsigned char *)block)[5],
-                (unsigned int)((unsigned char *)block)[6],
-                (unsigned int)((unsigned char *)block)[7]);
-    else
-        fprintf(stderr, "READ:  %016llx NULL\n", (unsigned long long)id);
-#endif
-    return block;
-}
-
-/*****************************************************************************
- * Writing                                                                   *
- *****************************************************************************/
-
-/* Start writing `block` as block `id` on a single server.
- *
- * The request is queued and sent but NOT waited for; the caller collects
- * the reply with wait_recv() and then frees the returned entry.  `block`
- * is referenced, not copied, so it must stay valid until then.
- *
- * Returns the queued request, or NULL on allocation or send failure.
- */
-bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
-    bsq_t *qe;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("writeblock qe malloc");
-        return NULL;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_WRITEBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = id;
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("writeblock sendto");
-        /* send_message() queues the entry before transmitting; make sure
-         * it is off the queue before it is freed (no-op if not queued). */
-        dequeue(qe);
-        free((void *)qe);
-        return NULL;
-    }
-
-    return qe;
-}
-    
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   Ordinary blocks are written to all three replicas of their cluster.
- *   Ids below 6 go to the first replica only.
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-
-    int map = (int)BSID_MAP(id);
-    bsq_t *reqs[3] = { NULL, NULL, NULL };
-    uint64_t rid[3];
-    int nreqs, i;
-    int rc = -1;
-
-#ifdef BSDEBUG
-    fprintf(stderr,
-            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            (unsigned long long)id,
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-
-    /* special case for the "superblock" just use the first block on the
-     * first replica. (extend to blocks < 6 for vdi bug)
-     */
-    if (id < 6) {
-        nreqs = 1;
-        rid[0] = id;
-    } else {
-        nreqs = 3;
-        rid[0] = BSID_REPLICA0(id);
-        rid[1] = BSID_REPLICA1(id);
-        rid[2] = BSID_REPLICA2(id);
-    }
-
-    /* Fire off all writes first, then wait for every reply. */
-    for (i = 0; i < nreqs; i++) {
-        reqs[i] = writeblock_indiv(bsclusters[map].servers[i], rid[i], block);
-        if (!reqs[i])
-            goto out;
-    }
-
-    if (wait_recv(reqs, nreqs) < 0) {
-        perror("writeblock recv");
-        goto out;
-    }
-    for (i = 0; i < nreqs; i++) {
-        if ((reqs[i]->message.flags & BSOP_FLAG_ERROR)) {
-            fprintf(stderr, "writeblock server%d error\n", i);
-            goto out;
-        }
-    }
-    rc = 0;
-
-    out:
-    /* Single cleanup path.  The superblock case used to return without
-     * freeing its request, and returned wait_recv()'s count (1) instead
-     * of the documented 0. */
-    for (i = 0; i < nreqs; i++) {
-        if (!reqs[i])
-            continue;
-        if (rc != 0)
-            dequeue(reqs[i]);   /* may still be queued on failure */
-        free((void *)reqs[i]);
-    }
-    return rc;
-}
-
-/*****************************************************************************
- * Allocation                                                                *
- *****************************************************************************/
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   Shorthand for allocblock_hint() with a hint of 0.
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock(void *block) {
-    const uint64_t default_hint = 0;
-
-    return allocblock_hint(block, default_hint);
-}
-
-/* Start allocating a new block holding `block` on a single server.
- *
- * The request (with `hint` in the id field) is queued and sent but NOT
- * waited for; the caller collects the reply with wait_recv() and then
- * frees the returned entry.  `block` is referenced, not copied.
- *
- * Returns the queued request, or NULL on allocation or send failure.
- */
-bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
-    bsq_t *qe;
-
-    qe = (bsq_t *)malloc(sizeof(bsq_t));
-    if (!qe) {
-        perror("allocblock_hint qe malloc");
-        return NULL;
-    }
-    qe->server = server;
-
-    qe->message.operation = BSOP_ALLOCBLOCK;
-    qe->message.flags = 0;
-    qe->message.id = hint;
-    qe->block = block;
-    qe->length = MSGBUFSIZE_BLOCK;
-
-    if (send_message(qe) < 0) {
-        perror("allocblock_hint sendto");
-        /* send_message() queues the entry before transmitting; make sure
-         * it is off the queue before it is freed (no-op if not queued). */
-        dequeue(qe);
-        free((void *)qe);
-        return NULL;
-    }
-
-    return qe;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint; used as the cluster number, so it must be
- *          below MAX_CLUSTERS
- *
- *   The block is allocated on all three replicas of the cluster and the
- *   three per-server ids are combined into one block id.
- *
- *   @return: new id of block on disk, 0 on failure
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    int map = (int)hint;
-    bsq_t *reqs[3] = { NULL, NULL, NULL };
-    uint64_t ids[3];
-    uint64_t bsid = 0;
-    int ok = 0;
-    int i;
-
-    DB("ENTER allocblock\n");
-
-    /* `hint` indexes bsclusters[] directly; reject values that would read
-     * past the end of the table. */
-    if (hint >= MAX_CLUSTERS) {
-        fprintf(stderr, "allocblock bad cluster hint\n");
-        return 0;
-    }
-
-    /* Fire off all three allocations first, then wait for every reply. */
-    for (i = 0; i < 3; i++) {
-        reqs[i] = allocblock_hint_indiv(bsclusters[map].servers[i],
-                                        block, hint);
-        if (!reqs[i])
-            goto out;
-    }
-
-    if (wait_recv(reqs, 3) < 0) {
-        perror("allocblock recv");
-        goto out;
-    }
-    for (i = 0; i < 3; i++) {
-        if ((reqs[i]->message.flags & BSOP_FLAG_ERROR)) {
-            fprintf(stderr, "allocblock server%d error\n", i);
-            goto out;
-        }
-        ids[i] = reqs[i]->message.id;
-    }
-
-    bsid = BSID(map, ids[0], ids[1], ids[2]);
-    ok = 1;
-
-#ifdef BSDEBUG
-    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
-            (unsigned long long)bsid,
-            (unsigned int)((unsigned char *)block)[0],
-            (unsigned int)((unsigned char *)block)[1],
-            (unsigned int)((unsigned char *)block)[2],
-            (unsigned int)((unsigned char *)block)[3],
-            (unsigned int)((unsigned char *)block)[4],
-            (unsigned int)((unsigned char *)block)[5],
-            (unsigned int)((unsigned char *)block)[6],
-            (unsigned int)((unsigned char *)block)[7]);
-#endif
-
-    out:
-    for (i = 0; i < 3; i++) {
-        if (!reqs[i])
-            continue;
-        if (!ok)
-            dequeue(reqs[i]);   /* may still be queued on failure */
-        free((void *)reqs[i]);
-    }
-    return ok ? bsid : 0;
-}
-
-#else /* /BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Local storage version                                                     *
- *****************************************************************************/
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read (ids start at 1; block N lives at byte offset
- *        (N - 1) * BLOCK_SIZE of blockstore.dat)
- *
- *   @return: pointer to a malloc()ed block the caller must free,
- *            NULL on error
- */
-
-void *readblock(uint64_t id) {
-    void *block = NULL;
-    int block_fp;
-    ssize_t n;
-
-    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return NULL;
-    }
-
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        /* %Ld is not a standard conversion for integers; use %lld with an
-         * explicit long long argument instead. */
-        printf ("%lld ", (long long)id);
-        printf ("%lld\n", (long long)((id - 1) * BLOCK_SIZE));
-        perror("readblock lseek");
-        goto err;
-    }
-    if ((block = malloc(BLOCK_SIZE)) == NULL) {
-        perror("readblock malloc");
-        goto err;
-    }
-    n = read(block_fp, block, BLOCK_SIZE);
-    if (n != BLOCK_SIZE) {
-        /* errno is only meaningful when read() itself failed; a short
-         * read (e.g. id beyond end of file) gets its own message. */
-        if (n < 0)
-            perror("readblock read");
-        else
-            fprintf(stderr, "readblock read: short read (%ld of %d bytes)\n",
-                    (long)n, (int)BLOCK_SIZE);
-        free(block);
-        goto err;
-    }
-    close(block_fp);
-    return block;
-
-err:
-    close(block_fp);
-    return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id (block N lives at byte offset (N - 1) * BLOCK_SIZE of
- *        blockstore.dat)
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-
-    int block_fp;
-    ssize_t n;
-    int rc = -1;
-
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        goto out;
-    }
-    n = write(block_fp, block, BLOCK_SIZE);
-    if (n != BLOCK_SIZE) {
-        /* A partial write is a failure too (allocblock() already treats
-         * it as one); previously only a negative return was caught. */
-        if (n < 0)
-            perror("writeblock write");
-        else
-            fprintf(stderr, "writeblock write: short write (%ld of %d bytes)\n",
-                    (long)n, (int)BLOCK_SIZE);
-        goto out;
-    }
-    rc = 0;
-
-out:
-    close(block_fp);
-    return rc;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   The block is appended to blockstore.dat; its id is its 1-based
- *   position in the file.
- *
- *   @return: new id of block on disk, 0 on failure
- */
-
-uint64_t allocblock(void *block) {
-    uint64_t new_id = 0;
-    off64_t end;
-    int fd;
-
-    fd = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-    if (fd < 0) {
-        perror("open");
-        return 0;
-    }
-
-    end = lseek64(fd, 0, SEEK_END);
-    if (end == (off64_t)-1) {
-        perror("allocblock lseek");
-    } else if (end % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-    } else if (write(fd, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-    } else {
-        new_id = end / BLOCK_SIZE + 1;
-    }
-
-    close(fd);
-    return new_id;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- *   @block: pointer to block
- *   @hint: allocation hint (ignored by the local-file backend)
- *
- *   @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
-    uint64_t new_id;
-
-    (void)hint;
-    new_id = allocblock(block);
-    return new_id;
-}
-
-#endif /* BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Memory management                                                         *
- *****************************************************************************/
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block (BLOCK_SIZE bytes), NULL on error
- */
-void *newblock(void) {
-    /* calloc() hands back zero-filled memory, so no separate memset */
-    void *zeroed = calloc(1, BLOCK_SIZE);
-
-    if (!zeroed)
-        perror("newblock");
-    return zeroed;
-}
-
-
-/**
- * freeblock: release an in-memory block
- *   @block: block to be freed (NULL is allowed and ignored)
- *
- *   Only the memory is released; nothing on disk is touched.
- */
-void freeblock(void *block) {
-    free(block);
-}
-
-/* Allocate an in-memory freelist block: magic set, no successor, zero
- * entries, list cleared.  Returns NULL if no block could be allocated. */
-static freeblock_t *new_freeblock(void)
-{
-    freeblock_t *fresh = newblock();
-
-    if (fresh != NULL) {
-        fresh->magic = FREEBLOCK_MAGIC;
-        fresh->next  = 0ULL;
-        fresh->count = 0ULL;
-        memset(fresh->list, 0, sizeof fresh->list);
-    }
-
-    return fresh;
-}
-
-/**
- * releaseblock: put a block id on the on-disk freelist
- *   @id: block id to release
- *
- *   The id is appended to the superblock's "current" freelist block.  When
- *   that block is full it is chained onto the "full" list and a fresh
- *   current block is allocated.  Failures are reported on stderr and leave
- *   the id unrecorded.
- */
-void releaseblock(uint64_t id)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fl_current = NULL;
-
-    /* get superblock */
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    if (bs_super == NULL) {
-        fprintf(stderr, "releaseblock: cannot read superblock\n");
-        return;
-    }
-
-    /* get freeblock_current */
-    if (bs_super->freelist_current == 0ULL)
-    {
-        fl_current = new_freeblock();
-        if (fl_current == NULL)
-            goto out;
-        bs_super->freelist_current = allocblock(fl_current);
-        if (bs_super->freelist_current == 0ULL) {
-            /* allocblock() returns 0 on failure */
-            fprintf(stderr, "releaseblock: cannot allocate freelist block\n");
-            goto out;
-        }
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    } else {
-        fl_current = readblock(bs_super->freelist_current);
-        if (fl_current == NULL) {
-            fprintf(stderr, "releaseblock: cannot read freelist block\n");
-            goto out;
-        }
-    }
-
-    /* if full, chain to superblock and allocate new current */
-    if (fl_current->count == FREEBLOCK_SIZE) {
-        fl_current->next = bs_super->freelist_full;
-        writeblock(bs_super->freelist_current, fl_current);
-        bs_super->freelist_full = bs_super->freelist_current;
-        freeblock(fl_current);
-        fl_current = new_freeblock();
-        if (fl_current == NULL)
-            goto out;
-        bs_super->freelist_current = allocblock(fl_current);
-        if (bs_super->freelist_current == 0ULL) {
-            fprintf(stderr, "releaseblock: cannot allocate freelist block\n");
-            goto out;
-        }
-        writeblock(BLOCKSTORE_SUPER, bs_super);
-    }
-
-    /* append id to current */
-    fl_current->list[fl_current->count++] = id;
-    writeblock(bs_super->freelist_current, fl_current);
-
-out:
-    /* freeblock() is free(), so a NULL fl_current is fine here */
-    freeblock(fl_current);
-    freeblock(bs_super);
-}
-
-/* freelist debug functions: */
-
-/**
- * freelist_count: print how many ids are on the freelist
- *   @print_each: when 1, also print every id
- *
- *   Counts the superblock's current freelist block and then every block
- *   on the chain of full freelist blocks.  Output goes to stdout.
- */
-void freelist_count(int print_each)
-{
-    blockstore_super_t *bs_super;
-    freeblock_t *fb;
-    uint64_t total = 0, next, i;
-
-    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-    if (bs_super == NULL) {
-        fprintf(stderr, "freelist_count: cannot read superblock\n");
-        return;
-    }
-
-    if (bs_super->freelist_current == 0ULL) {
-        printf("freelist is empty!\n");
-        goto out;
-    }
-
-    fb = readblock(bs_super->freelist_current);
-    if (fb == NULL) {
-        fprintf(stderr, "freelist_count: cannot read freelist block\n");
-        goto out;
-    }
-    /* %Ld is not a standard integer conversion; use %lld with a cast */
-    printf("%lld entires on current.\n", (long long)fb->count);
-    total += fb->count;
-    if (print_each == 1)
-    {
-        for (i = 0; i < fb->count; i++)
-            printf("  %lld\n", (long long)fb->list[i]);
-    }
-
-    freeblock(fb);
-
-    if (bs_super->freelist_full == 0ULL) {
-        printf("freelist_full is empty!\n");
-        goto out;
-    }
-
-    /* walk the chain of full freelist blocks until next == 0 */
-    next = bs_super->freelist_full;
-    while (next != 0ULL) {
-        fb = readblock(next);
-        if (fb == NULL) {
-            fprintf(stderr, "freelist_count: cannot read freelist block\n");
-            break;
-        }
-        total += fb->count;
-        if (print_each == 1)
-        {
-            for (i = 0; i < fb->count; i++)
-                printf("  %lld\n", (long long)fb->list[i]);
-        }
-        next = fb->next;
-        freeblock(fb);
-    }
-    printf("Total of %lld ids on freelist.\n", (long long)total);
-
-out:
-    /* the superblock copy used to be leaked on every path */
-    freeblock(bs_super);
-}
-
-/*****************************************************************************
- * Initialisation                                                            *
- *****************************************************************************/
-
-int __init_blockstore(void)
-{
-    int i;
-    blockstore_super_t *bs_super;
-    uint64_t ret;
-    int block_fp;
-    
-#ifdef BLOCKSTORE_REMOTE
-    struct hostent *addr;
-
-    pthread_mutex_init(&ptmutex_queue, NULL);
-    pthread_mutex_init(&ptmutex_luid, NULL);
-    pthread_mutex_init(&ptmutex_recv, NULL);
-    /*pthread_mutex_init(&ptmutex_notify, NULL);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pool_thread[i].newdata = 0;
-        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
-        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
-    }
-
-    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
-    bsservers[1].hostname = "planb.cl.cam.ac.uk";
-    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
-    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
-    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
-    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
-    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
-    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
-    bsservers[8].hostname = NULL;
-    bsservers[9].hostname = NULL;
-    bsservers[10].hostname = NULL;
-    bsservers[11].hostname = NULL;
-    bsservers[12].hostname = NULL;
-    bsservers[13].hostname = NULL;
-    bsservers[14].hostname = NULL;
-    bsservers[15].hostname = NULL;
-
-    for (i = 0; i < MAX_SERVERS; i++) {
-        if (!bsservers[i].hostname)
-            continue;
-        addr = gethostbyname(bsservers[i].hostname);
-        if (!addr) {
-            perror("bad hostname");
-            return -1;
-        }
-        bsservers[i].sin.sin_family = addr->h_addrtype;
-        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
-        bsservers[i].sin.sin_addr.s_addr = 
-            ((struct in_addr *)(addr->h_addr))->s_addr;
-    }
-
-    /* Cluster map
-     */
-    bsclusters[0].servers[0] = 0;
-    bsclusters[0].servers[1] = 1;
-    bsclusters[0].servers[2] = 2;
-    bsclusters[1].servers[0] = 1;
-    bsclusters[1].servers[1] = 2;
-    bsclusters[1].servers[2] = 3;
-    bsclusters[2].servers[0] = 2;
-    bsclusters[2].servers[1] = 3;
-    bsclusters[2].servers[2] = 4;
-    bsclusters[3].servers[0] = 3;
-    bsclusters[3].servers[1] = 4;
-    bsclusters[3].servers[2] = 5;
-    bsclusters[4].servers[0] = 4;
-    bsclusters[4].servers[1] = 5;
-    bsclusters[4].servers[2] = 6;
-    bsclusters[5].servers[0] = 5;
-    bsclusters[5].servers[1] = 6;
-    bsclusters[5].servers[2] = 7;
-    bsclusters[6].servers[0] = 6;
-    bsclusters[6].servers[1] = 7;
-    bsclusters[6].servers[2] = 0;
-    bsclusters[7].servers[0] = 7;
-    bsclusters[7].servers[1] = 0;
-    bsclusters[7].servers[2] = 1;
-
-    /* Local socket set up
-     */
-    bssock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (bssock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sin_local, 0, sizeof(sin_local));
-    sin_local.sin_family = AF_INET;
-    sin_local.sin_port = htons(BLOCKSTORED_PORT);
-    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
-        perror("bind");
-        close(bssock);
-        return -1;
-    }
-
-    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
-    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
-
-#else /* /BLOCKSTORE_REMOTE */
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-        exit(-1);
-    }
-    
-    if (lseek(block_fp, 0, SEEK_END) == 0) {
-        bs_super = newblock();
-        bs_super->magic            = BLOCKSTORE_MAGIC;
-        bs_super->freelist_full    = 0LL;
-        bs_super->freelist_current = 0LL;
-        
-        ret = allocblock(bs_super);
-        
-        freeblock(bs_super);
-    } else {
-        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-        if (bs_super->magic != BLOCKSTORE_MAGIC)
-        {
-            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
-            exit(-1);
-        }
-        freeblock(bs_super);
-    }
-        
-    close(block_fp);
-        
-#endif /*  BLOCKSTORE_REMOTE */   
-    return 0;
-}
-
-void __exit_blockstore(void)
-{
-    int i;
-#ifdef BLOCKSTORE_REMOTE
-    pthread_mutex_destroy(&ptmutex_recv);
-    pthread_mutex_destroy(&ptmutex_luid);
-    pthread_mutex_destroy(&ptmutex_queue);
-    /*pthread_mutex_destroy(&ptmutex_notify);
-      pthread_cond_destroy(&ptcv_notify);*/
-    for (i = 0; i <= READ_POOL_SIZE; i++) {
-        pthread_mutex_destroy(&(pool_thread[i].ptmutex));
-        pthread_cond_destroy(&(pool_thread[i].ptcv));
-    }
-#endif
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/**************************************************************************
- * 
- * blockstore.h
- *
- * Simple block store interface
- *
- */
- 
-#ifndef __BLOCKSTORE_H__
-#define __BLOCKSTORE_H__
-
-#include <netinet/in.h>
-#include <xenctrl.h>
-
-#define BLOCK_SIZE  4096
-#define BLOCK_SHIFT   12
-#define BLOCK_MASK  0xfffffffffffff000LL
-
-/* XXX SMH: where is the below supposed to be defined???? */
-#ifndef SECTOR_SHIFT 
-#define SECTOR_SHIFT   9 
-#endif
-
-#define FREEBLOCK_SIZE  (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t))
-#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t next;
-    uint64_t count;
-    uint64_t list[FREEBLOCK_SIZE];
-} freeblock_t; 
-
-#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
-#define BLOCKSTORE_SUPER 1ULL
-
-typedef struct {
-    uint64_t magic;
-    uint64_t freelist_full;
-    uint64_t freelist_current;
-} blockstore_super_t;
-
-extern void *newblock();
-extern void *readblock(uint64_t id);
-extern uint64_t allocblock(void *block);
-extern uint64_t allocblock_hint(void *block, uint64_t hint);
-extern int writeblock(uint64_t id, void *block);
-
-/* Add this blockid to a freelist, to be recycled by the allocator. */
-extern void releaseblock(uint64_t id);
-
-/* this is a memory free() operation for block-sized allocations */
-extern void freeblock(void *block);
-extern int __init_blockstore(void);
-
-/* debug for freelist. */
-void freelist_count(int print_each);
-#define ALLOCFAIL (((uint64_t)(-1)))
-
-/* Distribution
- */
-#define BLOCKSTORED_PORT 9346
-
-struct bshdr_t_struct {
-    uint32_t            operation;
-    uint32_t            flags;
-    uint64_t            id;
-    uint64_t            luid;
-} __attribute__ ((packed));
-typedef struct bshdr_t_struct bshdr_t;
-
-struct bsmsg_t_struct {
-    bshdr_t        hdr;
-    unsigned char  block[BLOCK_SIZE];
-} __attribute__ ((packed));
-
-typedef struct bsmsg_t_struct bsmsg_t;
-
-#define MSGBUFSIZE_OP    sizeof(uint32_t)
-#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
-#define MSGBUFSIZE_ID    (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
-#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
-
-#define BSOP_READBLOCK  0x01
-#define BSOP_WRITEBLOCK 0x02
-#define BSOP_ALLOCBLOCK 0x03
-#define BSOP_FREEBLOCK  0x04
-
-#define BSOP_FLAG_ERROR 0x01
-
-#define BS_ALLOC_SKIP 10
-#define BS_ALLOC_HACK
-
-/* Remote hosts and cluster map - XXX need to generalise
- */
-
-/*
-
-  Interim ID format is
-
-  63 60 59                40 39                20 19                 0
-  +----+--------------------+--------------------+--------------------+
-  |map | replica 2          | replica 1          | replica 0          |
-  +----+--------------------+--------------------+--------------------+
-
-  The map is an index into a table detailing which machines form the
-  cluster.
-
- */
-
-#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
-#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
-#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
-#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
-
-#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
-                                         (((uint64_t)(_rep2))<<40) | \
-                                         (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
-
-typedef struct bsserver_t_struct {
-    char              *hostname;
-    struct sockaddr_in sin;
-} bsserver_t;
-
-#define MAX_SERVERS 16
-
-#define CLUSTER_MAX_REPLICAS 3
-typedef struct bscluster_t_struct {
-    int servers[CLUSTER_MAX_REPLICAS];
-} bscluster_t;
-
-#define MAX_CLUSTERS 16
-
-#endif /* __BLOCKSTORE_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstored.c
--- a/tools/blktap/parallax/blockstored.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,275 +0,0 @@
-/**************************************************************************
- * 
- * blockstored.c
- *
- * Block store daemon.
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include "blockstore.h"
-
-//#define BSDEBUG
-
-int readblock_into(uint64_t id, void *block);
-
-int open_socket(uint16_t port) {
-    
-    struct sockaddr_in sn;
-    int sock;
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        return -1;
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(port);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        return -1;
-    }
-
-    return sock;
-}
-
-static int block_fp = -1;
-static int bssock = -1;
-
-int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
-
-    int rc;
-    
-#ifdef BSDEBUG
-    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
-            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
-#endif
-    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
-    if (rc < 0) {
-        perror("send_reply");
-        return 1;
-    }
-
-
-    return 0;
-}
-
-static bsmsg_t msgbuf;
-
-void service_loop(void) {
-
-    for (;;) {
-        int rc, len;
-        struct sockaddr_in from;
-        size_t slen = sizeof(from);
-        uint64_t bid;
-
-        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                       (struct sockaddr *)&from, &slen);
-
-        if (len < 0) {
-            perror("recvfrom");
-            continue;
-        }
-
-        if (len < MSGBUFSIZE_OP) {
-            fprintf(stderr, "Short packet.\n");
-            continue;
-        }
-
-#ifdef BSDEBUG
-        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
-                len, msgbuf.hdr.operation, msgbuf.hdr.id);
-#endif
-
-        switch (msgbuf.hdr.operation) {
-        case BSOP_READBLOCK:
-            if (len < MSGBUFSIZE_ID) {
-                fprintf(stderr, "Short packet (readblock %u).\n", len);
-                continue;
-            }
-            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "readblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
-            break;
-        case BSOP_WRITEBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (writeblock %u).\n", len);
-                continue;
-            }
-            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
-            if (rc < 0) {
-                fprintf(stderr, "writeblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        case BSOP_ALLOCBLOCK:
-            if (len < MSGBUFSIZE_BLOCK) {
-                fprintf(stderr, "Short packet (allocblock %u).\n", len);
-                continue;
-            }
-            bid = allocblock(msgbuf.block);
-            if (bid == ALLOCFAIL) {
-                fprintf(stderr, "allocblock error\n");
-                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
-                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-                continue;
-            }
-            msgbuf.hdr.id = bid;
-            msgbuf.hdr.flags = 0;
-            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
-            break;
-        }
-
-    }
-}
- 
-/**
- * readblock: read a block from disk
- *   @id: block id to read
- *   @block: pointer to buffer to receive block
- *
- *   @return: 0 if OK, other on error
- */
-
-int readblock_into(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
-        perror("readblock lseek");
-        return -1;
-    }
-    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("readblock read");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * writeblock: write an existing block to disk
- *   @id: block id
- *   @block: pointer to block
- *
- *   @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
-        perror("writeblock lseek");
-        return -1;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) < 0) {
-        perror("writeblock write");
-        return -1;
-    }
-    return 0;
-}
-
-/**
- * allocblock: write a new block to disk
- *   @block: pointer to block
- *
- *   @return: new id of block on disk
- */
-static uint64_t lastblock = 0;
-
-uint64_t allocblock(void *block) {
-    uint64_t lb;
-    off64_t pos;
-
-    retry:
-    pos = lseek64(block_fp, 0, SEEK_END);
-    if (pos == (off64_t)-1) {
-        perror("allocblock lseek");
-        return ALLOCFAIL;
-    }
-    if (pos % BLOCK_SIZE != 0) {
-        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
-        return ALLOCFAIL;
-    }
-    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
-        perror("allocblock write");
-        return ALLOCFAIL;
-    }
-    lb = pos / BLOCK_SIZE + 1;
-
-#ifdef BS_ALLOC_HACK
-    if (lb < BS_ALLOC_SKIP)
-        goto retry;
-#endif
-    
-    if (lb <= lastblock)
-        printf("[*** %Ld alredy allocated! ***]\n", lb);
-    
-    lastblock = lb;
-    return lb;
-}
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- *   @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
-    void *block = malloc(BLOCK_SIZE);
-    if (block == NULL) {
-        perror("newblock");
-        return NULL;
-    }
-    memset(block, 0, BLOCK_SIZE);
-    return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- *   @id: block id (zero if this is only in-memory)
- *   @block: block to be freed
- */
-void freeblock(void *block) {
-        free(block);
-}
-
-
-int main(int argc, char **argv)
-{
-    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
-    if (block_fp < 0) {
-        perror("open");
-        return -1;
-    }
-
-    bssock = open_socket(BLOCKSTORED_PORT);
-    if (bssock < 0) {
-        return -1;
-    }
-
-    service_loop();
-    
-    close(bssock);
-
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/bstest.c
--- a/tools/blktap/parallax/bstest.c    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-/**************************************************************************
- * 
- * bstest.c
- *
- * Block store daemon test program.
- *
- * usage: bstest <host>|X {r|w|a} ID 
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <errno.h>
-#include "blockstore.h"
-
-int direct(char *host, uint32_t op, uint64_t id, int len) {
-    struct sockaddr_in sn, peer;
-    int sock;
-    bsmsg_t msgbuf;
-    int rc, slen;
-    struct hostent *addr;
-
-    addr = gethostbyname(host);
-    if (!addr) {
-        perror("bad hostname");
-        exit(1);
-    }
-    peer.sin_family = addr->h_addrtype;
-    peer.sin_port = htons(BLOCKSTORED_PORT);
-    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
-    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
-            (unsigned int)(unsigned char)addr->h_addr[0],
-            (unsigned int)(unsigned char)addr->h_addr[1],
-            (unsigned int)(unsigned char)addr->h_addr[2],
-            (unsigned int)(unsigned char)addr->h_addr[3]);
-
-    sock = socket(AF_INET, SOCK_DGRAM, 0);
-    if (sock < 0) {
-        perror("Bad socket");
-        exit(1);
-    }
-    memset(&sn, 0, sizeof(sn));
-    sn.sin_family = AF_INET;
-    sn.sin_port = htons(BLOCKSTORED_PORT);
-    sn.sin_addr.s_addr = htonl(INADDR_ANY);
-    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
-        perror("bind");
-        close(sock);
-        exit(1);
-    }
-
-    memset((void *)&msgbuf, 0, sizeof(msgbuf));
-    msgbuf.operation = op;
-    msgbuf.id = id;
-
-    rc = sendto(sock, (void *)&msgbuf, len, 0,
-                (struct sockaddr *)&peer, sizeof(peer));
-    if (rc < 0) {
-        perror("sendto");
-        exit(1);
-    }
-
-    slen = sizeof(peer);
-    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
-                   (struct sockaddr *)&peer, &slen);
-    if (len < 0) {
-        perror("recvfrom");
-        exit(1);
-    }
-
-    printf("Reply %u bytes:\n", len);
-    if (len >= MSGBUFSIZE_OP)
-        printf("  operation: %u\n", msgbuf.operation);
-    if (len >= MSGBUFSIZE_FLAGS)
-        printf("  flags: 0x%x\n", msgbuf.flags);
-    if (len >= MSGBUFSIZE_ID)
-        printf("  id: %llu\n", msgbuf.id);
-    if (len >= (MSGBUFSIZE_ID + 4))
-        printf("  data: %02x %02x %02x %02x...\n",
-               (unsigned int)msgbuf.block[0],
-               (unsigned int)msgbuf.block[1],
-               (unsigned int)msgbuf.block[2],
-               (unsigned int)msgbuf.block[3]);
-    
-    if (sock > 0)
-        close(sock);
-   
-    return 0;
-}
-
-int main (int argc, char **argv) {
-
-    uint32_t op = 0;
-    uint64_t id = 0;
-    int len = 0, rc;
-    void *block;
-
-    if (argc < 3) {
-        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
-        return 1;
-    }
-
-    switch (argv[2][0]) {
-    case 'r':
-    case 'R':
-        op = BSOP_READBLOCK;
-        len = MSGBUFSIZE_ID;
-        break;
-    case 'w':
-    case 'W':
-        op = BSOP_WRITEBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    case 'a':
-    case 'A':
-        op = BSOP_ALLOCBLOCK;
-        len = MSGBUFSIZE_BLOCK;
-        break;
-    default:
-        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
-        return 1;
-    }
-
-    if (argc >= 4)
-        id = atoll(argv[3]);
-
-    if (strcmp(argv[1], "X") == 0) {
-        rc = __init_blockstore();
-        if (rc < 0) {
-            fprintf(stderr, "blockstore init failed.\n");
-            return 1;
-        }
-        switch(op) {
-        case BSOP_READBLOCK:
-            block = readblock(id);
-            if (block) {
-                printf("data: %02x %02x %02x %02x...\n",
-                       (unsigned int)((unsigned char*)block)[0],
-                       (unsigned int)((unsigned char*)block)[1],
-                       (unsigned int)((unsigned char*)block)[2],
-                       (unsigned int)((unsigned char*)block)[3]);
-            }
-            break;
-        case BSOP_WRITEBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            rc = writeblock(id, block);
-            if (rc != 0) {
-                printf("error\n");
-            }
-            else {
-                printf("OK\n");
-            }
-            break;
-        case BSOP_ALLOCBLOCK:
-            block = malloc(BLOCK_SIZE);
-            if (!block) {
-                perror("bstest malloc");
-                return 1;
-            }
-            memset(block, 0, BLOCK_SIZE);
-            id = allocblock_hint(block, id);
-            if (id == 0) {
-                printf("error\n");
-            }
-            else {
-                printf("ID: %llu\n", id);
-            }
-            break;
-        }
-    }
-    else {
-        direct(argv[1], op, id, len);
-    }
-
-
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/parallax.c
--- a/tools/blktap/parallax/parallax.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,608 +0,0 @@
-/**************************************************************************
- * 
- * parallax.c
- *
- * The Parallax Storage Server
- *
- */
- 
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "blktaplib.h"
-#include "blockstore.h"
-#include "vdi.h"
-#include "block-async.h"
-#include "requests-async.h"
-
-#define PARALLAX_DEV     61440
-#define SECTS_PER_NODE   8
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* ------[ session records ]----------------------------------------------- */
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-#define VDI_HASHSZ 16
-#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
-
-typedef struct blkif {
-    domid_t       domid;
-    unsigned int  handle;
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    vdi_t        *vdi_hash[VDI_HASHSZ];
-    struct blkif *hash_next;
-} blkif_t;
-
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    if ( handle != 0 )
-        printf("blktap/parallax don't currently support non-0 dev handles!\n");
-    
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
-{
-    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
-    
-    while ((vdi != NULL) && (vdi->vdevice != device))
-        vdi = vdi->next;
-    
-    return vdi;
-}
-
-/* ------[ control message handling ]-------------------------------------- */
-
-void blkif_create(blkif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_create): create is %p\n", create); 
-    
-    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
-    {
-        DPRINTF("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
-                domid, handle);
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            free(blkif);
-            return;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    DPRINTF("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
-    
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pblkif = blkif->hash_next;
-    free(blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_create(blkif_be_vbd_create_t *create)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = create->vdevice;
-
-    DPRINTF("parallax (vbd_create): create=%p\n", create); 
-    
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    /* VDI identifier is in grow->extent.sector_start */
-    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
-            (unsigned long)create->dev_handle);
-
-    vdi = vdi_get(create->dev_handle);
-    if (vdi == NULL)
-    {
-        printf("parallax (vbd_create): VDI %lx not found.\n",
-               (unsigned long)create->dev_handle);
-        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-        return;
-    }
-    
-    vdi->next = NULL;
-    vdi->vdevice = vdevice;
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while (*vdip != NULL)
-        vdip = &(*vdip)->next;
-    *vdip = vdi;
-    
-    DPRINTF("blkif_create succeeded\n"); 
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
-    blkif_t            *blkif;
-    vdi_t              *vdi, **vdip;
-    blkif_vdev_t        vdevice = destroy->vdevice;
-    
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( blkif == NULL )
-    {
-        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
-    while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
-        vdip = &(*vdip)->next;
-
-    if (*vdip != NULL) 
-    {
-        vdi = *vdip;
-        *vdip = vdi->next;
-        vdi_put(vdi);
-    }
-        
-}
-
-int parallax_control(control_msg_t *msg)
-{
-    domid_t  domid;
-    int      ret;
-
-    DPRINTF("parallax_control: msg is %p\n", msg); 
-    
-    if (msg->type != CMSG_BLKIF_BE) 
-    {
-        printf("Unexpected control message (%d)\n", msg->type);
-        return 0;
-    }
-    
-    switch(msg->subtype)
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_be_create_t) )
-            goto parse_error;
-        blkif_create((blkif_be_create_t *)msg->msg);
-        break;   
-        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_be_destroy_t) )
-            goto parse_error;
-        blkif_destroy((blkif_be_destroy_t *)msg->msg);
-        break;  
-        
-    case CMSG_BLKIF_BE_VBD_CREATE:
-        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
-            goto parse_error;
-        vbd_create((blkif_be_vbd_create_t *)msg->msg);
-        break;
-        
-    case CMSG_BLKIF_BE_VBD_DESTROY:
-        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
-            goto parse_error;
-        vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
-        break;
-
-    case CMSG_BLKIF_BE_CONNECT:
-    case CMSG_BLKIF_BE_DISCONNECT:
-        /* we don't manage the device channel, the tap does. */
-        break;
-
-    default:
-        goto parse_error;
-    }
-    return 0;
-parse_error:
-    printf("Bad control message!\n");
-    return 0;
-    
-}    
-
-int parallax_probe(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    vdisk_t *img_info;
-    vdi_t *vdi;
-    int i, nr_vdis = 0; 
-
-    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
-
-    /* We expect one buffer only. */
-    if ( req->nr_segments != 1 )
-      goto err;
-
-    /* Make sure the buffer is page-sized. */
-    if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) )
-      goto err;
-
-    /* fill the list of devices */
-    for (i=0; i<VDI_HASHSZ; i++) {
-        vdi = blkif->vdi_hash[i];
-        while (vdi) {
-            img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
-            img_info[nr_vdis].device   = vdi->vdevice;
-            img_info[nr_vdis].info     = 0;
-            /* The -1 here accounts for the LSB in the radix tree */
-            img_info[nr_vdis].capacity = 
-                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
-            nr_vdis++;
-            vdi = vdi->next;
-        }
-    }
-
-    
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = nr_vdis; /* number of disks */
-
-    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
-    return  BLKTAP_RESPOND;
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_PROBE;
-    rsp->status = BLKIF_RSP_ERROR;
-    
-    DPRINTF("parallax_probe: send error response\n"); 
-    return BLKTAP_RESPOND;  
-}
-
-typedef struct {
-    blkif_request_t *req;
-    int              count;
-    int              error;
-    pthread_mutex_t  mutex;
-} pending_t;
-
-#define MAX_REQUESTS 64
-pending_t pending_list[MAX_REQUESTS];
-
-struct cb_param {
-    pending_t *pent;
-    int       segment;
-    uint64_t       sector; 
-    uint64_t       vblock; /* for debug printing -- can be removed. */
-};
-
-static void read_cb(struct io_ret r, void *in_param)
-{
-    struct cb_param *param = (struct cb_param *)in_param;
-    pending_t *p = param->pent;
-    int segment = param->segment;
-    blkif_request_t *req = p->req;
-    unsigned long size, offset, start;
-    char *dpage, *spage;
-       
-    spage  = IO_BLOCK(r);
-    if (spage == NULL) { p->error++; goto finish; }
-    dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
-    
-    /* Calculate read size and offset within the read block. */
-
-    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
-    size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) <<
-        SECTOR_SHIFT;
-    start = req->seg[segment].first_sect << SECTOR_SHIFT;
-
-    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
-            "vblock %llx, "
-            "size %lx\n", 
-            param->sector,
-            p->req->seg[segment].first_sect,
-            p->req->seg[segment].last_sect,
-            param->vblock, size); 
-
-    memcpy(dpage + start, spage + offset, size);
-    freeblock(spage);
-    
-    /* Done the read.  Now update the pending record. */
- finish:
-    pthread_mutex_lock(&p->mutex);
-    p->count--;
-    
-    if (p->count == 0) {
-       blkif_response_t *rsp;
-       
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_READ;
-       if (p->error == 0) {
-            rsp->status = BLKIF_RSP_OKAY;
-       } else {
-            rsp->status = BLKIF_RSP_ERROR;
-       }
-        blktap_inject_response(rsp);       
-    }
-    
-    pthread_mutex_unlock(&p->mutex);
-       
-    free(param); /* TODO: replace with cached alloc/dealloc */
-}      
-
-int parallax_read(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    uint64_t vblock, gblock;
-    vdi_t *vdi;
-    uint64_t sector;
-    int i;
-    char *dpage, *spage;
-    pending_t *pent;
-
-    vdi = blkif_get_vdi(blkif, req->device);
-    
-    if ( vdi == NULL )
-        goto err;
-        
-    pent = &pending_list[ID_TO_IDX(req->id)];
-    pent->count = req->nr_segments;
-    pent->req = req;
-    pthread_mutex_init(&pent->mutex, NULL);
-    
-    for (i = 0; i < req->nr_segments; i++) {
-        pthread_t tid;
-        int ret;
-        struct cb_param *p;
-        
-        /* Round the requested segment to a block address. */
-        sector  = req->sector_number + (8*i);
-        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-        
-        /* TODO: Replace this call to malloc with a cached allocation */
-        p = (struct cb_param *)malloc(sizeof(struct cb_param));
-        p->pent = pent;
-        p->sector = sector; 
-        p->segment = i;     
-        p->vblock = vblock; /* dbg */
-        
-        /* Get that block from the store. */
-        vdi_read(vdi, vblock, read_cb, (void *)p);    
-    }
-    
-    return BLKTAP_STOLEN;
-
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_READ;
-    rsp->status = BLKIF_RSP_ERROR;
-    
-    return BLKTAP_RESPOND;  
-}
-
-static void write_cb(struct io_ret r, void *in_param)
-{
-    struct cb_param *param = (struct cb_param *)in_param;
-    pending_t *p = param->pent;
-    blkif_request_t *req = p->req;
-    
-    /* catch errors from the block code. */
-    if (IO_INT(r) < 0) p->error++;
-    
-    pthread_mutex_lock(&p->mutex);
-    p->count--;
-    
-    if (p->count == 0) {
-       blkif_response_t *rsp;
-       
-        rsp = (blkif_response_t *)req;
-        rsp->id = req->id;
-        rsp->operation = BLKIF_OP_WRITE;
-       if (p->error == 0) {
-            rsp->status = BLKIF_RSP_OKAY;
-       } else {
-            rsp->status = BLKIF_RSP_ERROR;
-       }
-        blktap_inject_response(rsp);       
-    }
-    
-    pthread_mutex_unlock(&p->mutex);
-       
-    free(param); /* TODO: replace with cached alloc/dealloc */
-}
-
-int parallax_write(blkif_request_t *req, blkif_t *blkif)
-{
-    blkif_response_t *rsp;
-    uint64_t sector;
-    int i, writable = 0;
-    uint64_t vblock, gblock;
-    char *spage;
-    unsigned long size, offset, start;
-    vdi_t *vdi;
-    pending_t *pent;
-
-    vdi = blkif_get_vdi(blkif, req->device);
-    
-    if ( vdi == NULL )
-        goto err;
-        
-    pent = &pending_list[ID_TO_IDX(req->id)];
-    pent->count = req->nr_segments;
-    pent->req = req;
-    pthread_mutex_init(&pent->mutex, NULL);
-    
-    for (i = 0; i < req->nr_segments; i++) {
-        struct cb_param *p;
-        
-        spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-        
-        /* Round the requested segment to a block address. */
-        
-        sector  = req->sector_number + (8*i);
-        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-        
-        /* Calculate read size and offset within the read block. */
-        
-        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
-        size = (req->seg[i].last_sect - req->seg[i].first_sect + 1) <<
-            SECTOR_SHIFT;
-        start = req->seg[i].first_sect << SECTOR_SHIFT;
-
-        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld),  "
-                "vblock %llx, gblock %llx, "
-                "size %lx\n", 
-                sector, 
-                req->seg[i].first_sect, req->seg[i].last_sect,
-                vblock, gblock, size); 
-      
-        /* XXX: For now we just freak out if they try to write a   */
-        /* non block-sized, block-aligned page.                    */
-        
-        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
-            printf("]\n] STRANGE WRITE!\n]\n");
-            goto err;
-        }
-        
-        /* TODO: Replace this call to malloc with a cached allocation */
-        p = (struct cb_param *)malloc(sizeof(struct cb_param));
-        p->pent = pent;
-        p->sector = sector; 
-        p->segment = i;     
-        p->vblock = vblock; /* dbg */
-        
-        /* Issue the write to the store. */
-        vdi_write(vdi, vblock, spage, write_cb, (void *)p);
-    }
-
-    return BLKTAP_STOLEN;
-
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = BLKIF_OP_WRITE;
-    rsp->status = BLKIF_RSP_ERROR;
-    
-    return BLKTAP_RESPOND;  
-}
-
-int parallax_request(blkif_request_t *req)
-{
-    blkif_response_t *rsp;
-    domid_t  dom   = ID_TO_DOM(req->id);
-    blkif_t *blkif = blkif_find_by_handle(dom, 0);
-    
-    if (blkif == NULL)
-        goto err;
-    
-    if ( req->operation == BLKIF_OP_PROBE ) {
-        
-        return parallax_probe(req, blkif);
-        
-    } else if ( req->operation == BLKIF_OP_READ ) {
-        
-        return parallax_read(req, blkif);
-        
-    } else if ( req->operation == BLKIF_OP_WRITE ) {
-        
-        return parallax_write(req, blkif);
-        
-    } else {
-        printf("Unknown request message type!\n");
-        /* Unknown operation */
-        goto err;
-    }
-    
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->operation = req->operation;
-    rsp->id = req->id;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-void __init_parallax(void) 
-{
-    memset(blkif_hash, 0, sizeof(blkif_hash));
-}
-
-
-
-int main(int argc, char *argv[])
-{
-    DPRINTF("parallax: starting.\n"); 
-    __init_blockstore();
-    DPRINTF("parallax: initialized blockstore...\n"); 
-    init_block_async();
-    DPRINTF("parallax: initialized async blocks...\n"); 
-    __init_vdi();
-    DPRINTF("parallax: initialized vdi registry etc...\n"); 
-    __init_parallax();
-    DPRINTF("parallax: initialized local stuff..\n"); 
-
-    blktap_register_ctrl_hook("parallax_control", parallax_control);
-    blktap_register_request_hook("parallax_request", parallax_request);
-    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
-    blktap_listen();
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/radix.c
--- a/tools/blktap/parallax/radix.c     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,631 +0,0 @@
-/*
- * Radix tree for mapping (up to) 63-bit virtual block IDs to
- * 63-bit global block IDs
- *
- * Pointers within the tree set aside the least significant bit to indicate
- * whther or not the target block is writable from this node.
- *
- * The block with ID 0 is assumed to be an empty block of all zeros
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <pthread.h>
-#include "blockstore.h"
-#include "radix.h"
-
-#define RADIX_TREE_MAP_SHIFT 9
-#define RADIX_TREE_MAP_MASK 0x1ff
-#define RADIX_TREE_MAP_ENTRIES 512
-
-/*
-#define DEBUG
-*/
-
-/* Experimental radix cache. */
-
-static  pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
-static  int rcache_count = 0;
-#define RCACHE_MAX 1024
-
-typedef struct rcache_st {
-    radix_tree_node  *node;
-    uint64_t               id;
-    struct rcache_st *hash_next;
-    struct rcache_st *cache_next;
-    struct rcache_st *cache_prev;
-} rcache_t;
-
-static rcache_t *rcache_head = NULL;
-static rcache_t *rcache_tail = NULL;
-
-#define RCHASH_SIZE 512ULL
-rcache_t *rcache[RCHASH_SIZE];
-#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1))
-
-void __rcache_init(void)
-{
-    int i;
-
-    for (i=0; i<RCHASH_SIZE; i++)
-        rcache[i] = NULL;
-}
-    
-
-void rcache_write(uint64_t id, radix_tree_node *node)
-{
-    rcache_t *r, *tmp, **curs;
-    
-    pthread_mutex_lock(&rcache_mutex);
-    
-    /* Is it already in the cache? */
-    r = rcache[RCACHE_HASH(id)];
-    
-    for (;;) {
-        if (r == NULL) 
-            break;
-        if (r->id == id) 
-        {
-            memcpy(r->node, node, BLOCK_SIZE);
-            
-            /* bring to front. */
-            if (r != rcache_head) {
-                
-                if (r == rcache_tail) {
-                    if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
-                    rcache_tail->cache_next = NULL;
-                }
-
-                tmp = r->cache_next;
-                if (r->cache_next != NULL) r->cache_next->cache_prev 
-                                                     = r->cache_prev;
-                if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
-
-                r->cache_prev = NULL;
-                r->cache_next = rcache_head;
-                if (rcache_head != NULL) rcache_head->cache_prev = r;
-                rcache_head = r;
-            }
-
-//printf("Update (%Ld)\n", r->id);
-            goto done;
-        }
-        r = r->hash_next;
-    }
-    
-    if ( rcache_count == RCACHE_MAX ) 
-    {
-        /* Remove an entry */
-        
-        r = rcache_tail;
-        if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
-        rcache_tail->cache_next = NULL;
-        freeblock(r->node);
-        
-        curs = &rcache[RCACHE_HASH(r->id)];
-        while ((*curs) != r)
-            curs = &(*curs)->hash_next;
-        *curs = r->hash_next;
-//printf("Evict (%Ld)\n", r->id);
-        
-    } else {
-        
-        r = (rcache_t *)malloc(sizeof(rcache_t));
-        rcache_count++;
-    }
-    
-    r->node = newblock();
-    memcpy(r->node, node, BLOCK_SIZE);
-    r->id = id;
-    
-    r->hash_next = rcache[RCACHE_HASH(id)];
-    rcache[RCACHE_HASH(id)] = r;
-    
-    r->cache_prev = NULL;
-    r->cache_next = rcache_head;
-    if (rcache_head != NULL) rcache_head->cache_prev = r;
-    rcache_head = r;
-    if (rcache_tail == NULL) rcache_tail = r;
-    
-//printf("Added (%Ld, %p)\n", id, r->node);
-done:
-    pthread_mutex_unlock(&rcache_mutex);
-}
-
-radix_tree_node *rcache_read(uint64_t id)
-{
-    rcache_t *r, *tmp;
-    radix_tree_node *node = NULL;
-    
-    pthread_mutex_lock(&rcache_mutex);
-
-    r = rcache[RCACHE_HASH(id)];
-    
-    for (;;) {
-        if (r == NULL) {
-//printf("Miss (%Ld)\n", id);
-            goto done;
-        }
-        if (r->id == id) break;
-        r = r->hash_next;
-    }
-   
-    /* bring to front. */
-    if (r != rcache_head) 
-    {
-        if (r == rcache_tail) {
-            if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
-            rcache_tail->cache_next = NULL;
-        }
-        tmp = r->cache_next;
-        if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
-        if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
-
-        r->cache_prev = NULL;
-        r->cache_next = rcache_head;
-        if (rcache_head != NULL) rcache_head->cache_prev = r;
-        rcache_head = r;
-    }
-    
-    node = newblock();
-    memcpy(node, r->node, BLOCK_SIZE);
-    
-//printf("Hit (%Ld, %p)\n", id, r->node);
-done:
-    pthread_mutex_unlock(&rcache_mutex);
-    
-    return(node);
-}
-
-
-void *rc_readblock(uint64_t id)
-{
-    void *ret;
-    
-    ret = (void *)rcache_read(id);
-    
-    if (ret != NULL) return ret;
-    
-    ret = readblock(id);
-    
-    if (ret != NULL)
-        rcache_write(id, ret);
-    
-    return(ret);
-}
-
-uint64_t rc_allocblock(void *block)
-{
-    uint64_t ret;
-    
-    ret = allocblock(block);
-    
-    if (ret != ZERO)
-        rcache_write(ret, block);
-    
-    return(ret);
-}
-
-int rc_writeblock(uint64_t id, void *block)
-{
-    int ret;
-    
-    ret = writeblock(id, block);
-    rcache_write(id, block);
-    
-    return(ret);
-}
-
-
-/*
- * block device interface and other helper functions
- * with these functions, block id is just a 63-bit number, with
- * no special consideration for the LSB
- */
-radix_tree_node cloneblock(radix_tree_node block);
-
-/*
- * main api
- * with these functions, the LSB of root always indicates
- * whether or not the block is writable, including the return
- * values of update and snapshot
- */
-uint64_t lookup(int height, uint64_t root, uint64_t key);
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
-uint64_t snapshot(uint64_t root);
-
-/**
- * cloneblock: clone an existing block in memory
- *   @block: the old block
- *
- *   @return: new block, with LSB cleared for every entry
- */
-radix_tree_node cloneblock(radix_tree_node block) {
-    radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE);
-    int i;
-    if (node == NULL) {
-        perror("cloneblock malloc");
-        return NULL;
-    }
-    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
-        node[i] = block[i] & ONEMASK;
-    return node;
-}
-
-/**
- * lookup: find a value given a key
- *   @height: height in bits of the radix tree
- *   @root: root node id, with set LSB indicating writable node
- *   @key: key to lookup
- *
- *   @return: value on success, zero on error
- */
-
-uint64_t lookup(int height, uint64_t root, uint64_t key) {
-    radix_tree_node node;
-    uint64_t mask = ONE;
-    
-    assert(key >> height == 0);
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-
-    /* now carve off equal sized chunks at each step */
-    for (;;) {
-        uint64_t oldroot;
-
-#ifdef DEBUG
-        printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root,
-                (int) ((key >> height) & RADIX_TREE_MAP_MASK),
-                (iswritable(root) ? "" : " (readonly)"));
-#endif
-        
-        if (getid(root) == ZERO)
-            return ZERO;
-
-        oldroot = root;
-        node = (radix_tree_node) rc_readblock(getid(root));
-        if (node == NULL)
-            return ZERO;
-
-        root = node[(key >> height) & RADIX_TREE_MAP_MASK];
-        mask &= root;
-        freeblock(node);
-
-        if (height == 0)
-            return ( root & ONEMASK ) | mask;
-
-        height -= RADIX_TREE_MAP_SHIFT;
-    }
-
-    return ZERO;
-}
-
-/*
- * update: set a radix tree entry, doing copy-on-write as necessary
- *   @height: height in bits of the radix tree
- *   @root: root node id, with set LSB indicating writable node
- *   @key: key to set
- *   @val: value to set, s.t. radix(key)=val
- *
- *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
- */
-
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val) {
-    int offset;
-    uint64_t child;
-    radix_tree_node node;
-    
-    /* base case--return val */
-    if (height == 0)
-        return val;
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-    offset = (key >> height) & RADIX_TREE_MAP_MASK;
-
-#ifdef DEBUG
-    printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
-            offset, (iswritable(root)?"":" (clone)"));
-#endif
-
-    /* load a block, or create a new one */
-    if (root == ZERO) {
-        node = (radix_tree_node) newblock();
-    } else {
-        node = (radix_tree_node) rc_readblock(getid(root));
-
-        if (!iswritable(root)) {
-            /* need to clone this node */
-            radix_tree_node oldnode = node;
-            node = cloneblock(node);
-            freeblock(oldnode);
-            root = ZERO;
-        }
-    }
-
-    if (node == NULL) {
-#ifdef DEBUG
-        printf("update: node is null!\n");
-#endif
-        return ZERO;
-    }
-
-    child = update(height, node[offset], key, val);
-
-    if (child == ZERO) {
-        freeblock(node);
-        return ZERO;
-    } else if (child == node[offset]) {
-        /* no change, so we already owned the child */
-        assert(iswritable(root));
-
-        freeblock(node);
-        return root;
-    }
-
-    node[offset] = child;
-
-    /* new/cloned blocks need to be saved */
-    if (root == ZERO) {
-        /* mark this as an owned block */
-        root = rc_allocblock(node);
-        if (root)
-            root = writable(root);
-    } else if (rc_writeblock(getid(root), node) < 0) {
-        freeblock(node);
-        return ZERO;
-    }
-
-    freeblock(node);
-    return root;
-}
-
-/**
- * snapshot: create a snapshot
- *   @root: old root node
- *
- *   @return: new root node, 0 on error
- */
-uint64_t snapshot(uint64_t root) {
-    radix_tree_node node, newnode;
-
-    if ((node = rc_readblock(getid(root))) == NULL)
-        return ZERO;
-
-    newnode = cloneblock(node);
-    freeblock(node);
-    if (newnode == NULL)
-        return ZERO;
-    
-    root = rc_allocblock(newnode);
-    freeblock(newnode);
-
-    if (root == ZERO)
-        return ZERO;
-    else
-        return writable(root);
-}
-
-/**
- * collapse: collapse a parent onto a child.
- * 
- * NOTE: This assumes that parent and child really are, and further that
- * there are no other children forked from this parent. (children of the
- * child are okay...)
- */
-
-int collapse(int height, uint64_t proot, uint64_t croot)
-{
-    int i, numlinks, ret, total = 0;
-    radix_tree_node pnode, cnode;
-    
-    if (height == 0) {
-        height = -1; /* terminate recursion */
-    } else {        
-        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-    }
-    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
-
-    /* Terminal cases: */
-
-    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
-        return -1;
-    
-    /* get roots */
-    if ((pnode = readblock(getid(proot))) == NULL)
-        return -1;
-    
-    if ((cnode = readblock(getid(croot))) == NULL)
-    {
-        freeblock(pnode);
-        return -1;
-    }
-    
-    /* For each writable link in proot */
-    for (i=0; i<numlinks; i++)
-    {
-        if ( pnode[i] == cnode[i] ) continue;
-        
-        /* collapse (next level) */
-        /* if height != 0 and writable... */
-        if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
-        {
-            //printf("   %Ld is writable (i=%d).\n", getid(pnode[i]), i);
-            ret = collapse(height, pnode[i], cnode[i]);
-            if (ret == -1) 
-            {
-                total = -1;
-            } else {
-                total += ret;
-            }
-        }
-    
-        
-    }
-    
-    /* if plink is writable, AND clink is writable -> free plink block */
-    if ( ( iswritable(proot) ) && ( iswritable(croot) ) ) 
-    {
-        releaseblock(getid(proot));
-        if (ret >=0) total++;
-        //printf("   Delete %Ld\n", getid(proot));
-    }
-//printf("done : %Ld\n", getid(proot));
-    return total;
-
-}
-
-
-void print_root(uint64_t root, int height, FILE *dot_f)
-{
-    FILE *f;
-    int i;
-    radix_tree_node node;
-    char *style[2] = { "", "style=bold,color=blue," };
-    
-    if (dot_f == NULL) {
-        f = fopen("radix.dot", "w");
-        if (f == NULL) {
-            perror("print_root: open");
-            return;
-        }
-
-        /* write graph preamble */
-        fprintf(f, "digraph G {\n");
-
-        /* add a node for this root. */
-        fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
-                getid(root), style[iswritable(root)], getid(root));
-    }
-    
-    printf("print_root(%Ld)\n", getid(root));
-    
-    /* base case */
-    if (height == 0) {
-        /* add a node and edge for each child root */
-        node = (radix_tree_node) readblock(getid(root));
-        if (node == NULL)
-            return;
-        
-        for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
-            if (node[i] != ZERO) {
-                fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
-                        getid(node[i]), style[iswritable(node[i])], 
-                        getid(node[i]));
-                fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
-                        getid(node[i]), i);
-            }
-        }
-        freeblock(node);
-        return;
-    }
-
-    /* the root block may be smaller to ensure all leaves are full */
-    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-
-    if (getid(root) == ZERO)
-        return;
-
-    node = (radix_tree_node) readblock(getid(root));
-    if (node == NULL)
-        return;
-
-    /* add a node and edge for each child root */
-    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
-        if (node[i] != ZERO) {
-            fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
-                    getid(node[i]), style[iswritable(node[i])], 
-                    getid(node[i]));
-
-            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
-            fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
-                    getid(node[i]), i);
-        }
-
-    freeblock(node);
-    
-    /* write graph postamble */
-    if (dot_f == NULL) {
-        fprintf(f, "}\n");
-        fclose(f);
-    }
-}
-
-#ifdef RADIX_STANDALONE
-
-int main(int argc, char **argv) {
-    uint64_t key = ZERO, val = ZERO;
-    uint64_t root = writable(2ULL);
-    uint64_t p = ZERO, c = ZERO;
-    int v;
-    char buff[4096];
-
-    __init_blockstore();
-    
-    memset(buff, 0, 4096);
-    /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644);
-
-    if (fp < 3) {
-        perror("open");
-        return -1;
-    }
-    if (lseek(fp, 0, SEEK_END) == 0) {
-        write(fp, buff, 4096);
-    }*/
-        
-    allocblock(buff);
-            
-    printf("Recognized commands:\n"
-           "Note: the LSB of a node number indicates if it is writable\n"
-           "  root <node>               set root to <node>\n"
-           "  snapshot                  take a snapshot of the root\n"
-           "  set <key> <val>           set key=val\n"
-           "  get <key>                 query key\n"
-           "  c <proot> <croot>         collapse\n"
-           "  pr                        print tree to dot\n"
-           "  pf <1=verbose>            print freelist\n"
-           "  quit\n"
-           "\nroot = %Ld\n", root);
-    for (;;) {
-        //print_root(root, 34, NULL);
-        //system("dot radix.dot -Tps -o radix.ps");
-
-        printf("> ");
-        fflush(stdout);
-        fgets(buff, 1024, stdin);
-        if (feof(stdin))
-            break;
-        if (sscanf(buff, " root %Ld", &root) == 1) {
-            printf("root set to %Ld\n", root);
-        } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
-            root = update(34, root, key, val);
-            printf("root = %Ld\n", root);
-        } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
-            v = collapse(34, p, c);
-            printf("reclaimed %d blocks.\n", v);
-        } else if (sscanf(buff, " get %Ld", &key) == 1) {
-            val = lookup(34, root, key);
-            printf("value = %Ld\n", val);
-        } else if (!strcmp(buff, "quit\n")) {
-            break;
-        } else if (!strcmp(buff, "snapshot\n")) {
-            root = snapshot(root);
-            printf("new root = %Ld\n", root);
-        } else if (sscanf(buff, " pr %Ld", &root) == 1) {
-            print_root(root, 34, NULL);
-        } else if (sscanf(buff, " pf %d", &v) == 1) {
-            freelist_count(v);
-        } else if (!strcmp(buff, "pf\n")) {
-            freelist_count(0);
-        } else {
-            printf("command not recognized\n");
-        }
-    }
-    return 0;
-}
-
-#endif
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/radix.h
--- a/tools/blktap/parallax/radix.h     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-/*
- * Radix tree for mapping (up to) 63-bit virtual block IDs to
- * 63-bit global block IDs
- *
- * Pointers within the tree set aside the least significant bit to indicate
- * whther or not the target block is writable from this node.
- *
- * The block with ID 0 is assumed to be an empty block of all zeros
- */
-
-#ifndef __RADIX_H__
-#define __RADIX_H__
-
-/* I don't really like exposing these, but... */
-#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)
-#define putid(x) ((x)<<1)
-#define writable(x) (((x)<<1)|1LL)
-#define iswritable(x) ((x)&1LL)
-#define ZERO 0LL
-#define ONE 1LL
-#define ONEMASK 0xffffffffffffffeLL
-
-#define RADIX_TREE_MAP_SHIFT 9
-#define RADIX_TREE_MAP_MASK 0x1ff
-#define RADIX_TREE_MAP_ENTRIES 512
-
-typedef uint64_t *radix_tree_node;
-
-
-/*
- * main api
- * with these functions, the LSB of root always indicates
- * whether or not the block is writable, including the return
- * values of update and snapshot
- */
-uint64_t lookup(int height, uint64_t root, uint64_t key);
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
-uint64_t snapshot(uint64_t root);
-int collapse(int height, uint64_t proot, uint64_t croot);
-int isprivate(int height, uint64_t root, uint64_t key);
-
-
-void __rcache_init(void);
-
-#endif /* __RADIX_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/requests-async.c
--- a/tools/blktap/parallax/requests-async.c    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,762 +0,0 @@
-/* requests-async.c
- *
- * asynchronous request dispatcher for radix access in parallax.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <assert.h>
-#include <pthread.h>
-#include <err.h>
-#include <zlib.h> /* for crc32() */
-#include "requests-async.h"
-#include "vdi.h"
-#include "radix.h"
-
-#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)
-#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)
-#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-struct block_info {
-    uint32_t        crc;
-    uint32_t        unused;
-};
-
-struct io_req {
-    enum { IO_OP_READ, IO_OP_WRITE } op;
-    uint64_t        root;
-    uint64_t        vaddr;
-    int        state;
-    io_cb_t    cb;
-    void      *param;
-    struct radix_lock *lock;
-
-    /* internal stuff: */
-    struct io_ret     retval;/* holds the return while we unlock. */
-    char             *block; /* the block to write */
-    radix_tree_node   radix[3];
-    uint64_t               radix_addr[3];
-    struct block_info bi;
-};
-
-void clear_w_bits(radix_tree_node node) 
-{
-    int i;
-    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++)
-        node[i] = node[i] & ONEMASK;
-    return;
-}
-
-void clear_L3_w_bits(radix_tree_node node) 
-{
-    int i;
-    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i+=2)
-        node[i] = node[i] & ONEMASK;
-    return;
-}
-
-enum states {
-    /* both */
-    READ_L1,
-    READ_L2,
-    READ_L3,
-
-    /* read */
-    READ_LOCKED,
-    READ_DATA,
-    READ_UNLOCKED,
-    RETURN_ZERO,
-
-    /* write */
-    WRITE_LOCKED,
-    WRITE_DATA,
-    WRITE_L3,
-    WRITE_UNLOCKED,
-    
-    /* L3 Zero Path */
-    ALLOC_DATA_L3z,
-    WRITE_L3_L3z,
-    
-    /* L3 Fault Path */
-    ALLOC_DATA_L3f,
-    WRITE_L3_L3f,
-    
-    /* L2 Zero Path */
-    ALLOC_DATA_L2z,
-    WRITE_L2_L2z,
-    ALLOC_L3_L2z,
-    WRITE_L2_L3z,
-    
-    /* L2 Fault Path */
-    READ_L3_L2f,
-    ALLOC_DATA_L2f,
-    WRITE_L2_L2f,
-    ALLOC_L3_L2f,
-    WRITE_L2_L3f,
-
-    /* L1 Zero Path */
-    ALLOC_DATA_L1z,
-    ALLOC_L3_L1z,
-    ALLOC_L2_L1z,
-    WRITE_L1_L1z,
-
-    /* L1 Fault Path */
-    READ_L2_L1f,
-    READ_L3_L1f,
-    ALLOC_DATA_L1f,
-    ALLOC_L3_L1f,
-    ALLOC_L2_L1f,
-    WRITE_L1_L1f,
-    
-};
-
-enum radix_offsets {
-    L1 = 0, 
-    L2 = 1,
-    L3 = 2
-};
-
-
-static void read_cb(struct io_ret ret, void *param);
-static void write_cb(struct io_ret ret, void *param);
-
-int vdi_read(vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param)
-{
-    struct io_req *req;
-
-    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
-    /* Every second line in the bottom-level radix tree is used to      */
-    /* store crc32 values etc. We shift the vadder here to achied this. */
-    vaddr <<= 1;
-
-    req = (struct io_req *)malloc(sizeof (struct io_req));
-    if (req == NULL) return ERR_NOMEM;
-
-    req->radix[0] = req->radix[1] = req->radix[2] = NULL;      
-    req->op    = IO_OP_READ;
-    req->root  = vdi->radix_root;
-    req->lock  = vdi->radix_lock; 
-    req->vaddr = vaddr;
-    req->cb    = cb;
-    req->param = param;
-    req->state = READ_LOCKED;
-
-    block_rlock(req->lock, L1_IDX(vaddr), read_cb, req);
-       
-    return 0;
-}
-
-
-int   vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, 
-                io_cb_t cb, void *param)
-{
-    struct io_req *req;
-
-    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
-    /* Every second line in the bottom-level radix tree is used to      */
-    /* store crc32 values etc. We shift the vadder here to achied this. */
-    vaddr <<= 1;
-
-    req = (struct io_req *)malloc(sizeof (struct io_req));
-    if (req == NULL) return ERR_NOMEM; 
-
-    req->radix[0] = req->radix[1] = req->radix[2] = NULL;
-    req->op     = IO_OP_WRITE;
-    req->root   = vdi->radix_root;
-    req->lock   = vdi->radix_lock; 
-    req->vaddr  = vaddr;
-    req->block  = block;
-    /* Todo: add a pseodoheader to the block to include some location   */
-    /* information in the CRC as well.                                  */
-    req->bi.crc = (uint32_t) crc32(0L, Z_NULL, 0); 
-    req->bi.crc = (uint32_t) crc32(req->bi.crc, block, BLOCK_SIZE); 
-    req->bi.unused = 0xdeadbeef;
-
-    req->cb     = cb;
-    req->param  = param;
-    req->radix_addr[L1] = getid(req->root); /* for consistency */
-    req->state  = WRITE_LOCKED;
-
-    block_wlock(req->lock, L1_IDX(vaddr), write_cb, req);
-
-
-    return 0;
-}
-
-static void read_cb(struct io_ret ret, void *param)
-{
-    struct io_req *req = (struct io_req *)param;
-    radix_tree_node node;
-    uint64_t idx;
-    char *block;
-    void *req_param;
-
-    DPRINTF("read_cb\n");
-    /* get record */
-    switch(req->state) {
-       
-    case READ_LOCKED: 
-    
-        DPRINTF("READ_LOCKED\n");
-       req->state = READ_L1;
-       block_read(getid(req->root), read_cb, req); 
-       break;
-       
-    case READ_L1: /* block is the radix root */
-
-        DPRINTF("READ_L1\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx  = getid( node[L1_IDX(req->vaddr)] );
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_L2;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_L2:
-
-        DPRINTF("READ_L2\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx  = getid( node[L2_IDX(req->vaddr)] );
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_L3;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_L3:
-    {
-        struct block_info *bi;
-
-        DPRINTF("READ_L3\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx  = getid( node[L3_IDX(req->vaddr)] );
-        bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1];
-        req->bi = *bi;
-        free(block);
-        if ( idx == ZERO )  {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_DATA;
-            block_read(idx, read_cb, req);
-        }
-        break;
-    }
-    case READ_DATA:
-    {
-        uint32_t crc;
-
-        DPRINTF("READ_DATA\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-
-        /* crc check */
-        crc = (uint32_t) crc32(0L, Z_NULL, 0); 
-        crc = (uint32_t) crc32(crc, block, BLOCK_SIZE); 
-        if (crc != req->bi.crc) {
-            /* TODO: add a retry loop here.                          */
-            /* Do this after the cache is added -- make sure to      */
-            /* invalidate the bad page before reissuing the read.    */
-
-            warn("Bad CRC on vaddr (%Lu:%d)\n", req->vaddr, req->bi.unused);
-#ifdef PRINT_BADCRC_PAGES
-            {
-                int j;
-                for (j=0; j<BLOCK_SIZE; j++) {
-                    if isprint(block[j]) {
-                        printf("%c", block[j]);
-                    } else {
-                        printf(".");
-                    }
-                    if ((j % 64) == 0) printf("\n");
-                }
-            }
-#endif /* PRINT_BADCRC_PAGES */
-
-            /* fast and loose for the moment. */
-            /* goto fail;                     */
-        }
-
-        req->retval = ret;
-        req->state = READ_UNLOCKED;
-        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        break;
-    }
-    case READ_UNLOCKED:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("READ_UNLOCKED\n");
-        req_param = req->param;
-        r         = req->retval;
-        cb        = req->cb;
-        free(req);
-        cb(r, req_param);
-        break;
-    }
-    
-    case RETURN_ZERO:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("RETURN_ZERO\n");
-        req_param = req->param;
-        cb        = req->cb;
-        free(req);
-        r.type = IO_BLOCK_T;
-        r.u.b = newblock();
-        cb(r, req_param);
-        break;
-    }
-        
-    default:
-       DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
-       goto fail;
-    }
- 
-    return;
-
- fail:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("asyn_read had a read error.\n");
-        req_param = req->param;
-        r         = ret;
-        cb        = req->cb;
-        free(req);
-        cb(r, req_param);
-    }
-
-
-}
-
-/*
- * write_cb -- completion callback that drives one asynchronous VDI write.
- *
- * Every block_read/block_write/block_alloc/block_wunlock issued below names
- * write_cb as its callback, so this function is re-entered once per finished
- * I/O and dispatches on req->state:
- *
- *   - WRITE_LOCKED, READ_L1, READ_L2, READ_L3 walk the radix tree from
- *     req->root towards the entry for req->vaddr.
- *   - "z" states handle an entry equal to ZERO at that level: the data block
- *     and any missing lower-level nodes are allocated.
- *   - "f" states handle an entry for which iswritable() is false: the lower
- *     nodes are read, passed through clear_w_bits()/clear_L3_w_bits(), and
- *     new blocks are allocated for them and for the data.
- *   - The WRITE_L* states free the saved nodes and release the lock;
- *     WRITE_UNLOCKED hands req->retval to the caller's callback.
- *
- * r     - result of the I/O that just completed.
- * param - the struct io_req being processed; it is freed here before the
- *         caller's callback (req->cb) is invoked.
- *
- * On failure the caller's callback receives an IO_INT_T result of -1.
- */
-static void write_cb(struct io_ret r, void *param)
-{
-    struct io_req *req = (struct io_req *)param;
-    radix_tree_node node;
-    uint64_t a, addr;
-    void *req_param;
-    struct block_info *bi;
-
-    switch(req->state) {
-
-    case WRITE_LOCKED:
-
-        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
-        req->state = READ_L1;
-        block_read(getid(req->root), write_cb, req);
-        break;
-
-    case READ_L1: /* block is the radix root */
-
-        DPRINTF("READ_L1\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a    = node[L1_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L2] = addr;
-        req->radix[L1] = node;
-
-        if ( addr == ZERO ) {
-            /* L1 empty subtree: */
-            req->state = ALLOC_DATA_L1z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L1 fault: */
-            req->state = READ_L2_L1f;
-            block_read( addr, write_cb, req );
-        } else {
-            req->state = READ_L2;
-            block_read( addr, write_cb, req );
-        }
-        break;
-
-    case READ_L2:
-
-        DPRINTF("READ_L2\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a    = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L3] = addr;
-        req->radix[L2] = node;
-
-        if ( addr == ZERO ) {
-            /* L2 empty subtree: */
-            req->state = ALLOC_DATA_L2z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L2 fault: */
-            req->state = READ_L3_L2f;
-            block_read( addr, write_cb, req );
-        } else {
-            req->state = READ_L3;
-            block_read( addr, write_cb, req );
-        }
-        break;
-
-    case READ_L3:
-
-        DPRINTF("READ_L3\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a    = node[L3_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix[L3] = node;
-
-        if ( addr == ZERO ) {
-            /* L3 empty entry: */
-            req->state = ALLOC_DATA_L3z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L3 fault: */
-            req->state = ALLOC_DATA_L3f;
-            block_alloc( req->block, write_cb, req );
-        } else {
-            req->state = WRITE_DATA;
-            block_write( addr, req->block, write_cb, req );
-        }
-        break;
-
-    case WRITE_DATA:
-
-        DPRINTF("WRITE_DATA\n");
-        /* The L3 radix points to the correct block, we just need to  */
-        /* update the crc.                                            */
-        if (IO_INT(r) < 0) goto fail;
-        /* The block_info record sits in the slot after the L3 entry. */
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 101;
-        *bi = req->bi;
-        req->state = WRITE_L3;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L3 Zero Path: */
-
-    case ALLOC_DATA_L3z:
-
-        DPRINTF("ALLOC_DATA_L3z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 102;
-        *bi = req->bi;
-        req->state = WRITE_L3_L3z;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L3 Fault Path: */
-
-    case ALLOC_DATA_L3f:
-
-        DPRINTF("ALLOC_DATA_L3f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 103;
-        *bi = req->bi;
-        req->state = WRITE_L3_L3f;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L2 Zero Path: */
-
-    case ALLOC_DATA_L2z:
-
-        DPRINTF("ALLOC_DATA_L2z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3] = newblock();
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 104;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L2z;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L2z:
-
-        DPRINTF("ALLOC_L3_L2z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = WRITE_L2_L2z;
-        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
-        break;
-
-    /* L2 Fault Path: */
-
-    case READ_L3_L2f:
-
-        DPRINTF("READ_L3_L2f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        /* Check for a failed read before touching the node. */
-        if (node == NULL) goto fail;
-        clear_L3_w_bits(node);
-
-        req->radix[L3] = node;
-        req->state = ALLOC_DATA_L2f;
-        block_alloc( req->block, write_cb, req );
-        break;
-
-    case ALLOC_DATA_L2f:
-
-        DPRINTF("ALLOC_DATA_L2f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 105;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L2f;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L2f:
-
-        DPRINTF("ALLOC_L3_L2f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = WRITE_L2_L2f;
-        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
-        break;
-
-    /* L1 Zero Path: */
-
-    case ALLOC_DATA_L1z:
-
-        DPRINTF("ALLOC_DATA_L1z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3] = newblock();
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 106;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L1z;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L1z:
-
-        DPRINTF("ALLOC_L3_L1z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2] = newblock();
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = ALLOC_L2_L1z;
-        block_alloc( (char*)req->radix[L2], write_cb, req );
-        break;
-
-    case ALLOC_L2_L1z:
-
-        DPRINTF("ALLOC_L2_L1z\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L1][L1_IDX(req->vaddr)] = a;
-        req->state = WRITE_L1_L1z;
-        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
-        break;
-
-    /* L1 Fault Path: */
-
-    case READ_L2_L1f:
-
-        DPRINTF("READ_L2_L1f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        /* Check for a failed read before touching the node. */
-        if (node == NULL) goto fail;
-        clear_w_bits(node);
-        a    = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L3] = addr;
-        req->radix[L2] = node;
-
-        if (addr == ZERO) {
-            /* nothing below L2, create an empty L3 and alloc data. */
-            /* (So skip READ_L3_L1f.) */
-            req->radix[L3] = newblock();
-            req->state = ALLOC_DATA_L1f;
-            block_alloc( req->block, write_cb, req );
-        } else {
-            req->state = READ_L3_L1f;
-            block_read( addr, write_cb, req );
-        }
-        break;
-
-    case READ_L3_L1f:
-
-        DPRINTF("READ_L3_L1f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        /* Check for a failed read before touching the node. */
-        if (node == NULL) goto fail;
-        clear_L3_w_bits(node);
-
-        req->radix[L3] = node;
-        req->state = ALLOC_DATA_L1f;
-        block_alloc( req->block, write_cb, req );
-        break;
-
-    case ALLOC_DATA_L1f:
-
-        DPRINTF("ALLOC_DATA_L1f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L3][L3_IDX(req->vaddr)] = a;
-        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
-        req->bi.unused = 107;
-        *bi = req->bi;
-        req->state = ALLOC_L3_L1f;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L1f:
-
-        DPRINTF("ALLOC_L3_L1f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L2][L2_IDX(req->vaddr)] = a;
-        req->state = ALLOC_L2_L1f;
-        block_alloc( (char*)req->radix[L2], write_cb, req );
-        break;
-
-    case ALLOC_L2_L1f:
-
-        DPRINTF("ALLOC_L2_L1f\n");
-        addr = IO_ADDR(r);
-        a = writable(addr);
-        req->radix[L1][L1_IDX(req->vaddr)] = a;
-        req->state = WRITE_L1_L1f;
-        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
-        break;
-
-    case WRITE_L3:
-    case WRITE_L3_L3z:
-    case WRITE_L3_L3f:
-    case WRITE_L2_L2z:
-    case WRITE_L2_L2f:
-    case WRITE_L1_L1z:
-    case WRITE_L1_L1f:
-    {
-        int i;
-        DPRINTF("DONE\n");
-        /* free any saved node vals. */
-        for (i=0; i<3; i++)
-            if (req->radix[i] != 0) free(req->radix[i]);
-        /* Keep the final write's result until the unlock completes. */
-        req->retval = r;
-        req->state = WRITE_UNLOCKED;
-        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
-        break;
-    }
-    case WRITE_UNLOCKED:
-    {
-        struct io_ret result;
-        io_cb_t cb;
-        DPRINTF("WRITE_UNLOCKED!\n");
-        /* Copy out everything needed before req is freed. */
-        req_param = req->param;
-        result    = req->retval;
-        cb        = req->cb;
-        free(req);
-        cb(result, req_param);
-        break;
-    }
-
-    default:
-        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
-        goto fail;
-    }
-
-    return;
-
- fail:
-    {
-        struct io_ret err;
-        io_cb_t cb;
-        int i;
-
-        /* NOTE(review): this path does not call block_wunlock(); confirm */
-        /* whether the lock taken before WRITE_LOCKED is released elsewhere. */
-        DPRINTF("asyn_write had a read error mid-way.\n");
-        req_param = req->param;
-        cb        = req->cb;
-        err.type = IO_INT_T;
-        err.u.i  = -1;
-        /* free any saved node vals. */
-        for (i=0; i<3; i++)
-            free(req->radix[i]);
-        free(req);
-        cb(err, req_param);
-    }
-}
-
-/*
- * Synchronous wrapper around vdi_read().
- *
- * A locally created mutex is locked before the request is issued; the
- * completion callback unlocks it, and the second lock below blocks until
- * that has happened.  Returns the block delivered to the callback, or NULL
- * if no block was delivered.
- */
-char *vdi_read_s(vdi_t *vdi, uint64_t vaddr)
-{
-    pthread_mutex_t done = PTHREAD_MUTEX_INITIALIZER;
-    char *data = NULL;
-
-    /* Nested function (GCC extension): stores the result in the enclosing */
-    /* frame and releases the waiter. */
-    void reads_cb(struct io_ret r, void *param) 
-    {
-        pthread_mutex_t *waiter = (pthread_mutex_t *)param;
-
-        data = IO_BLOCK(r);
-        pthread_mutex_unlock(waiter);
-    }
-
-    pthread_mutex_lock(&done);
-
-    /* A non-zero return means we do not wait for the callback. */
-    if (vdi_read(vdi, vaddr, reads_cb, &done) != 0)
-        return data;
-
-    pthread_mutex_lock(&done);
-    return data;
-}
-
-
-/*
- * Synchronous wrapper around vdi_write().
- *
- * A locally created mutex is locked before the request is issued; the
- * completion callback unlocks it, and the second lock below blocks until
- * that has happened.
- *
- * Returns the integer result delivered to the callback.  If vdi_write()
- * itself returns non-zero, that value is returned instead; the original
- * code returned an uninitialised variable in that case.
- */
-int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block)
-{
-    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
-    int ret, result = -1;
-
-    /* Nested function (GCC extension): stores the result in the enclosing */
-    /* frame and releases the waiter. */
-    void writes_cb(struct io_ret r, void *param) 
-    {
-        result = IO_INT(r);
-        pthread_mutex_unlock((pthread_mutex_t *)param);
-    }
-
-    pthread_mutex_lock(&m);
-    ret = vdi_write(vdi, vaddr, block, writes_cb, &m);
-
-    /* NOTE(review): assumes the callback is not invoked when vdi_write() */
-    /* returns non-zero, as the original code also did by not waiting. */
-    if (ret != 0)
-        return ret;
-
-    pthread_mutex_lock(&m);
-
-    return result;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/requests-async.h
--- a/tools/blktap/parallax/requests-async.h    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#ifndef _REQUESTSASYNC_H_
-#define _REQUESTSASYNC_H_
-
-#include "block-async.h"
-#include "blockstore.h" /* for newblock etc. */
-
-/*
-#define BLOCK_SIZE 4096
-#define ZERO 0ULL
-#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
-#define iswritable(x) (((x) & 1LLU) != 0)
-#define writable(x) (((x) << 1) | 1LLU)
-#define readonly(x) ((uint64_t)((x) << 1))
-*/
-
-#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */
-#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x))
-
-int vdi_read (vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param);
-int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, io_cb_t cb, void *param);
-             
-/* synchronous versions: */
-char *vdi_read_s (vdi_t *vdi, uint64_t vaddr);
-int   vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block);
-
-#define ERR_BAD_VADDR  -1
-#define ERR_NOMEM      -2
-
-#endif //_REQUESTSASYNC_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/snaplog.c
--- a/tools/blktap/parallax/snaplog.c   Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,238 +0,0 @@
-/**************************************************************************
- * 
- * snaplog.c
- *
- * Snapshot log on-disk data structure.
- *
- */
- 
- /* VDI histories are made from chains of snapshot logs.  These logs record 
-  * the (radix) root and timestamp of individual snapshots.
-  *
-  * creation of a new VDI involves 'forking' a snapshot log, by creating a 
-  * new, empty log (in a new VDI) and parenting it off of a record in an 
-  * existing snapshot log.
-  *
-  * snapshot log blocks have at most one writer.
-  */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "snaplog.h"
-
-
-
-/*
- * Read a block and return it as a snapshot log block.
- * Returns NULL if the read fails or the block does not carry SNAP_MAGIC;
- * otherwise the caller is responsible for releasing it with freeblock().
- */
-snap_block_t *snap_get_block(uint64_t block)
-{
-    snap_block_t *candidate = (snap_block_t *)readblock(block);
-
-    /* Reject blocks that are not snapshot log blocks. */
-    if ( (candidate != NULL) && (candidate->hdr.magic != SNAP_MAGIC) ) {
-        freeblock(candidate);
-        candidate = NULL;
-    }
-
-    return candidate;
-}
-    
-/*
- * Copy the snapshot record named by id into *target.
- *
- * Returns 0 on success, or -1 if id is NULL, the block cannot be read as a
- * snapshot block, or the index is out of range.
- */
-int snap_get_id(snap_id_t *id, snap_rec_t *target)
-{
-    snap_block_t *blk;
-    
-    if ( id == NULL )
-        return -1;
-    
-    blk = snap_get_block(id->block);
-    
-    if ( blk == NULL ) 
-        return -1;
-    
-    /* index == nr_entries is still accepted, as before, but an index at */
-    /* or beyond SNAPS_PER_BLOCK would read past the end of snaps[].     */
-    if ( (id->index > blk->hdr.nr_entries) ||
-         (id->index >= SNAPS_PER_BLOCK) ) {
-        freeblock(blk);
-        return -1;
-    }
-    
-    *target = blk->snaps[id->index];
-    freeblock(blk);
-    return 0;
-}
-
-/*
- * Allocate a new, empty snapshot log block and store its id in *new_id.
- *
- * parent_id - record the new block is parented on, or NULL for none.
- * fork_id   - record the log was forked from; only read when parent_id is
- *             not NULL.
- * new_id    - receives the new block's address with index 0.
- *
- * When the parent lives in a different, non-zero block than the fork point,
- * the parent's log_entries count is carried over into the new block.
- *
- * Returns 0 on success, -1 if a block could not be created, read or
- * allocated.
- */
-int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
-                                  snap_id_t *new_id)
-{
-    snap_block_t *blk, *pblk;
-
-    blk = (snap_block_t *)newblock();
-    if (blk == NULL)
-        return -1;
-
-    blk->hdr.magic  = SNAP_MAGIC;
-    blk->hdr.nr_entries  = 0;
-    blk->hdr.log_entries = 0;
-    blk->hdr.immutable   = 0;
-    
-    if (   (parent_id  != NULL) 
-        && (parent_id->block != fork_id->block) 
-        && (parent_id->block != 0)) {
-        
-        pblk = snap_get_block(parent_id->block);
-        if (pblk == NULL) {
-            /* parent is unreadable or not a snapshot block */
-            freeblock(blk);
-            return -1;
-        }
-        blk->hdr.log_entries = pblk->hdr.log_entries;
-        freeblock(pblk);
-    }
-    
-    if (parent_id != NULL) {
-        blk->hdr.parent_block = *parent_id;
-        blk->hdr.fork_block   = *fork_id;
-    } else {
-        blk->hdr.parent_block = null_snap_id;
-        blk->hdr.fork_block   = null_snap_id;
-    }
-    
-    new_id->index = 0;
-    new_id->block = allocblock(blk);
-    freeblock(blk);
-    if (new_id->block == 0)
-        return -1;
-    
-    return 0;
-}
-
-/*
- * Create a new snapshot log block whose parent and fork point are both
- * parent_id.  Returns whatever __snap_block_create() returns.
- */
-int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
-{
-    snap_id_t *fork_point = parent_id;
-
-    return __snap_block_create(parent_id, fork_point, new_id);
-}
-
-/*
- * Append rec to the snapshot log at old_id and store the resulting position
- * in *new_id.  old_id and new_id may point at the same snap_id_t.
- *
- * When the current block already holds SNAPS_PER_BLOCK entries, a new block
- * is created, the old one is marked immutable and written back, and the
- * record goes into the new block.
- *
- * Returns 0 on success, -1 on failure (unreadable block, deleted record,
- * immutable block, or failure to create the next block).
- */
-int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
-{
-    snap_id_t id = *old_id;
-    snap_block_t *blk = snap_get_block(id.block);
-    
-    if ( blk == NULL )
-        return -1;
-    
-    if ( rec->deleted == 1 ) {
-        printf("Attempt to append a deleted snapshot!\n");
-        freeblock(blk);
-        return -1;
-    }
-    
-    if ( blk->hdr.immutable != 0 ) {
-        printf("Attempt to snap an immutable snap block!\n");
-        freeblock(blk);
-        return -1;
-    }
-    
-    new_id->block = id.block;
-    
-    if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) {
-        int ret;
-        
-        id.index--; /* make id point to the last full record */
-        
-        ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id);
-        if ( ret != 0 ) {
-            freeblock(blk);
-            return -1;
-        }
-        
-        blk->hdr.immutable = 1;
-        writeblock(id.block, blk);
-        freeblock(blk);
-        blk = snap_get_block(new_id->block);
-        if ( blk == NULL )
-            return -1;
-        id = *new_id;
-    }
-    
-    blk->snaps[blk->hdr.nr_entries] = *rec;
-    blk->hdr.nr_entries++;
-    blk->hdr.log_entries++;
-    /* the returned index names the slot after the record just written */
-    new_id->index = blk->hdr.nr_entries;
-    writeblock(id.block, blk);
-    freeblock(blk);
-    return 0;
-}
-
-/*
- * Mark the parent snapshot p_id as deleted and collapse its radix tree
- * into that of the child snapshot c_id.
- *
- * Only consecutive records in the same log block are supported; anything
- * else prints a message and fails.
- *
- * Returns the value of collapse() on success, -1 on failure.
- */
-int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
-{
-    snap_block_t *p_blk, *c_blk;
-    snap_rec_t   *p_rec, *c_rec;
-    int ret = -1;
-    
-    p_blk = snap_get_block(p_id->block);
-    
-    if (p_blk == NULL) return(-1);
-    
-    if (c_id->block == p_id->block)
-    {
-        c_blk = p_blk;
-    } else {
-         c_blk = snap_get_block(c_id->block);
-    }
-    
-    /* The child block may have failed to load; the original re-tested */
-    /* p_blk here by mistake.                                           */
-    if (c_blk == NULL) {
-        freeblock(p_blk);
-        return(-1);
-    }
-     
-    p_rec = &p_blk->snaps[p_id->index];
-    c_rec = &c_blk->snaps[c_id->index];
-
-    /* XXX todo: check that neither record is already deleted, and test */
-    /* the range here for delete (and eventually fork) bits.            */
-    /* for now, snaps must be consecutive, on the same log page: */
-    
-    if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
-    {
-        printf("Deleting non-consecutive snaps is not done yet.\n");
-        goto done;
-    }
-    
-    /* mark parent as deleted XXX: may need to lock parent block here.*/
-    p_rec->deleted = 1;
-    writeblock(p_id->block, p_blk);
-    
-    /* delete the parent */
-    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
-    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
-    
-    /* return the number of blocks reclaimed. */
-    
-done:
-    if (c_blk != p_blk) freeblock(c_blk);
-    freeblock(p_blk);
-    
-    return(ret);
-}
-
-/*
- * Print the snapshot records reachable from snap_id: the records of its
- * block from snap_id->index down to 0, then the same for each block reached
- * through hdr.parent_block, until a parent block address of 0 is seen or a
- * block cannot be read.
- */
-void snap_print_history(snap_id_t *snap_id)
-{
-    snap_id_t id = *snap_id;
-    snap_block_t *blk = snap_get_block(id.block);
-    
-    while ( blk ) {
-        /* Restart from this block's own index; the original kept a     */
-        /* single counter that had wrapped around after the first block. */
-        unsigned int idx = id.index;
-        /* NULL unless a parent exists; the original left this          */
-        /* uninitialised when the parent block address was 0.           */
-        snap_block_t *parent = NULL;
-
-        printf("[Snap block %Ld]:\n", id.block);
-        do {
-            printf("   %03u: root: %Ld ts: %ld.%ld\n", idx, 
-                    blk->snaps[idx].radix_root,
-                    blk->snaps[idx].timestamp.tv_sec,
-                    blk->snaps[idx].timestamp.tv_usec);
-        } while (idx-- != 0);
-        
-        id = blk->hdr.parent_block;
-        if (id.block != 0) {
-            parent = snap_get_block(id.block);
-        }
-        freeblock(blk);
-        blk = parent;
-    }
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/snaplog.h
--- a/tools/blktap/parallax/snaplog.h   Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-/**************************************************************************
- * 
- * snaplog.h
- *
- * Snapshot log on-disk data structure.
- *
- */
- 
-#include "radix.h"
-#include "blockstore.h"    /* for BLOCK_SIZE */
- 
-#ifndef __SNAPLOG_H__
-#define __SNAPLOG_H__
-
-/* Names one record in a snapshot log: a block address plus an index into */
-/* that block's snaps[] array.                                            */
-typedef struct snap_id {
-    uint64_t            block;
-    unsigned int   index;
-} snap_id_t;
-
-/* One snapshot record: a radix root, a timestamp and a deleted flag. */
-typedef struct snap_rec {
-    uint64_t            radix_root;
-    struct timeval timestamp;
-    /* flags: */
-    unsigned       deleted:1;
-} snap_rec_t;
-
-
-int  snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
-int  snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
-int  snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
-void snap_print_history(snap_id_t *snap_id);
-int  snap_get_id(snap_id_t *id, snap_rec_t *target);
-
-
-/* exported for vdi debugging */
-#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
-
-static const snap_id_t null_snap_id = { 0, 0 }; 
-
-/* Header at the start of every snapshot log block; a block is treated as */
-/* a snapshot block only when magic equals SNAP_MAGIC.                    */
-typedef struct snap_block_hdr {
-    uint64_t            magic;
-    snap_id_t      parent_block; /* parent block within this chain */
-    snap_id_t      fork_block;   /* where this log was forked */
-    unsigned       log_entries;  /* total entries since forking */
-    unsigned short nr_entries;   /* entries in snaps[] */
-    unsigned short immutable;    /* has this snap page become immutable? */
-} snap_block_hdr_t;
-
-
-#define SNAPS_PER_BLOCK \
-    ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
-
-/* A snapshot log block: the header followed by as many records as fit in */
-/* the rest of a BLOCK_SIZE block (see SNAPS_PER_BLOCK).                  */
-typedef struct snap_block {
-    snap_block_hdr_t hdr;
-    snap_rec_t       snaps[SNAPS_PER_BLOCK];
-} snap_block_t;
-    
-
-snap_block_t *snap_get_block(uint64_t block);
-
-#endif /* __SNAPLOG_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi.c
--- a/tools/blktap/parallax/vdi.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-/**************************************************************************
- * 
- * vdi.c
- *
- * Virtual Disk Image (VDI) Interfaces
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <sys/time.h>
-#include <pthread.h>
-#include "blockstore.h"
-#include "block-async.h"
-#include "requests-async.h"
-#include "radix.h"
-#include "vdi.h"
-                    
-#define VDI_REG_BLOCK   2LL
-#define VDI_RADIX_ROOT  writable(3)
-                                                            
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* I haven't decided about this registry stuff, so this is just a really
- * quick lash-up so that there is some way to track VDIs.
- *
- * (Most vdi access should be with a direct handle to the block, so this
- *  registry is just for start-of-day lookup and other control operations.)
- */
-
-/*
- * Build a fresh VDI registry and write it to VDI_REG_BLOCK.
- *
- * The still-empty block is first written to VDI_RADIX_ROOT, then the magic
- * and a VDI count of zero are filled in and the block is written to
- * VDI_REG_BLOCK.  Returns the in-memory registry block, or NULL if no block
- * could be obtained.
- */
-vdi_registry_t *create_vdi_registry(void)
-{
-    vdi_registry_t *registry = (vdi_registry_t *)newblock();
-
-    if (registry != NULL) {
-        /* zero-fill the vdi radix root while we have an empty block. */
-        writeblock(VDI_RADIX_ROOT, (void *)registry);
-
-        DPRINTF("[vdi.c] Creating VDI registry!\n");
-        registry->magic   = VDI_REG_MAGIC;
-        registry->nr_vdis = 0;
-
-        writeblock(VDI_REG_BLOCK, (void *)registry);
-    }
-
-    return registry;
-}
-    
-/*
- * Read the VDI registry from VDI_REG_BLOCK, creating it if the block cannot
- * be read.
- *
- * Returns the registry block, or NULL if it could neither be read nor
- * created, or if its magic is not VDI_REG_MAGIC.
- */
-vdi_registry_t *get_vdi_registry(void)
-{
-    vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
-    
-    if ( vdi_reg == NULL )
-        vdi_reg = create_vdi_registry();
-    
-    /* creation can fail too; do not dereference NULL below */
-    if ( vdi_reg == NULL )
-        return NULL;
-    
-    if ( vdi_reg->magic != VDI_REG_MAGIC ) {
-        freeblock(vdi_reg);
-        return NULL;
-    }
-    
-    return vdi_reg;
-}
-
-
-/*
- * Create a new VDI and add it to the registry.
- *
- * parent_snap - snapshot to fork from.  If snap_get_id() fails for it (for
- *               example because it is NULL), a fresh radix root is
- *               allocated instead.
- * name        - human readable name; truncated to VDI_NAME_SZ - 1
- *               characters.
- *
- * Returns the new vdi block with an initialised radix_lock, or NULL on
- * failure.
- */
-vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
-{
-    int ret;
-    vdi_t *vdi;
-    vdi_registry_t *vdi_reg;
-    snap_rec_t snap_rec;
-    
-    /* create a vdi struct */
-    vdi = newblock();
-    if (vdi == NULL) 
-        return NULL;
-    
-    if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
-        vdi->radix_root = snapshot(snap_rec.radix_root);
-    } else {
-        vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
-        vdi->radix_root = writable(vdi->radix_root); /* grr. */
-    }
-    
-    /* create a snapshot log, and add it to the vdi struct */
-    
-    ret = snap_block_create(parent_snap, &vdi->snap);
-    if ( ret != 0 ) {
-        DPRINTF("Error getting snap block in vdi_create.\n");
-        freeblock(vdi);
-        return NULL;
-    }
-            
-    /* append the vdi to the registry, fill block and id.             */
-    /* implicit allocation means we have to write the vdi twice here. */
-    vdi_reg    = get_vdi_registry();
-    if ( vdi_reg == NULL ) {
-        freeblock(vdi);
-        return NULL;
-    }
-    
-    vdi->block = allocblock((void *)vdi);
-    vdi->id    = vdi_reg->nr_vdis++;
-    /* name[] has VDI_NAME_SZ bytes, so the last valid index is       */
-    /* VDI_NAME_SZ - 1; the original terminator was written one past. */
-    strncpy(vdi->name, name, VDI_NAME_SZ - 1);
-    vdi->name[VDI_NAME_SZ - 1] = '\0';
-    vdi->radix_lock = NULL; /* for tidiness */
-    writeblock(vdi->block, (void *)vdi);
-    
-    update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
-    writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
-    freeblock(vdi_reg);
-    
-    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
-    if (vdi->radix_lock == NULL) 
-    {
-        perror("couldn't malloc radix_lock for new vdi!");
-        freeblock(vdi);
-        return NULL;
-    }
-    radix_lock_init(vdi->radix_lock);
-    
-    return vdi;
-}
-
-/* vdi_get and vdi_put currently act more like alloc/free -- they don't 
- * do refcount-based allocation.  
- */
-/*
- * Look up vdi_id in the registry radix tree, read the VDI's block and
- * attach a freshly initialised radix_lock to it.
- *
- * Returns the vdi (release it with vdi_put()), or NULL if the id is not
- * registered, the block cannot be read, or the lock cannot be allocated.
- */
-vdi_t *vdi_get(uint64_t vdi_id)
-{
-    uint64_t vdi_blk;
-    vdi_t *vdi;
-    
-    vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
-    
-    if ( vdi_blk == 0 )
-        return NULL;
-    
-    vdi = (vdi_t *)readblock(vdi_blk);
-    if ( vdi == NULL )
-        return NULL;
-    
-    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
-    if (vdi->radix_lock == NULL) 
-    {
-        perror("couldn't malloc radix_lock for new vdi!");
-        freeblock(vdi);
-        return NULL;
-    }
-    radix_lock_init(vdi->radix_lock);
-    
-    return vdi;
-}
-
-/* Release a vdi: free its radix_lock, then the vdi block itself. */
-void vdi_put(vdi_t *vdi)
-{
-    struct radix_lock *lock = vdi->radix_lock;
-
-    free(lock);
-    freeblock(vdi);
-}
-
-/*
- * Take a snapshot of a VDI.
- *
- * The current radix root is saved in a new, timestamped snapshot record,
- * the VDI's radix root is replaced by the result of snapshot(), and the
- * record is appended to the VDI's snapshot log.  The vdi block is written
- * back only if the append succeeds.
- */
-void vdi_snapshot(vdi_t *vdi)
-{
-    snap_rec_t record;
-
-    record.radix_root = vdi->radix_root;
-    gettimeofday(&record.timestamp, NULL);
-    record.deleted = 0;
-
-    vdi->radix_root = snapshot(vdi->radix_root);
-
-    if ( snap_append(&vdi->snap, &record, &vdi->snap) == 0 ) {
-        writeblock(vdi->block, vdi);
-    } else {
-        printf("snap_append returned failure\n");
-    }
-}
-    
-/*
- * Start-of-day initialisation: set up the radix cache and make sure a VDI
- * registry exists.  Returns 0 on success, -1 if no registry could be
- * obtained.
- */
-int __init_vdi()
-{
-    vdi_registry_t *registry;
-
-    /* sneak this in here for the moment. */
-    __rcache_init();
-
-    /* get_vdi_registry() creates the registry if it doesn't exist. */
-    registry = get_vdi_registry();
-    if (registry != NULL) {
-        freeblock(registry);
-        return 0;
-    }
-
-    printf("[vdi.c] Couldn't get/create a VDI registry!\n");
-    return -1;
-}
-    
-#ifdef VDI_STANDALONE
-
-#define TEST_VDIS      50
-#define NR_ITERS    50000
-#define FORK_POINTS   200
-#define INIT_VDIS       3
-#define INIT_SNAPS     40
-
-/* These must be of decreasing size: */
-#define NEW_FORK       (RAND_MAX-(RAND_MAX/1000))
-#define NEW_ROOT_VDI   (RAND_MAX-((RAND_MAX/1000)*2))
-#define NEW_FORK_VDI   (RAND_MAX-((RAND_MAX/1000)*3))
-
-#define GRAPH_DOT_FILE "vdi.dot"
-#define GRAPH_PS_FILE  "vdi.ps"
-
-
-typedef struct sh_st {
-    snap_id_t     id;
-    struct sh_st *next;
-} sh_t;
-
-#define SNAP_HASHSZ 1024
-sh_t *node_hash[SNAP_HASHSZ];
-#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
-
-#define SNAPID_EQUAL(_a,_b) \
-    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
-/*
- * Record a snapshot id in node_hash.
- *
- * Returns 1 if the id was already present, 0 if it was not.  A new id is
- * appended to the end of its bucket's chain; if the entry cannot be
- * allocated, 0 is returned and nothing is recorded.
- */
-int sh_check_and_add(snap_id_t *id)
-{
-    sh_t **s = &node_hash[SNAP_HASH(id)];
-    sh_t *entry;
-    
-    while (*s != NULL) {
-        if (SNAPID_EQUAL(&((*s)->id), id))
-            return 1;
-        /* Advance the cursor, not the list: the original assigned to */
-        /* *s, which overwrote the bucket head and dropped entries.   */
-        s = &(*s)->next;
-    }
-    
-    entry = (sh_t *)malloc(sizeof(sh_t));
-    if (entry == NULL) {
-        perror("couldn't malloc snapshot hash entry");
-        return 0;
-    }
-    entry->id = *id;
-    entry->next = NULL;
-    *s = entry;
-    
-    return 0;
-}
-
-/*
- * Standalone VDI test driver (built only when VDI_STANDALONE is defined).
- *
- * Creates INIT_VDIS seed VDIs with a random number of snapshots each, runs
- * NR_ITERS iterations of a random workload (new fork points, new root VDIs,
- * new forked VDIs, snapshots), writes the resulting fork graph to
- * GRAPH_DOT_FILE and runs dot to render it to GRAPH_PS_FILE.
- *
- * Returns 0 on success, 1 if a seed VDI or the dot file cannot be created.
- */
-int main(int argc, char *argv[])
-{
-    vdi_t *vdi_list[TEST_VDIS];
-    vdi_t *new_vdi;
-    snap_id_t id, fork_points[FORK_POINTS];
-    int nr_vdis = 0, nr_forks = 0;
-    int i, j, r;
-    FILE *f;
-    char name[VDI_NAME_SZ];
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
-    
-    for (i=0; i<INIT_VDIS; i++) {
-        r=rand();
-        
-        sprintf(name, "VDI Number %d", nr_vdis);
-        vdi_list[i] = vdi_create(NULL, name);
-        if (vdi_list[i] == NULL) {
-            printf("[o] Couldn't create seed VDI %d\n", i);
-            return 1;
-        }
-        for (j=0; j<(r%INIT_SNAPS); j++)
-            vdi_snapshot(vdi_list[i]);
-        fork_points[i] = vdi_list[i]->snap;
-        nr_vdis++;
-        nr_forks++;
-    }
-    
-    printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
-            
-    for (i=0; i<NR_ITERS; i++) {
-        r = rand();
-        
-        if ( r > NEW_FORK ) {
-            /* fork_points[] has FORK_POINTS slots; the original test */
-            /* (>) allowed one write past the end.                    */
-            if ( nr_forks >= FORK_POINTS )
-                continue;
-            id = vdi_list[r%nr_vdis]->snap;
-            if ( ( id.block == 0 ) || ( id.index == 0 ) )
-                continue;
-            id.index--;
-            fork_points[nr_forks++] = id;
-            
-        } else if ( r > NEW_ROOT_VDI ) {
-            
-            if ( nr_vdis == TEST_VDIS )
-                continue;
-            
-            sprintf(name, "VDI Number %d.", nr_vdis);
-            new_vdi = vdi_create(NULL, name);
-            if ( new_vdi != NULL )
-                vdi_list[nr_vdis++] = new_vdi;
-            
-        } else if ( r > NEW_FORK_VDI ) {
-            
-            if ( nr_vdis == TEST_VDIS )
-                continue;
-            
-            sprintf(name, "VDI Number %d.", nr_vdis);
-            new_vdi = vdi_create(&fork_points[r%nr_forks], name);
-            if ( new_vdi != NULL )
-                vdi_list[nr_vdis++] = new_vdi;
-            
-        } else /* SNAPSHOT */ {
-            
-            vdi_snapshot(vdi_list[r%nr_vdis]);
-            
-        }
-    }
-    
-    /* now dump it out to a dot file. */
-    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
-    
-    f = fopen(GRAPH_DOT_FILE, "w");
-    if (f == NULL) {
-        perror("couldn't open " GRAPH_DOT_FILE);
-        return 1;
-    }
-    
-    /* write graph preamble */
-    fprintf(f, "digraph G {\n");
-    fprintf(f, "   rankdir=LR\n");
-    
-    for (i=0; i<nr_vdis; i++) {
-        char oldnode[255];
-        snap_block_t *blk;
-        snap_id_t id = vdi_list[i]->snap;
-        int nr_snaps, done=0;
-        
-        /* add a node for the id */
-        printf("vdi: %d\n", i);
-        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
-                id.block, id.index, vdi_list[i]->name,
-                id.block, id.index);
-        sprintf(oldnode, "n%Ld%d", id.block, id.index);
-        
-        /* follow fork_block links until block 0 or an id already seen */
-        while (id.block != 0) {
-            blk = snap_get_block(id.block);
-            if (blk == NULL)
-                break;
-            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
-            id = blk->hdr.fork_block;
-            
-            done = sh_check_and_add(&id);
-            
-            /* add a node for the fork_id */
-            if (!done) {
-                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
-                    id.block, id.index,
-                    id.block, id.index);
-            }
-            
-            /* add an edge between them */
-            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
-                    id.block, id.index, oldnode, nr_snaps);
-            sprintf(oldnode, "n%Ld%d", id.block, id.index);
-            freeblock(blk);
-            
-            if (done) break;
-        }
-    }
-    
-    /* write graph postamble */
-    fprintf(f, "}\n");
-    fclose(f);
-    
-    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
-    {
-        char cmd[255];
-        sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE);
-        system(cmd);
-    }
-    return 0;
-}
-
-#endif
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi.h
--- a/tools/blktap/parallax/vdi.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#ifndef _VDI_H_
-#define _VDI_H_
-/**************************************************************************
- * 
- * vdi.h
- *
- * Virtual Disk Image (VDI) Interfaces
- *
- */
-
-#ifndef __VDI_H__
-#define __VDI_H__
-
-#include "blktaplib.h"
-#include "snaplog.h"
-
-#define VDI_HEIGHT     27 /* Note that these are now hard-coded */
-#define VDI_REG_HEIGHT 27 /* in the async lookup code           */
-
-#define VDI_NAME_SZ 256
-
-
-typedef struct vdi {
-    uint64_t         id;               /* unique vdi id -- used by the registry   */
-    uint64_t         block;            /* block where this vdi lives (also unique)*/
-    uint64_t         radix_root;       /* radix root node for block mappings      */
-    snap_id_t   snap;             /* next snapshot slot for this VDI         */
-    struct vdi *next;             /* used to hash-chain in blkif.            */
-    blkif_vdev_t vdevice;         /* currently mounted as...                 */
-    struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs   */
-    char        name[VDI_NAME_SZ];/* human readable vdi name                 */
-} vdi_t;
-
-#define VDI_REG_MAGIC   0xff00ff0bb0ff00ffLL
-
-typedef struct vdi_registry {
-    uint64_t     magic;
-    uint64_t     nr_vdis;
-} vdi_registry_t;
-
-
-int __init_vdi(void);
-
-vdi_t *vdi_get(uint64_t vdi_id);
-void vdi_put(vdi_t *vdi);
-vdi_registry_t *get_vdi_registry(void);
-vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
-uint64_t vdi_lookup_block(vdi_t *vdi, uint64_t vdi_block, int *writable);
-void vdi_update_block(vdi_t *vdi, uint64_t vdi_block, uint64_t g_block);
-void vdi_snapshot(vdi_t *vdi);
-
-
-#endif /* __VDI_H__ */
-
-#endif //_VDI_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_create.c
--- a/tools/blktap/parallax/vdi_create.c        Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/**************************************************************************
- * 
- * vdi_create.c
- *
- * Create a new vdi.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    char         name[VDI_NAME_SZ] = "";
-    snap_id_t    id;
-    int          from_snap = 0;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc == 1 ) {
-        printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
-        exit(-1);
-    }
-    
-    strncpy( name, argv[1], VDI_NAME_SZ);
-    name[VDI_NAME_SZ] = '\0';    
-    
-    if ( argc > 3 ) {
-        id.block   = (uint64_t)          atoll(argv[2]);
-        id.index   = (unsigned int) atol (argv[3]);
-        from_snap  = 1;
-    }
-    
-    vdi = vdi_create( from_snap ? &id : NULL, name);
-    
-    if ( vdi == NULL ) {
-        printf("Failed to create VDI!\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_fill.c
--- a/tools/blktap/parallax/vdi_fill.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-/**************************************************************************
- * 
- * vdi_fill.c
- *
- * Hoover a file or device into a vdi.
- * You must first create the vdi with vdi_create.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "requests-async.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    uint64_t          id;
-    int          fd;
-    struct stat  st;
-    uint64_t          tot_size;
-    char         spage[BLOCK_SIZE];
-    char        *dpage;
-    uint64_t          vblock = 0, count=0;
-    
-    __init_blockstore();
-    init_block_async();
-    __init_vdi();
-    
-    if ( argc < 3 ) {
-        printf("usage: %s <VDI id> <filename>\n", argv[0]);
-        exit(-1);
-    }
-        
-    id = (uint64_t) atoll(argv[1]);
-    
-    vdi = vdi_get( id );
-    
-    if ( vdi == NULL ) {
-        printf("Failed to retreive VDI %Ld!\n", id);
-        exit(-1);
-    }
-    
-    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
-    
-    if (fd < 0) {
-        printf("Couldn't open %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    if ( fstat(fd, &st) != 0 ) {
-        printf("Couldn't stat %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    tot_size = (uint64_t) st.st_size;
-    printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
-    
-    printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);    
-    printf("           ");
-    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
-        vdi_write_s(vdi, vblock, spage);
-        
-        vblock++;
-        if ((vblock % 512) == 0)
-        printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
-        fflush(stdout);
-    }
-    printf("\n");
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_list.c
--- a/tools/blktap/parallax/vdi_list.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-/**************************************************************************
- * 
- * vdi_list.c
- *
- * Print a list of VDIs on the block store.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_registry_t *reg;
-    vdi_t *vdi;
-    int i;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    reg = get_vdi_registry();
-    
-    if ( reg == NULL ) {
-        printf("couldn't get VDI registry.\n");
-        exit(-1);
-    }
-    
-    for (i=0; i < reg->nr_vdis; i++) {
-        vdi = vdi_get(i);
-        
-        if ( vdi != NULL ) {
-            
-            printf("%10Ld %60s\n", vdi->id, vdi->name);
-            freeblock(vdi);
-            
-        }
-    }
-    
-    freeblock(reg);
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap.c
--- a/tools/blktap/parallax/vdi_snap.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-/**************************************************************************
- * 
- * vdi_snap.c
- *
- * Snapshot a vdi.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t  *vdi;
-    uint64_t     id;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc == 1 ) {
-        printf("usage: %s <VDI id>\n", argv[0]);
-        exit(-1);
-    }
-    
-    id = (uint64_t) atoll(argv[1]);
-    
-    vdi = vdi_get(id);
-    
-    if ( vdi == NULL ) {
-        printf("couldn't find the requested VDI.\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-    
-    vdi_snapshot(vdi);
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap_delete.c
--- a/tools/blktap/parallax/vdi_snap_delete.c   Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/**************************************************************************
- * 
- * vdi_snap_delete.c
- *
- * Delete a snapshot.
- *
- * This is not finished:  right now it takes a snap n and calls 
- * snap_collapse(n,n+1).
- *
- * TODO: support for non-consecutive, non-same-block snaps
- *       Avoid forking probs.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "snaplog.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    snap_id_t    id, c_id;
-    int ret;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc != 3 ) {
-        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
-        exit(-1);
-    }
-    
-    id.block   = (uint64_t)          atoll(argv[1]);
-    id.index   = (unsigned int) atol (argv[2]);
-    
-    c_id = id;
-    c_id.index++;
-    
-    ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
-    
-    printf("Freed %d blocks.\n", ret);
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap_list.c
--- a/tools/blktap/parallax/vdi_snap_list.c     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-/**************************************************************************
- * 
- * vdi_snap_list.c
- *
- * Print a list of snapshots for the specified vdi.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t        *vdi;
-    uint64_t           id;
-    int           i, max_snaps = -1;
-    snap_block_t *blk;
-    snap_id_t     sid;
-    char         *t;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    if ( argc == 1 ) {
-        printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
-        exit(-1);
-    }
-    
-    id = (uint64_t) atoll(argv[1]);
-    
-    if ( argc > 2 ) {
-        max_snaps = atoi(argv[2]);
-    }
-    
-    vdi = vdi_get(id);
-    
-    if ( vdi == NULL ) {
-        printf("couldn't find the requested VDI.\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-    
-    sid = vdi->snap;
-    sid.index--;
-    
-    //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp", 
-    //    "radix root", "d");
-    printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp", 
-            "radix root", "d");
-     
-    while (sid.block != 0) {
-        blk = snap_get_block(sid.block);
-        for (i = sid.index; i >= 0; i--) {
-            if ( max_snaps == 0  ) {
-                freeblock(blk);
-                goto done;
-            }
-            t = ctime(&blk->snaps[i].timestamp.tv_sec);
-            t[strlen(t)-1] = '\0';
-            //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
-            printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
-                    sid.block, i, 
-                    //blk->snaps[i].timestamp.tv_sec,
-                    t,
-                    blk->snaps[i].timestamp.tv_usec,
-                    blk->snaps[i].radix_root,
-                    blk->snaps[i].deleted ? "*" : " ");
-            if ( max_snaps != -1 ) 
-                max_snaps--;
-        }
-        sid = blk->hdr.parent_block;
-        freeblock(blk);
-    }
-done:            
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_tree.c
--- a/tools/blktap/parallax/vdi_tree.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-/**************************************************************************
- * 
- * vdi_tree.c
- *
- * Output current vdi tree to dot and postscript.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-#define GRAPH_DOT_FILE "vdi.dot"
-#define GRAPH_PS_FILE  "vdi.ps"
-
-typedef struct sh_st {
-    snap_id_t     id;
-    struct sh_st *next;
-} sh_t;
-
-#define SNAP_HASHSZ 1024
-sh_t *node_hash[SNAP_HASHSZ];
-#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
-
-#define SNAPID_EQUAL(_a,_b) \
-    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
-int sh_check_and_add(snap_id_t *id)
-{
-    sh_t **s = &node_hash[SNAP_HASH(id)];
-    
-    while (*s != NULL) {
-        if (SNAPID_EQUAL(&((*s)->id), id))
-            return 1;
-        *s = (*s)->next;
-    }
-    
-    *s = (sh_t *)malloc(sizeof(sh_t));
-    (*s)->id = *id;
-    (*s)->next = NULL;
-    
-    return 0;
-}
-
-int main(int argc, char *argv[])
-{
-    FILE *f;
-    char dot_file[255] = GRAPH_DOT_FILE;
-    char  ps_file[255] = GRAPH_PS_FILE;
-    int nr_vdis = 0, nr_forks = 0;
-    vdi_registry_t *reg;
-    vdi_t *vdi;
-    int i;
-    
-    __init_blockstore();
-    __init_vdi();
-    
-    reg = get_vdi_registry();
-    
-    if ( reg == NULL ) {
-        printf("couldn't get VDI registry.\n");
-        exit(-1);
-    }
-    
-    if ( argc > 1 ) {
-        strncpy(ps_file, argv[1], 255);
-        ps_file[255] = '\0';
-    }
-    
-    /* now dump it out to a dot file. */
-    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
-    
-    f = fopen(dot_file, "w");
-    
-    /* write graph preamble */
-    fprintf(f, "digraph G {\n");
-    fprintf(f, "   rankdir=LR\n");
-    
-    for (i=0; i<reg->nr_vdis; i++) {
-        char oldnode[255];
-        snap_block_t *blk;
-        snap_id_t id;
-        int nr_snaps, done=0;
-        
-        vdi = vdi_get(i);
-        id = vdi->snap;
-        /* add a node for the id */
-printf("vdi: %d\n", i);
-        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
-                id.block, id.index, vdi->name,
-                id.block, id.index);
-        sprintf(oldnode, "n%Ld%d", id.block, id.index);
-        
-        while (id.block != 0) {
-            blk = snap_get_block(id.block);
-            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
-            id = blk->hdr.fork_block;
-            
-            done = sh_check_and_add(&id);
-            
-            /* add a node for the fork_id */
-            if (!done) {
-                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
-                    id.block, id.index,
-                    id.block, id.index);
-            }
-            
-            /* add an edge between them */
-            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
-                    id.block, id.index, oldnode, nr_snaps);
-            sprintf(oldnode, "n%Ld%d", id.block, id.index);
-            freeblock(blk);
-            
-            if (done) break;
-        }
-    }
-    
-    /* write graph postamble */
-    fprintf(f, "}\n");
-    fclose(f);
-    
-    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
-    {
-        char cmd[255];
-        sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file);
-        system(cmd);
-    }
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_unittest.c
--- a/tools/blktap/parallax/vdi_unittest.c      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-/**************************************************************************
- * 
- * vdi_unittest.c
- *
- * Run a small test workload to ensure that data access through a vdi
- * is (at least superficially) correct.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "requests-async.h"
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-#define TEST_PAGES  32
-static char *zero_page;
-static char pages[TEST_PAGES][BLOCK_SIZE];
-static int next_page = 0;
-
-void fill_test_pages(void)
-{
-    int i, j;
-    long *page;
-
-    for (i=0; i< TEST_PAGES; i++) {
-        page = (unsigned long *)pages[i];
-        for (j=0; j<(BLOCK_SIZE/4); j++) {
-            page[j] = random();
-        }
-    }
-
-    zero_page = newblock();
-}
-
-inline uint64_t make_vaddr(uint64_t L1, uint64_t L2, uint64_t L3)
-{
-    uint64_t ret = L1;
-
-    ret = (ret << 9) | L2;
-    ret = (ret << 9) | L3;
-
-    return ret;
-}
-
-void touch_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3)
-{
-    uint64_t vaddr;
-    char *page = pages[next_page++];
-    char *rpage = NULL;
-
-    printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
-
-    vaddr = make_vaddr(L1, L2, L3);
-    vdi_write_s(vdi, vaddr, page);
-    rpage = vdi_read_s(vdi, vaddr);
-
-    if (rpage == NULL) 
-    {
-        printf( "read %Lu returned NULL\n", vaddr); 
-        return; 
-    }
-
-    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
-    {
-        printf( "read %Lu returned a different page\n", vaddr);
-        return;
-    }
-
-    freeblock(rpage);
-}
-
-void test_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3, char *page)
-{
-    uint64_t vaddr;
-    char *rpage = NULL;
-
-    printf("TEST  (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
-
-    vaddr = make_vaddr(L1, L2, L3);
-    rpage = vdi_read_s(vdi, vaddr);
-
-    if (rpage == NULL) 
-    {
-        printf( "read %Lu returned NULL\n", vaddr); 
-        return; 
-    }
-
-    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
-    {
-        printf( "read %Lu returned a different page\n", vaddr);
-        return;
-    }
-
-    freeblock(rpage);
-}
-
-void coverage_test(vdi_t *vdi)
-{
-    uint64_t vaddr;
-    int i, j, k;
-
-    /* Do a series of writes and reads to test all paths through the 
-     * async radix code.  The radix request code will dump CRC warnings
-     * if there are data problems here as well.
-     */
-
-    /* L1 Zero */
-    touch_block(vdi, 0, 0, 0);
-
-    /* L2 Zero */
-    i = next_page;
-    touch_block(vdi, 0, 1, 0);
-
-    /* L3 Zero */
-    j = next_page;
-    touch_block(vdi, 0, 0, 1);
-    k = next_page;
-    touch_block(vdi, 0, 1, 1);
-
-    /* Direct write */
-    touch_block(vdi, 0, 0, 0);
-
-    vdi_snapshot(vdi);
-
-    /* L1 fault */
-    touch_block(vdi, 0, 0, 0);
-    /* test the read-only branches that should have been copied over. */
-    test_block(vdi, 0, 1, 0, pages[i]);
-    test_block(vdi, 0, 0, 1, pages[j]);
-
-    /* L2 fault */
-    touch_block(vdi, 0, 1, 0);
-    test_block(vdi, 0, 1, 1, pages[k]);
-
-    /* L3 fault */
-    touch_block(vdi, 0, 0, 1);
-    
-    /* read - L1 zero */
-    test_block(vdi, 1, 0, 0, zero_page);
-    
-    /* read - L2 zero */
-    test_block(vdi, 0, 2, 0, zero_page);
-
-    /* read - L3 zero */
-    test_block(vdi, 0, 0, 2, zero_page);
-}
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    uint64_t          id;
-    int          fd;
-    struct stat  st;
-    uint64_t          tot_size;
-    char         spage[BLOCK_SIZE];
-    char        *dpage;
-    uint64_t          vblock = 0, count=0;
-    
-    __init_blockstore();
-    init_block_async();
-    __init_vdi();
-        
-    vdi = vdi_create( NULL, "UNIT TEST VDI");
-    
-    if ( vdi == NULL ) {
-        printf("Failed to create VDI!\n");
-        freeblock(vdi);
-        exit(-1);
-    }
-
-    fill_test_pages();
-    coverage_test(vdi);
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_validate.c
--- a/tools/blktap/parallax/vdi_validate.c      Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-/**************************************************************************
- * 
- * vdi_validate.c
- *
- * Intended to sanity-check vm_fill and the underlying vdi code.
- *
- * Block-by-block compare of a vdi with a file/device on the disk.
- *
- */
- 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-#include "requests-async.h"
-
-int main(int argc, char *argv[])
-{
-    vdi_t       *vdi;
-    uint64_t          id;
-    int          fd;
-    struct stat  st;
-    uint64_t          tot_size;
-    char         spage[BLOCK_SIZE], *dpage;
-    char        *vpage;
-    uint64_t          vblock = 0, count=0;
-    
-    __init_blockstore();
-    init_block_async();
-    __init_vdi();
-    
-    if ( argc < 3 ) {
-        printf("usage: %s <VDI id> <filename>\n", argv[0]);
-        exit(-1);
-    }
-        
-    id = (uint64_t) atoll(argv[1]);
-    
-    vdi = vdi_get( id );
-    
-    if ( vdi == NULL ) {
-        printf("Failed to retreive VDI %Ld!\n", id);
-        exit(-1);
-    }
-    
-    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
-    
-    if (fd < 0) {
-        printf("Couldn't open %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    if ( fstat(fd, &st) != 0 ) {
-        printf("Couldn't stat %s!\n", argv[2]);
-        exit(-1);
-    }
-    
-    tot_size = (uint64_t) st.st_size;
-    printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
-    
-    printf("           ");
-    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
-
-        dpage = vdi_read_s(vdi, vblock);
-
-        if (dpage == NULL) {
-            printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
-            exit(0);
-        }
-
-        if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
-            printf("\n\nblocks don't match! (%Ld)\n", vblock);
-            exit(0);
-        }
-        
-        freeblock(dpage);
-        
-        vblock++;
-        if ((vblock % 1024) == 0) {
-            printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
-            fflush(stdout);
-        }
-    }
-    printf("\n");
-    
-    printf("VDI %Ld looks good!\n", id);
-    
-    freeblock(vdi);
-    
-    return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/Makefile
--- a/tools/blktap/ublkback/Makefile    Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-INCLUDES += -I..
-
-INSTALL            = install
-INSTALL_PROG = $(INSTALL) -m0755
-IBIN         = ublkback
-INSTALL_DIR  = /usr/sbin
-
-CFLAGS   += -Werror
-CFLAGS   += -Wno-unused
-CFLAGS   += -fno-strict-aliasing
-CFLAGS   += -I $(XEN_LIBXC)
-CFLAGS   += $(INCLUDES) -I.
-CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
-
-OBJS     = $(patsubst %.c,%.o,$(SRCS))
-
-.PHONY: all
-all: $(IBIN)
-
-LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
-
-.PHONY: install
-install:
-       $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)
-
-.PHONY: clean
-clean:
-       rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN)
-
-ublkback: 
-       $(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L..  \
-             -lblktap -laio ublkback.c ublkbacklib.c -pg
-
--include $(DEPS)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkback.c
--- a/tools/blktap/ublkback/ublkback.c  Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/* ublkback.c
- *
- * libaio-based userlevel backend.
- */
-
-#include "blktaplib.h"
-#include "ublkbacklib.h"
-
-
-int main(int argc, char *argv[])
-{
-    ublkback_init();
-    
-    register_new_blkif_hook(ublkback_new_blkif);
-    blktap_listen();
-    
-    return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkbacklib.c
--- a/tools/blktap/ublkback/ublkbacklib.c       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,473 +0,0 @@
-/* ublkbacklib.c
- *
- * file/device image-backed block device -- using linux libaio.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- *
- * NOTE: This doesn't work.  Grrr.
- */
-
-#define _GNU_SOURCE
-#define __USE_LARGEFILE64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <db.h>       
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/poll.h>
-#include <unistd.h>
-#include <errno.h>
-#include <libaio.h>
-#include <pthread.h>
-#include <time.h>
-#include <err.h>
-#include "blktaplib.h"
-
-/* XXXX:  */
-/* Current code just mounts this file/device to any requests that come in. */
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
-#define MAX_SEGMENTS_PER_REQ    11
-#define SECTOR_SHIFT             9
-#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-           
-#if 1                                                                        
-#define ASSERT(_p) \
-    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif                                                                     
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to 
- * get filled.  so count of 0 is an unused record.
- */
-typedef struct {
-    blkif_request_t  req;
-    blkif_t         *blkif;
-    int              count;
-} pending_req_t;
-
-static pending_req_t    pending_list[MAX_REQUESTS];
-static io_context_t  ctx;
-static struct iocb  *iocb_free[MAX_AIO_REQS];
-static int           iocb_free_count;
-
-/* ---[ Notification mecahnism ]--------------------------------------- */
-
-enum { 
-    READ   = 0,
-    WRITE  = 1
-};
-
-static int aio_notify[2];
-static volatile int aio_listening = 0;
-static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER;
-
-static struct io_event aio_events[MAX_AIO_REQS];
-static int             aio_event_count = 0;
-
-/* this is commented out in libaio.h for some reason. */
-extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
-
-static void *notifier_thread(void *arg)
-{
-    int ret; 
-    int msg = 0x00feeb00;
-    
-    DPRINTF("Notifier thread started.\n");
-    for (;;) {
-        pthread_mutex_lock(&notifier_sem);
-        if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) {
-            aio_event_count = ret;
-            write(aio_notify[WRITE], &msg, sizeof(msg));
-        } else {
-                printf("[io_queue_wait error! %d]\n", errno);
-                pthread_mutex_unlock(&notifier_sem);
-        }
-    }
-}
-
-/* --- Talking to xenstore: ------------------------------------------- */
-
-int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
-int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done);
-
-typedef struct image {
-    /* These need to turn into an array/rbtree for multi-disk support. */
-    int  fd;
-    uint64_t  fsid;
-    blkif_vdev_t   vdevice;
-    long int size;
-    long int secsize;
-    long int info;
-} image_t;
-
-long int ublkback_get_size(blkif_t *blkif)
-{
-    image_t *img = (image_t *)blkif->prv;
-    return img->size;
-}
-
-long int ublkback_get_secsize(blkif_t *blkif)
-{
-    image_t *img = (image_t *)blkif->prv;
-    return img->secsize;
-}
-
-unsigned ublkback_get_info(blkif_t *blkif)
-{
-    image_t *img = (image_t *)blkif->prv;
-    return img->info;
-}
-
-static struct blkif_ops ublkback_ops = {
-    get_size:    ublkback_get_size,
-    get_secsize: ublkback_get_secsize,
-    get_info:    ublkback_get_info,
-};
-
-int ublkback_new_blkif(blkif_t *blkif)
-{
-    image_t *image;
-    struct stat stat;
-    int ret;
-
-    image = (image_t *)malloc(sizeof(image_t));
-    if (image == NULL) {
-        printf("error allocating image record.\n");
-        return -ENOMEM;
-    }
-
-    /* Open it. */
-    image->fd = open(TMP_IMAGE_FILE_NAME, 
-                     O_RDWR | O_DIRECT | O_LARGEFILE);
-
-    if ((image->fd < 0) && (errno == EINVAL)) {
-        /* Maybe O_DIRECT isn't supported. */
-        warn("open() failed on '%s', trying again without O_DIRECT",
-               TMP_IMAGE_FILE_NAME);
-        image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE);
-    }
-
-    if (image->fd < 0) {
-        warn("Couldn't open image file!");
-        free(image);
-        return -EINVAL;
-    }
-
-    /* Size it. */
-    ret = fstat(image->fd, &stat);
-    if (ret != 0) {
-        printf("Couldn't stat image in PROBE!");
-        return -EINVAL;
-    }
-    
-    image->size = (stat.st_size >> SECTOR_SHIFT);
-
-    /* TODO: IOCTL to get size of raw device. */
-/*
-  ret = ioctl(img->fd, BLKGETSIZE, &blksize);
-  if (ret != 0) {
-  printf("Couldn't ioctl image in PROBE!\n");
-  goto err;
-  }
-*/
-    if (image->size == 0)
-        image->size =((uint64_t) 16836057);
-    image->secsize = 512;
-    image->info = 0;
-
-    /* Register the hooks */
-    blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request);
-    blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response);
-
-
-    printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", 
-           blkif->pdev, TMP_IMAGE_FILE_NAME);
-
-    blkif->ops = &ublkback_ops;
-    blkif->prv = (void *)image;
-
-    return 0;
-}
-
-
-/* --- Moving the bits: ----------------------------------------------- */
-
-static int batch_count = 0;
-int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done)
-{
-    int fd;
-    uint64_t sector;
-    char *spage, *dpage;
-    int ret, i, idx;
-    blkif_response_t *rsp;
-    domid_t dom = ID_TO_DOM(req->id);
-    static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 
-    static int io_idx = 0;
-    struct iocb *io;
-    image_t *img;
-
-    img = (image_t *)blkif->prv;
-    fd = img->fd;
-
-    switch (req->operation) 
-    {
-    case BLKIF_OP_WRITE:
-    {
-        unsigned long size;
-
-        batch_count++;
-
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        pending_list[idx].blkif = blkif;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector = req->sector_number + (8*i);
-            
-            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-            
-            if (req->seg[i].first_sect != 0)
-                DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
-                        "pos: %15lu\n",
-                        req->sector_number, sector, 
-                        req->seg[i].first_sect, req->seg[i].last_sect,
-                        (long)(sector << SECTOR_SHIFT));
-                        
-            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            spage += req->seg[i].first_sect << SECTOR_SHIFT;
-            
-            /*convert size and sector to byte offsets */
-            size   <<= SECTOR_SHIFT;
-            sector <<= SECTOR_SHIFT;
-            
-            io = iocb_free[--iocb_free_count];
-            io_prep_pwrite(io, fd, spage, size, sector);
-            io->data = (void *)idx;
-            //ioq[i] = io;
-            ioq[io_idx++] = io;
-        }
-
-        if (batch_done) {
-            ret = io_submit(ctx, io_idx, ioq);
-            batch_count = 0;
-            if (ret < 0)
-                printf("BADNESS: io_submit error! (%d)\n", errno);
-            io_idx = 0;
-        }
-        
-        return BLKTAP_STOLEN;
-        
-    }
-    case BLKIF_OP_READ:
-    {
-        unsigned long size;
-        
-        batch_count++;
-        idx = ID_TO_IDX(req->id);
-        ASSERT(pending_list[idx].count == 0);
-        memcpy(&pending_list[idx].req, req, sizeof(*req));
-        pending_list[idx].count = req->nr_segments;
-        pending_list[idx].blkif = blkif;
-        
-        for (i = 0; i < req->nr_segments; i++) {
-            
-            sector  = req->sector_number + (8*i);
-            
-            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-
-            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-            dpage += req->seg[i].first_sect << SECTOR_SHIFT;
-            
-            if (req->seg[i].first_sect != 0)
-                DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
-                        "pos: %15lu dpage: %p\n", 
-                        req->sector_number, sector, 
-                        req->seg[i].first_sect, req->seg[i].last_sect,
-                        (long)(sector << SECTOR_SHIFT), dpage);
-            
-            /*convert size and sector to byte offsets */
-            size   <<= SECTOR_SHIFT;
-            sector <<= SECTOR_SHIFT;
-            
-
-            /*
-             * NB: Looks like AIO now has non-page aligned support, this path 
-             * can probably be removed...  Only really used for hunting
-             * superblocks anyway... ;)
-             */
-            if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) {
-                /* AIO to raw devices must be page aligned, so do this read
-                 * synchronously.  The OS is probably just looking for 
-                 * a superblock or something, so this won't hurt performance. 
-                 */
-                int ret;
-
-                printf("Slow path block read.\n");
-                /* Question: do in-progress aio ops modify the file cursor? */
-                ret = lseek(fd, sector, SEEK_SET);
-                if (ret == (off_t)-1)
-                    printf("lseek failed!\n");
-                ret = read(fd, dpage, size);
-                if (ret < 0)
-                    printf("read problem (%d)\n", ret);
-                printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret);
-
-                /* not an async request any more... */
-                pending_list[idx].count--;
-
-                rsp = (blkif_response_t *)req;
-                rsp->id = req->id;
-                rsp->operation = BLKIF_OP_READ;
-                rsp->status = BLKIF_RSP_OKAY;
-                return BLKTAP_RESPOND;  
-                /* Doh -- need to flush aio if this is end-of-batch */
-            }
-
-            io = iocb_free[--iocb_free_count];
-            
-            io_prep_pread(io, fd, dpage, size, sector);
-            io->data = (void *)idx;
-            
-            ioq[io_idx++] = io;
-            //ioq[i] = io;
-        }
-        
-        if (batch_done) {
-            ret = io_submit(ctx, io_idx, ioq);
-            batch_count = 0;
-            if (ret < 0)
-                printf("BADNESS: io_submit error! (%d)\n", errno);
-            io_idx = 0;
-        }
-        
-        return BLKTAP_STOLEN;
-        
-    }
-    }
-    
-    printf("Unknown block operation!\n");
-err:
-    rsp = (blkif_response_t *)req;
-    rsp->id = req->id;
-    rsp->operation = req->operation;
-    rsp->status = BLKIF_RSP_ERROR;
-    return BLKTAP_RESPOND;  
-}
-
-
-/*
- * Poll hook for the AIO notification pipe.
- *
- * Walks the completed events in aio_events[] (aio_event_count of them),
- * returns each iocb to the free stack, and injects a response once the last
- * outstanding segment of a request has completed.  Afterwards it consumes
- * the notification from aio_notify[READ], sets aio_listening and releases
- * notifier_sem.
- *
- * fd is not used.  Always returns 0.
- */
-int ublkback_pollhook(int fd)
-{
-    struct io_event *ep;
-    int idx;
-    blkif_response_t *rsp;
-    int responses_queued = 0;
-
-    for (ep = aio_events; aio_event_count-- > 0; ep++) {
-        struct iocb *io = ep->obj;
-
-        /* The request index was stored in the iocb's data pointer; go
-         * through long so the pointer is not truncated implicitly. */
-        idx = (int)(long)ep->data;
-
-        /* Reject indices on either side of the table, not just too-large
-         * ones: a negative idx would index before pending_list[]. */
-        if ((idx < 0) || (idx > MAX_REQUESTS-1) ||
-            (pending_list[idx].count == 0)) {
-            printf("invalid index returned(%d)!\n", idx);
-            break;
-        }
-
-        /* NOTE(review): a failed AIO is only logged; the response sent
-         * below still carries BLKIF_RSP_OKAY. */
-        if ((int)ep->res < 0)
-            printf("***\n***aio request error! (%d,%d)\n***\n",
-                   (int)ep->res, (int)ep->res2);
-
-        pending_list[idx].count--;
-        iocb_free[iocb_free_count++] = io;
-
-        if (pending_list[idx].count == 0) {
-            /* The response is built in place over the saved request, so
-             * copy the request first to keep id/operation readable. */
-            blkif_request_t tmp = pending_list[idx].req;
-            rsp = (blkif_response_t *)&pending_list[idx].req;
-            rsp->id = tmp.id;
-            rsp->operation = tmp.operation;
-            rsp->status = BLKIF_RSP_OKAY;
-            blkif_inject_response(pending_list[idx].blkif, rsp);
-            responses_queued++;
-        }
-    }
-
-    if (responses_queued) {
-        blktap_kick_responses();
-    }
-
-    /* Consume the notification; the value read is not used. */
-    read(aio_notify[READ], &idx, sizeof(idx));
-    aio_listening = 1;
-    pthread_mutex_unlock(&notifier_sem);
-
-    return 0;
-}
-
-/*
- * Response hook.  The image library is the end of the request stream, so
- * there is nothing to do with a response: all arguments are ignored and
- * BLKTAP_PASS is returned.
- */
-int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done)
-{
-    (void)blkif;
-    (void)rsp;
-    (void)batch_done;
-
-    return BLKTAP_PASS;
-}
-
-/*
- * One-time setup of the AIO backend:
- *   - clear the outstanding-segment count of every pending_list[] slot,
- *   - create the AIO context with room for MAX_AIO_REQS requests,
- *   - allocate MAX_AIO_REQS iocbs onto the free stack,
- *   - create the aio_notify pipe and start notifier_thread,
- *   - register ublkback_pollhook for POLLIN on the pipe's read end.
- *
- * Any failure is fatal: a message is printed and the process exits with a
- * non-zero status.
- */
-void ublkback_init(void)
-{
-    int i, rc;
-    pthread_t p;
-
-    for (i = 0; i < MAX_REQUESTS; i++)
-        pending_list[i].count = 0;
-
-    memset(&ctx, 0, sizeof(ctx));
-    rc = io_queue_init(MAX_AIO_REQS, &ctx);
-    if (rc != 0) {
-        printf("queue_init failed! (%d)\n", rc);
-        exit(1);
-    }
-
-    for (i = 0; i < MAX_AIO_REQS; i++) {
-        iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb));
-        if (iocb_free[i] == NULL) {
-            printf("error allocating iocb array\n");
-            exit(1);
-        }
-    }
-    /* iocb_free_count is the number of free entries (consumers pop with
-     * iocb_free[--iocb_free_count]), so it must cover every slot filled
-     * above, not stop one short at the last loop index. */
-    iocb_free_count = MAX_AIO_REQS;
-
-    rc = pipe(aio_notify);
-    if (rc != 0) {
-        printf("pipe failed! (%d)\n", errno);
-        exit(1);
-    }
-
-    /* pthread_create() reports failure through its return value and does
-     * not set errno. */
-    rc = pthread_create(&p, NULL, notifier_thread, NULL);
-    if (rc != 0) {
-        printf("pthread_create failed! (%d)\n", rc);
-        exit(1);
-    }
-
-    aio_listening = 1;
-
-    blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook);
-}
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkbacklib.h
--- a/tools/blktap/ublkback/ublkbacklib.h       Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-/* ublkbacklib.h
- *
- * aio image-backed block device.
- * 
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int  ublkback_request(blkif_request_t *req, int batch_done);
-int  ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done); /* noop */
-int  ublkback_new_blkif(blkif_t *blkif);
-void ublkback_init(void);
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c     Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,568 +0,0 @@
-/*
- * xenbus.c
- * 
- * xenbus interface to the blocktap.
- * 
- * this handles the top-half of integration with block devices through the
- * store -- the tap driver negotiates the device channel etc, while the
- * userland tap client needs to sort out the disk parameters etc.
- * 
- * A. Warfield 2005 Based primarily on the blkback and xenbus driver code.  
- * Comments there apply here...
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <err.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <xs.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <poll.h>
-#include "blktaplib.h"
-#include "list.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* --- Xenstore / Xenbus helpers ---------------------------------------- */
-/*
- * These should all be pulled out into the xenstore API.  I'm faulting commands
- * in from the xenbus interface as i need them.
- */
-
-
-/*
- * Read several nodes below 'dir' in one call.
- *
- * The variadic tail is a NULL-terminated list of (name, fmt, result) tuples:
- *   name   - node to read, looked up as "<dir>/<name>"
- *   fmt    - scanf-style format used to parse the value into 'result', or
- *            NULL to hand the raw string back through (char **)result, in
- *            which case the caller owns the string and must free() it
- *   result - destination pointer
- *
- * Returns 0 on success, or a positive errno value (ENOMEM, ENOENT, EINVAL)
- * for the first tuple that fails; remaining tuples are not processed.
- */
-int xs_gather(struct xs_handle *xs, const char *dir, ...)
-{
-    va_list ap;
-    const char *name;
-    char *path;
-    int ret = 0;
-
-    va_start(ap, dir);
-    while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
-        const char *fmt = va_arg(ap, char *);
-        void *result = va_arg(ap, void *);
-        char *p;
-
-        if (asprintf(&path, "%s/%s", dir, name) == -1)
-        {
-            warn("allocation error in xs_gather!\n");
-            ret = ENOMEM;
-            break;
-        }
-        p = xs_read(xs, path, NULL);
-        free(path);
-        if (p == NULL) {
-            ret = ENOENT;
-            break;
-        }
-        if (fmt) {
-            /* sscanf() returns EOF (-1), not 0, when the value is empty, so
-             * treat anything short of one conversion as a parse failure. */
-            if (sscanf(p, fmt, result) < 1)
-                ret = EINVAL;
-            free(p);
-        } else
-            *(char **)result = p;
-    }
-    va_end(ap);
-    return ret;
-}
-
-/*
- * Format a value printf-style and write it to the node "<dir>/<node>".
- * The value is written including its terminating NUL byte.
- *
- * Returns the result of xs_write(); callers in this file treat 0 as
- * failure.  0 is also returned when the value or the path cannot be
- * allocated.
- */
-int xs_printf(struct xs_handle *h, const char *dir, const char *node, 
-                  const char *fmt, ...)
-{
-        char *buf, *path;
-        va_list ap;
-        int ret;
- 
-        va_start(ap, fmt);
-        ret = vasprintf(&buf, fmt, ap);
-        va_end(ap);
-
-        /* After a failed (v)asprintf() the output pointer is undefined, so
-         * the return value is the only reliable failure indicator. */
-        if (ret == -1)
-            return 0;
- 
-        if (asprintf(&path, "%s/%s", dir, node) == -1) {
-            free(buf);
-            return 0;
-        }
-
-        ret = xs_write(h, path, buf, strlen(buf)+1);
-
-        free(buf);
-        free(path);
-
-        return ret;
-}
-
-
-/*
- * Test whether 'path' exists in the store by trying to list it.
- * Returns 1 if xs_directory() succeeds, 0 otherwise.
- */
-int xs_exists(struct xs_handle *h, const char *path)
-{
-    char **d;
-    unsigned int num;  /* entry count; only needed to satisfy the API */
-
-    d = xs_directory(h, path, &num);
-    if (d == NULL)
-        return 0;
-    free(d);
-    return 1;
-}
-
-
-
-/*
- * Look up the "domid" node of the domain whose "name" node equals 'name'.
- *
- * Every entry under /local/domain is examined in turn and the first match
- * wins.  This assumes that the domain name we are looking for is unique!
- *
- * Returns the domid string as read from the store (the caller must free()
- * it), or NULL if the directory cannot be listed, no entry matches, or an
- * allocation fails.
- */
-char *get_dom_domid(struct xs_handle *h, const char *name)
-{
-    char **e, *val, *domid = NULL;
-    unsigned int num, i, len;
-    char *path;
-
-    e = xs_directory(h, "/local/domain", &num);
-    if (e == NULL)
-        return NULL;
-
-    /* A for loop so that skipping an entry still advances the index. */
-    for (i = 0; i < num; i++) {
-        if (asprintf(&path, "/local/domain/%s/name", e[i]) == -1)
-            break;
-        val = xs_read(h, path, &len);
-        free(path);
-        if (val == NULL)
-            continue;   /* entry has no readable name: skip it */
-        if (strcmp(val, name) == 0) {
-            /* match! */
-            free(val);
-            if (asprintf(&path, "/local/domain/%s/domid", e[i]) != -1) {
-                domid = xs_read(h, path, &len);
-                free(path);
-            }
-            break;
-        }
-        free(val);
-    }
-
-    free(e);
-    return domid;
-}
-
-/*
- * Find the offset at which 'str' should be cut so that it keeps exactly
- * 'len' occurrences of the separator 'c'.
- *
- * Returns the index of the (len+1)-th occurrence of 'c'; if the string
- * contains exactly 'len' occurrences, its full length; and -ERANGE if it
- * contains fewer.
- */
-static int strsep_len(const char *str, char c, unsigned int len)
-{
-    unsigned int pos = 0;
-
-    while (str[pos] != '\0') {
-        if (str[pos] == c) {
-            if (len == 0)
-                return pos;
-            len--;
-        }
-        pos++;
-    }
-
-    if (len != 0)
-        return -ERANGE;
-    return pos;
-}
-
-
-/* xenbus watches: */     
-/* Register callback to watch this node. */
-/* One registered watch; its address, printed in hex, is used as the token. */
-struct xenbus_watch
-{
-        /* linkage on the 'watches' list while the watch is registered */
-        struct list_head list;
-        /* store path handed to xs_watch()/xs_unwatch() */
-        char *node;
-        /* invoked by xs_fire_next_watch() with the handle, this watch and
-         * the path reported by the watch event */
-        void (*callback)(struct xs_handle *h, 
-                         struct xenbus_watch *, 
-                         const  char *node);
-};
-
-static LIST_HEAD(watches);
-
-/*
- * Map a watch token back to its xenbus_watch.
- *
- * The token is the watch's address in hex.  A little paranoia: rather than
- * trusting the token, the decoded pointer is only returned if it is found
- * on the 'watches' list; otherwise NULL is returned.
- */
-static struct xenbus_watch *find_watch(const char *token)
-{
-    struct xenbus_watch *candidate = (void *)strtoul(token, NULL, 16);
-    struct xenbus_watch *w;
-
-    list_for_each_entry(w, &watches, list) {
-        if (w == candidate)
-            return w;
-    }
-
-    return NULL;
-}
-
-/*
- * Register 'watch' with the store and remember it on the 'watches' list.
- *
- * Like xs_watch(), returns 0 on failure and non-zero on success.  A watch
- * whose token is already registered is also a failure and returns 0, so
- * that callers testing for 0 do not mistake a collision for success.
- */
-int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
-{
-    /* Pointer in ascii is the token. */
-    char token[sizeof(watch) * 2 + 1];
-    int er;
-
-    snprintf(token, sizeof(token), "%lX", (unsigned long)watch);
-    if (find_watch(token))
-    {
-        warn("watch collision!");
-        return 0;
-    }
-
-    er = xs_watch(h, watch->node, token);
-    if (er != 0) {
-        /* Only track watches the store actually accepted. */
-        list_add(&watch->list, &watches);
-    }
-
-    return er;
-}
-
-/*
- * Stop watching watch->node and take 'watch' off the 'watches' list.
- *
- * Returns -EINVAL if the watch is not currently registered, 0 otherwise.
- * A failing xs_unwatch() is reported with a warning only; the watch is
- * removed from the list either way.
- */
-int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
-{
-    char token[sizeof(watch) * 2 + 1];
-    int unwatched;
-
-    sprintf(token, "%lX", (long)watch);
-
-    if (find_watch(token) == NULL)
-    {
-        warn("no such watch!");
-        return -EINVAL;
-    }
-
-    unwatched = xs_unwatch(h, watch->node, token);
-    list_del(&watch->list);
-
-    if (!unwatched)
-        warn("XENBUS Failed to release watch %s: %i",
-             watch->node, unwatched);
-
-    return 0;
-}
-
-/*
- * Issue xs_watch() again for every watch on the 'watches' list, using the
- * same pointer-derived token as at registration time.  The result of each
- * xs_watch() call is not checked.
- */
-void reregister_xenbus_watches(struct xs_handle *h)
-{
-    struct xenbus_watch *w;
-    char tok[sizeof(w) * 2 + 1];
-
-    list_for_each_entry(w, &watches, list) {
-        sprintf(tok, "%lX", (long)w);
-        xs_watch(h, w->node, tok);
-    }
-}
-
-/*
- * Read one watch event and dispatch it to the callback of the watch its
- * token refers to (based on watch_thread()).
- *
- * Returns -EAGAIN when xs_read_watch() yields nothing, and 1 once an event
- * has been consumed -- also when the token matches no registered watch, in
- * which case only a warning is printed.
- */
-int xs_fire_next_watch(struct xs_handle *h)
-{
-    unsigned int num;
-    char **event;
-    struct xenbus_watch *w;
-
-    event = xs_read_watch(h, &num);
-    if (event == NULL)
-        return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
-
-    w = find_watch(event[XS_WATCH_TOKEN]);
-    if (w != NULL)
-        w->callback(h, w, event[XS_WATCH_PATH]);
-    else
-        warn("unregistered watch fired");
-
-    free(event);
-    return 1;
-}
-
-
-
-
-/* ---------------------------------------------------------------------- */
-
-/* Per-device state for one backend directory tracked on 'belist'. */
-struct backend_info
-{
-    /* our communications channel */
-    blkif_t *blkif;
-    
-    /* parsed from the backend's "frontend-id" node */
-    long int frontend_id;
-    /* parsed from the backend's "physical-device" node */
-    long int pdev;
-    /* set to 1 when the backend has a "read-only" node */
-    long int readonly;
-    
-    /* watch back end for changes */
-    struct xenbus_watch backend_watch;
-    char *backpath;
-
-    /* watch front end for changes */
-    struct xenbus_watch watch;
-    char *frontpath;
-
-    /* linkage on 'belist' */
-    struct list_head list;
-};
-
-static LIST_HEAD(belist);
-
-/* Return the tracked backend whose backpath equals 'bepath', or NULL. */
-static struct backend_info *be_lookup_be(const char *bepath)
-{
-    struct backend_info *cur;
-
-    list_for_each_entry(cur, &belist, list) {
-        if (!strcmp(bepath, cur->backpath))
-            return cur;
-    }
-
-    return NULL;
-}
-
-/* Return 1 if a backend with backpath 'bepath' is tracked, 0 otherwise. */
-static int be_exists_be(const char *bepath)
-{
-    struct backend_info *found = be_lookup_be(bepath);
-
-    return found != NULL;
-}
-
-/* Return the tracked backend whose frontpath equals 'fepath', or NULL. */
-static struct backend_info *be_lookup_fe(const char *fepath)
-{
-    struct backend_info *cur;
-
-    list_for_each_entry(cur, &belist, list) {
-        if (!strcmp(fepath, cur->frontpath))
-            return cur;
-    }
-
-    return NULL;
-}
-
-/*
- * Tear down a tracked backend: unregister whichever of its two watches
- * have a node set, unlink it from 'belist', release its blkif (if any) and
- * free the path strings and the structure itself.  Always returns 0.
- */
-static int backend_remove(struct xs_handle *h, struct backend_info *be)
-{
-    struct xenbus_watch *fe_watch = &be->watch;
-    struct xenbus_watch *be_watch = &be->backend_watch;
-
-    /* Turn off watches (front end first, then back end). */
-    if (fe_watch->node != NULL)
-        unregister_xenbus_watch(h, fe_watch);
-    if (be_watch->node != NULL)
-        unregister_xenbus_watch(h, be_watch);
-
-    /* Unhook from be list. */
-    list_del(&be->list);
-
-    /* Free everything else. */
-    if (be->blkif != NULL)
-        free_blkif(be->blkif);
-    free(be->frontpath);
-    free(be->backpath);
-    free(be);
-
-    return 0;
-}
-
-/*
- * Watch callback for a backend's frontend path.
- *
- * Looks the backend up by w->node.  If the frontend path no longer exists
- * the backend is removed.  Otherwise, once a blkif exists and is not yet
- * CONNECTED, the "sectors", "info" and "sector-size" nodes are written to
- * the backend path inside a transaction and the blkif is marked CONNECTED.
- *
- * fepath_im is not used.
- */
-static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 
-                     const char *fepath_im)
-{
-    struct backend_info *be;
-    int er;
-
-    be = be_lookup_fe(w->node);
-    if (be == NULL)
-    {
-        /* Report the path that was looked up. */
-        warn("frontend changed called for nonexistent backend! (%s)",
-             w->node);
-        return;
-    }
-    
-    /* If other end is gone, delete ourself. */
-    if (w->node && !xs_exists(h, be->frontpath)) {
-        DPRINTF("DELETING BE: %s\n", be->backpath);
-        backend_remove(h, be);
-        return;
-    }
-
-    if (be->blkif == NULL || (be->blkif->state == CONNECTED))
-        return;
-
-    /* Supply the information about the device the frontend needs */
-    er = xs_transaction_start(h, be->backpath);
-    if (er == 0) {
-        warn("starting transaction");
-        return;
-    }
-    
-    er = xs_printf(h, be->backpath, "sectors", "%lu",
-                           be->blkif->ops->get_size(be->blkif));
-    if (er == 0) {
-        warn("writing sectors");
-        goto abort;
-    }
-    
-    er = xs_printf(h, be->backpath, "info", "%u",
-                           be->blkif->ops->get_info(be->blkif));
-    if (er == 0) {
-        warn("writing info");
-        goto abort;
-    }
-    
-    er = xs_printf(h, be->backpath, "sector-size", "%lu",
-                           be->blkif->ops->get_secsize(be->blkif));
-    if (er == 0) {
-        warn("writing sector-size");
-        goto abort;
-    }
-
-    be->blkif->state = CONNECTED;
-
-    xs_transaction_end(h, 0);
-
-    return;
-
- abort:
-    /* Do not leave the transaction open after a failed write: end it with
-     * the abort flag set so the partial update is discarded. */
-    xs_transaction_end(h, 1);
-}
-
-
-/*
- * Watch callback for a backend path.
- *
- * Looks the backend up by w->node, reads its "physical-device" node
- * (changing an already-set device is refused), records whether a
- * "read-only" node exists, and -- the first time through -- allocates and
- * initialises the blkif before calling frontend_changed().
- *
- * bepath_im is not used.
- */
-static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 
-                     const char *bepath_im)
-{
-    struct backend_info *be;
-    char *path = NULL, *p;
-    int er;
-    long int pdev = 0, handle;
-
-    be = be_lookup_be(w->node);
-    if (be == NULL)
-    {
-        warn("backend changed called for nonexistent backend! (%s)", w->node);
-        goto fail;
-    }
-    
-    er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL);
-    if (er != 0) 
-        goto fail;
-
-    if (be->pdev && be->pdev != pdev) {
-        warn("changing physical-device not supported");
-        goto fail;
-    }
-    be->pdev = pdev;
-
-    if (asprintf(&path, "%s/%s", w->node, "read-only") == -1) {
-        /* path is undefined after a failed asprintf(); reset it so the
-         * free() at 'fail' stays safe. */
-        path = NULL;
-        warn("allocating read-only path");
-        goto fail;
-    }
-    if (xs_exists(h, path))
-        be->readonly = 1;
-
-    if (be->blkif == NULL) {
-        /* Front end dir is a number, which is used as the handle.  Fall
-         * back to the whole string if the path has no '/' rather than
-         * doing arithmetic on a NULL pointer. */
-        p = strrchr(be->frontpath, '/');
-        p = (p != NULL) ? p + 1 : be->frontpath;
-        handle = strtoul(p, NULL, 0);
-
-        be->blkif = alloc_blkif(be->frontend_id);
-        if (be->blkif == NULL) 
-            goto fail;
-
-        er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
-        if (er) 
-            goto fail;
-
-        DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node);
-
-        /* Pass in NULL node to skip exist test. */
-        frontend_changed(h, &be->watch, NULL);
-    }
-
- fail:
-    free(path);
-}
-
-/*
- * Watch callback for the vbd backend directory: start tracking the backend
- * device that the changed path belongs to.
- *
- * The changed path is copied and cut at the offset strsep_len() returns
- * for six '/' separators.  If that backend has a reachable frontend and is
- * not tracked yet, a backend_info is created, watches are registered on
- * its backend and frontend paths, it is added to 'belist', and
- * backend_changed() is run once.  On any failure everything allocated here
- * is released again.
- *
- * w is not used.
- */
-static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 
-                         const char *bepath_im)
-{
-    struct backend_info *be = NULL;
-    char *frontend = NULL;
-    char *bepath;
-    int er, len;
-
-    bepath = strdup(bepath_im);
-    if (bepath == NULL)
-        return;
-
-    len = strsep_len(bepath, '/', 6);
-    if (len < 0)
-        goto free_be;
-
-    /* Truncate the copied path with prejudice. */
-    bepath[len] = '\0';
-
-    /* Zero-filled, so unset watch nodes and paths read as NULL below. */
-    be = calloc(1, sizeof(*be));
-    if (be == NULL) {
-        warn("allocating backend structure");
-        goto free_be;
-    }
-
-    er = xs_gather(h, bepath,
-                   "frontend-id", "%li", &be->frontend_id,
-                   "frontend", NULL, &frontend,
-                   NULL);
-    if (er != 0)
-        goto free_be;
-
-    if (strlen(frontend) == 0 || !xs_exists(h, frontend)) {
-        /* If we can't get a frontend path and a frontend-id,
-         * then our bus-id is no longer valid and we need to
-         * destroy the backend device.
-         */
-        DPRINTF("No frontend (%s)\n", frontend);
-        goto free_be;
-    }
-
-    /* Are we already tracking this device? */
-    if (be_exists_be(bepath))
-        goto free_be;
-
-    /* Watch the back end. */
-    be->backpath = bepath;
-    be->backend_watch.node = be->backpath;
-    be->backend_watch.callback = backend_changed;
-    er = register_xenbus_watch(h, &be->backend_watch);
-    if (er == 0) {
-        be->backend_watch.node = NULL;
-        warn("error adding backend watch on %s", bepath);
-        goto free_be;
-    }
-
-    /* Watch the front end. */
-    be->frontpath = frontend;
-    be->watch.node = be->frontpath;
-    be->watch.callback = frontend_changed;
-    er = register_xenbus_watch(h, &be->watch);
-    if (er == 0) {
-        be->watch.node = NULL;
-        warn("adding frontend watch on %s", be->frontpath);
-        goto free_be;
-    }
-
-    list_add(&be->list, &belist);
-
-    DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im);
-
-    backend_changed(h, &be->backend_watch, bepath);
-    return;
-
- free_be:
-    /* Only the backend watch can still be registered at this point. */
-    if (be != NULL && be->backend_watch.node != NULL)
-        unregister_xenbus_watch(h, &be->backend_watch);
-    free(frontend);
-    free(bepath);
-    free(be);
-}
-
-
-/*
- * Install the watch that discovers block devices for the domain called
- * 'domname': blkback_probe is registered on
- * "/local/domain/<domid>/backend/vbd".
- *
- * Returns 0 on success, -ENOENT if no domid is found for 'domname',
- * -ENOMEM on allocation failure, and -EINVAL if the watch cannot be
- * registered.  On success the watch and its path stay allocated for as
- * long as the watch is registered.
- */
-int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
-{
-    char *domid, *path;
-    struct xenbus_watch *vbd_watch;
-    int er;
-
-    domid = get_dom_domid(h, domname);
-
-    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
-
-    /* Without a domid there is no meaningful path to watch. */
-    if (domid == NULL)
-        return -ENOENT;
-
-    er = asprintf(&path, "/local/domain/%s/backend/vbd", domid);
-    free(domid);    /* only needed to build the path */
-    if (er == -1)
-        return -ENOMEM;
-
-    vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
-    if (vbd_watch == NULL) {
-        free(path);
-        return -ENOMEM;
-    }
-    vbd_watch->node     = path;
-    vbd_watch->callback = blkback_probe;
-    er = register_xenbus_watch(h, vbd_watch);
-    if (er == 0) {
-        warn("Error adding vbd probe watch %s", path);
-        /* The watch was not put on the list, so release it here. */
-        free(vbd_watch);
-        free(path);
-        return -EINVAL;
-    }
-
-    return 0;
-}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] Remove old blktap tools., Xen patchbot-unstable <=