# HG changeset patch
# User akw@xxxxxxxxxxxxxxxxxxxxx
# Node ID 840f33e54054270e3f4b9704111ed52bd381653b
# Parent 533bad7c0883189e26c2a7f43011801c417b01fe
Remove old blktap tools.
Signed-off-by: Andrew Warfield <andrew.warfield@xxxxxxxxxxxx>
---
tools/blktap/Makefile | 93 --
tools/blktap/README | 137 ---
tools/blktap/README.sept05 | 33
tools/blktap/blkdump.c | 62 -
tools/blktap/blkif.c | 212 -----
tools/blktap/blktaplib.c | 453 ----------
tools/blktap/blktaplib.h | 171 ----
tools/blktap/list.h | 55 -
tools/blktap/parallax/Makefile | 62 -
tools/blktap/parallax/README | 171 ----
tools/blktap/parallax/block-async.c | 393 ---------
tools/blktap/parallax/block-async.h | 69 -
tools/blktap/parallax/blockstore.c | 1348 --------------------------------
tools/blktap/parallax/blockstore.h | 134 ---
tools/blktap/parallax/blockstored.c | 275 ------
tools/blktap/parallax/bstest.c | 191 ----
tools/blktap/parallax/parallax.c | 608 --------------
tools/blktap/parallax/radix.c | 631 --------------
tools/blktap/parallax/radix.h | 45 -
tools/blktap/parallax/requests-async.c | 762 ------------------
tools/blktap/parallax/requests-async.h | 29
tools/blktap/parallax/snaplog.c | 238 -----
tools/blktap/parallax/snaplog.h | 61 -
tools/blktap/parallax/vdi.c | 367 --------
tools/blktap/parallax/vdi.h | 55 -
tools/blktap/parallax/vdi_create.c | 52 -
tools/blktap/parallax/vdi_fill.c | 81 -
tools/blktap/parallax/vdi_list.c | 47 -
tools/blktap/parallax/vdi_snap.c | 43 -
tools/blktap/parallax/vdi_snap_delete.c | 48 -
tools/blktap/parallax/vdi_snap_list.c | 82 -
tools/blktap/parallax/vdi_tree.c | 132 ---
tools/blktap/parallax/vdi_unittest.c | 184 ----
tools/blktap/parallax/vdi_validate.c | 97 --
tools/blktap/ublkback/Makefile | 40
tools/blktap/ublkback/ublkback.c | 18
tools/blktap/ublkback/ublkbacklib.c | 473 -----------
tools/blktap/ublkback/ublkbacklib.h | 16
tools/blktap/xenbus.c | 568 -------------
39 files changed, 8536 deletions(-)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/Makefile
--- a/tools/blktap/Makefile Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-MAJOR = 3.0
-MINOR = 0
-SONAME = libblktap.so.$(MAJOR)
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-SUBDIRS :=
-SUBDIRS += ublkback
-#SUBDIRS += parallax
-
-BLKTAP_INSTALL_DIR = /usr/sbin
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
-
-INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
-
-LIBS := -lpthread -lz
-
-SRCS :=
-SRCS += blktaplib.c xenbus.c blkif.c
-
-CFLAGS += -Werror
-CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing
-CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# get asprintf():
-CFLAGS += -D _GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-CFLAGS += $(INCLUDES)
-DEPS = .*.d
-
-OBJS = $(patsubst %.c,%.o,$(SRCS))
-IBINS :=
-#IBINS += blkdump
-
-LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
-
-.PHONY: all
-all: mk-symlinks libblktap.so #blkdump
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
-
-.PHONY: install
-install: all
- $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_DIR) -p $(DESTDIR)/usr/include
- $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
- #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
-
-.PHONY: clean
-clean:
- rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
- @set -e; for subdir in $(SUBDIRS); do \
- $(MAKE) -C $$subdir $@; \
- done
-
-.PHONY: rpm
-rpm: all
- rm -rf staging
- mkdir staging
- mkdir staging/i386
- rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
- --define "_rpmdir$$PWD/staging" -bb rpm.spec
- mv staging/i386/*.rpm .
- rm -rf staging
-
-libblktap.so: $(OBJS)
- $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \
- -L$(XEN_XENSTORE) -l xenstore \
- -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
- ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
- ln -sf libblktap.so.$(MAJOR) $@
-
-blkdump: libblktap.so
- $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
- -l blktap blkdump.c
-
-.PHONY: TAGS clean install mk-symlinks rpm
-
-.PHONY: TAGS
-TAGS:
- etags -t $(SRCS) *.h
-
--include $(DEPS)
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/README
--- a/tools/blktap/README Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-Block Tap User-level Interfaces
-Andrew Warfield
-andrew.warfield@xxxxxxxxxxxx
-February 8, 2005
-
-NOTE #1: The blktap is _experimental_ code. It works for me. Your
-mileage may vary. Don't use it for anything important. Please. ;)
-
-NOTE #2: All of the interfaces here are likely to change. This is all
-early code, and I am checking it in because others want to play with
-it. If you use it for anything, please let me know!
-
-Overview:
----------
-
-This directory contains a library and set of example applications for
-the block tap device. The block tap hooks into the split block device
-interfaces above Xen allowing them to be extended. This extension can
-be done in userspace with the help of a library.
-
-The tap can be installed either as an interposition domain in between
-a frontend and backend driver pair, or as a terminating backend, in
-which case it is responsible for serving all requests itself.
-
-There are two reasons that you might want to use the tap,
-corresponding to these configurations:
-
- 1. To examine or modify a stream of block requests while they are
- in-flight (e.g. to encrypt data, or add data-driven watchpoints)
-
- 2. To prototype a new backend driver, serving requests from the tap
- rather than passing them along to the XenLinux blkback driver.
- (e.g. to forward block requests to a remote host)
-
-
-Interface:
-----------
-
-At the moment, the tap interface is similar in spirit to that of the
-Linux netfilter. Requests are messages from a client (frontend)
-domain to a disk (backend) domain. Responses are messages travelling
-back, acknowledging the completion of a request. the library allows
-chains of functions to be attached to these events. In addition,
-hooks may be attached to handle control messages, which signify things
-like connections from new domains.
-
-At present the control messages especially expose a lot of the
-underlying driver interfaces. This may change in the future in order
-to simplify writing hooks.
-
-Here are the public interfaces:
-
-These allow hook functions to be chained:
-
- void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
- void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
- void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
-
-This allows a response to be injected, in the case where a request has
-been removed using BLKTAP_STOLEN.
-
- void blktap_inject_response(blkif_response_t *);
-
-These let you add file descriptors and handlers to the main poll loop:
-
- int blktap_attach_poll(int fd, short events, int (*func)(int));
- void blktap_detach_poll(int fd);
-
-This starts the main poll loop:
-
- int blktap_listen(void);
-
-Example:
---------
-
-blkimage.c uses an image on the local file system to serve requests to
-a domain. Here's what it looks like:
-
----[blkimg.c]---
-
-/* blkimg.c
- *
- * file-backed disk.
- */
-
-#include "blktaplib.h"
-#include "blkimglib.h"
-
-
-int main(int argc, char *argv[])
-{
- image_init();
-
- blktap_register_ctrl_hook("image_control", image_control);
- blktap_register_request_hook("image_request", image_request);
- blktap_listen();
-
- return 0;
-}
-
-----------------
-
-All of the real work is in blkimglib.c, but this illustrates the
-actual tap interface well enough. image_control() will be called with
-all control messages. image_request() handles requests. As it reads
-from an on-disk image file, no requests are ever passed on to a
-backend, and so there will be no responses to process -- so there is
-nothing registered as a response hook.
-
-Other examples:
----------------
-
-Here is a list of other examples in the directory:
-
-Things that terminate a block request stream:
-
- blkimg - Use a image file/device to serve requests
- blkgnbd - Use a remote gnbd server to serve requests
- blkaio - Use libaio... (DOES NOT WORK)
-
-Things that don't:
-
- blkdump - Print in-flight requests.
- blkcow - Really inefficient copy-on-write disks using libdb to store
- writes.
-
-There are examples of plugging these things together, for instance
-blkcowgnbd is a read-only gnbd device with copy-on-write to a local
-file.
-
-TODO:
------
-
-- Make session tracking work. At the moment these generally just handle a
- single front-end client at a time.
-
-- Integrate with Xend. Need to cleanly pass a image identifier in the connect
- message.
-
-- Make an asynchronous file-io terminator. The libaio attempt is
- tragically stalled because mapped foreign pages make pfn_valid fail
- (they are VM_IO), and so cannot be passed to aio as targets. A
- better solution may be to tear the disk interfaces out of the real
- backend and expose them somehow.
-
-- Make CoW suck less.
-
-- Do something more along the lines of dynamic linking for the
- plugins, so thatthey don't all need a new main().
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/README.sept05
--- a/tools/blktap/README.sept05 Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-The blktap has been rewritten substantially based on the current
-blkback driver. I've removed passthrough support, as this is broken
-by the move to grant tables and the lack of transitive grants. A
-blktap VM is now only capable of terminating block requests in
-userspace.
-
-ublkback/ contains a _very_ initial cut at a user-level version of the block
-backend driver. It gives a working example of how the current tap
-interfaces are used, in particular w.r.t. the vbd directories in
-xenstore.
-
-parallax/ contains fairly recent parallax code. This does not run on
-the changed blktap interface, but should only be a couple of hours
-work to get going again.
-
-All of the tricky bits are done, but there is plenty of cleaning to
-do, and the top-level functionality is not here yet. At the moment,
-the daemon ignores the pdev requested by the tools and opens the file
-or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
-
-TODO:
-1. Fix to allow pdev in the store to specify the device to open.
-2. Add support (to tools as well) to mount arbitrary files...
- just write the filename to mount into the store, instead of pdev.
-3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
- - creating a blkif should take a reference.
- - each inflight request should take a reference on dequeue in blktaplib
- - sending responses should drop refs.
- - blkif should be implicitly freed when refcounts fall to 0.
-4. Modify the parallax req/rsp code as per ublkback to use the new tap
- interfaces.
-5. Write a front end that allows parallax and normal mounts to coexist
-6. Allow blkback and blktap to run at the same time.
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blkdump.c
--- a/tools/blktap/blkdump.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/* blkdump.c
- *
- * show a running trace of block requests as they fly by.
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include "blktaplib.h"
-
-int request_print(blkif_request_t *req)
-{
- int i;
-
- if ( (req->operation == BLKIF_OP_READ) ||
- (req->operation == BLKIF_OP_WRITE) )
- {
- printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n",
- ID_TO_DOM(req->id), ID_TO_IDX(req->id),
- blkif_op_name[req->operation],
- req->nr_segments, req->handle,
- req->sector_number);
-
-
- for (i=0; i < req->nr_segments; i++) {
- printf(" (gref: 0x%8x start: %u stop: %u)\n",
- req->seg[i].gref,
- req->seg[i].first_sect,
- req->seg[i].last_sect);
- }
-
- } else {
- printf("Unknown request message type.\n");
- }
-
- return BLKTAP_PASS;
-}
-
-int response_print(blkif_response_t *rsp)
-{
- if ( (rsp->operation == BLKIF_OP_READ) ||
- (rsp->operation == BLKIF_OP_WRITE) )
- {
- printf("[%2u:%2u>%5s] (status: %d)\n",
- ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
- blkif_op_name[rsp->operation],
- rsp->status);
-
- } else {
- printf("Unknown request message type.\n");
- }
- return BLKTAP_PASS;
-}
-
-int main(int argc, char *argv[])
-{
- blktap_register_request_hook("request_print", request_print);
- blktap_register_response_hook("response_print", response_print);
- blktap_listen();
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blkif.c
--- a/tools/blktap/blkif.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,212 +0,0 @@
-/*
- * blkif.c
- *
- * The blkif interface for blktap. A blkif describes an in-use virtual disk.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <err.h>
-
-#include "blktaplib.h"
-
-#if 1
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif != NULL) &&
- ((blkif->domid != domid) || (blkif->handle != handle)) )
- blkif = blkif->hash_next;
- return blkif;
-}
-
-blkif_t *alloc_blkif(domid_t domid)
-{
- blkif_t *blkif;
-
- blkif = (blkif_t *)malloc(sizeof(blkif_t));
- if (!blkif)
- return NULL;
-
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
-
- return blkif;
-}
-
-static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
-{
- new_blkif_hook = fn;
-}
-
-int blkif_init(blkif_t *blkif, long int handle, long int pdev,
- long int readonly)
-{
- domid_t domid;
- blkif_t **pblkif;
-
- if (blkif == NULL)
- return -EINVAL;
-
- domid = blkif->domid;
- blkif->handle = handle;
- blkif->pdev = pdev;
- blkif->readonly = readonly;
-
- /*
- * Call out to the new_blkif_hook. The tap application should define this,
- * and it should return having set blkif->ops
- *
- */
- if (new_blkif_hook == NULL)
- {
- warn("Probe detected a new blkif, but no new_blkif_hook!");
- return -1;
- }
- new_blkif_hook(blkif);
-
- /* Now wire it in. */
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif != NULL )
- {
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- {
- DPRINTF("Could not create blkif: already exists\n");
- return -1;
- }
- pblkif = &(*pblkif)->hash_next;
- }
- blkif->hash_next = NULL;
- *pblkif = blkif;
-
- return 0;
-}
-
-void free_blkif(blkif_t *blkif)
-{
- blkif_t **pblkif, *curs;
-
- pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
- while ( (curs = *pblkif) != NULL )
- {
- if ( blkif == curs )
- {
- *pblkif = curs->hash_next;
- }
- pblkif = &curs->hash_next;
- }
- free(blkif);
-}
-
-void blkif_register_request_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_request_t *, int))
-{
- request_hook_t *rh_ent, **c;
-
- rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
- if (!rh_ent)
- {
- warn("couldn't allocate a new hook");
- return;
- }
-
- rh_ent->func = rh;
- rh_ent->next = NULL;
- if (asprintf(&rh_ent->name, "%s", name) == -1)
- {
- free(rh_ent);
- warn("couldn't allocate a new hook name");
- return;
- }
-
- c = &blkif->request_hook_chain;
- while (*c != NULL) {
- c = &(*c)->next;
- }
- *c = rh_ent;
-}
-
-void blkif_register_response_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_response_t *, int))
-{
- response_hook_t *rh_ent, **c;
-
- rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
- if (!rh_ent)
- {
- warn("couldn't allocate a new hook");
- return;
- }
-
- rh_ent->func = rh;
- rh_ent->next = NULL;
- if (asprintf(&rh_ent->name, "%s", name) == -1)
- {
- free(rh_ent);
- warn("couldn't allocate a new hook name");
- return;
- }
-
- c = &blkif->response_hook_chain;
- while (*c != NULL) {
- c = &(*c)->next;
- }
- *c = rh_ent;
-}
-
-void blkif_print_hooks(blkif_t *blkif)
-{
- request_hook_t *req_hook;
- response_hook_t *rsp_hook;
-
- DPRINTF("Request Hooks:\n");
- req_hook = blkif->request_hook_chain;
- while (req_hook != NULL)
- {
- DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name);
- req_hook = req_hook->next;
- }
-
- DPRINTF("Response Hooks:\n");
- rsp_hook = blkif->response_hook_chain;
- while (rsp_hook != NULL)
- {
- DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
- rsp_hook = rsp_hook->next;
- }
-}
-
-
-long int vbd_size(blkif_t *blkif)
-{
- return 1000000000;
-}
-
-long int vbd_secsize(blkif_t *blkif)
-{
- return 512;
-}
-
-unsigned vbd_info(blkif_t *blkif)
-{
- return 0;
-}
-
-
-void __init_blkif(void)
-{
- memset(blkif_hash, 0, sizeof(blkif_hash));
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blktaplib.c
--- a/tools/blktap/blktaplib.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,453 +0,0 @@
-/*
- * blktaplib.c
- *
- * userspace interface routines for the blktap driver.
- *
- * (threadsafe(r) version)
- *
- * (c) 2004 Andrew Warfield.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <err.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <xs.h>
-
-#define __COMPILING_BLKTAP_LIB
-#include "blktaplib.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-#define DEBUG_RING_IDXS 0
-
-#define POLLRDNORM 0x040
-
-#define BLKTAP_IOCTL_KICK 1
-
-
-void got_sig_bus();
-void got_sig_int();
-
-/* in kernel these are opposite, but we are a consumer now. */
-blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */
-blkif_front_ring_t be_ring;
-
-unsigned long mmap_vstart = 0;
-char *blktap_mem;
-int fd = 0;
-
-#define BLKTAP_RING_PAGES 1 /* Front */
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
-
-int bad_count = 0;
-void bad(void)
-{
- bad_count ++;
- if (bad_count > 50) exit(0);
-}
-/*-----[ ID Manipulation from tap driver code ]--------------------------*/
-
-#define ACTIVE_RING_IDX unsigned short
-
-inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
- return ( (fe_dom << 16) | idx );
-}
-
-inline unsigned int ID_TO_IDX(unsigned long id)
-{
- return ( id & 0x0000ffff );
-}
-
-inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
-
-static int (*request_hook)(blkif_request_t *req) = NULL;
-static int (*response_hook)(blkif_response_t *req) = NULL;
-
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-/*
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
- blkif_request_t *req_d;
- static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
- pthread_mutex_lock(&be_prod_mutex);
- req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
- memcpy(req_d, req, sizeof(blkif_request_t));
- wmb();
- be_ring.req_prod_pvt++;
- pthread_mutex_unlock(&be_prod_mutex);
-
- return 0;
-}
-*/
-
-inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
-{
- blkif_response_t *rsp_d;
- static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
-
- pthread_mutex_lock(&fe_prod_mutex);
- rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
- memcpy(rsp_d, rsp, sizeof(blkif_response_t));
- wmb();
- fe_ring.rsp_prod_pvt++;
- pthread_mutex_unlock(&fe_prod_mutex);
-
- return 0;
-}
-
-static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
-{
- response_hook_t *rsp_hook;
-
- rsp_hook = blkif->response_hook_chain;
- while (rsp_hook != NULL)
- {
- switch(rsp_hook->func(blkif, rsp, 1))
- {
- case BLKTAP_PASS:
- break;
- default:
- printf("Only PASS is supported for resp hooks!\n");
- }
- rsp_hook = rsp_hook->next;
- }
-}
-
-
-static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
-{
-
- apply_rsp_hooks(blkif, rsp);
-
- write_rsp_to_fe_ring(rsp);
-}
-
-void blktap_kick_responses(void)
-{
- pthread_mutex_lock(&push_mutex);
-
- RING_PUSH_RESPONSES(&fe_ring);
- ioctl(fd, BLKTAP_IOCTL_KICK_FE);
-
- pthread_mutex_unlock(&push_mutex);
-}
-
-/*-----[ Polling fd listeners ]------------------------------------------*/
-
-#define MAX_POLLFDS 64
-
-typedef struct {
- int (*func)(int fd);
- struct pollfd *pfd;
- int fd;
- short events;
- int active;
-} pollhook_t;
-
-static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */
-static pollhook_t pollhooks[MAX_POLLFDS];
-static unsigned int ph_freelist[MAX_POLLFDS];
-static unsigned int ph_cons, ph_prod;
-#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
-#define PH_IDX(x) (x % MAX_POLLFDS)
-
-int blktap_attach_poll(int fd, short events, int (*func)(int fd))
-{
- pollhook_t *ph;
-
- if (nr_pollhooks() == MAX_POLLFDS) {
- printf("Too many pollhooks!\n");
- return -1;
- }
-
- ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
-
- ph->func = func;
- ph->fd = fd;
- ph->events = events;
- ph->active = 1;
-
- DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1,
- nr_pollhooks());
-
- return 0;
-}
-
-void blktap_detach_poll(int fd)
-{
- int i;
-
- for (i=0; i<MAX_POLLFDS; i++)
- if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
- ph_freelist[PH_IDX(ph_prod++)] = i;
- pollhooks[i].pfd->fd = -1;
- pollhooks[i].active = 0;
- break;
- }
-
- DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i,
- nr_pollhooks());
-}
-
-void pollhook_init(void)
-{
- int i;
-
- for (i=0; i < MAX_POLLFDS; i++) {
- ph_freelist[i] = (i+1) % MAX_POLLFDS;
- pollhooks[i].active = 0;
- }
-
- ph_cons = 0;
- ph_prod = MAX_POLLFDS;
-}
-
-void __attribute__ ((constructor)) blktaplib_init(void)
-{
- pollhook_init();
-}
-
-/*-----[ The main listen loop ]------------------------------------------*/
-
-int blktap_listen(void)
-{
- int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
- struct xs_handle *h;
- blkif_t *blkif;
-
- /* comms rings: */
- blkif_request_t *req;
- blkif_response_t *rsp;
- blkif_sring_t *sring;
- RING_IDX rp, i, pfd_count;
-
- /* pending rings */
- blkif_request_t req_pending[BLK_RING_SIZE];
- /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
-
- /* handler hooks: */
- request_hook_t *req_hook;
- response_hook_t *rsp_hook;
-
- signal (SIGBUS, got_sig_bus);
- signal (SIGINT, got_sig_int);
-
- __init_blkif();
-
- fd = open("/dev/blktap", O_RDWR);
- if (fd == -1)
- err(-1, "open failed!");
-
- blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
- if ((int)blktap_mem == -1)
- err(-1, "mmap failed!");
-
- /* assign the rings to the mapped memory */
-/*
- sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
- FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
-*/
- sring = (blkif_sring_t *)((unsigned long)blktap_mem);
- BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
-
- mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
-
-
- /* Set up store connection and watch. */
- h = xs_daemon_open();
- if (h == NULL)
- err(-1, "xs_daemon_open");
-
- ret = add_blockdevice_probe_watch(h, "Domain-0");
- if (ret != 0)
- err(0, "adding device probewatch");
-
- ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
-
- while(1) {
- int ret;
-
- /* build the poll list */
- pfd_count = 0;
- for ( i=0; i < MAX_POLLFDS; i++ ) {
- pollhook_t *ph = &pollhooks[i];
-
- if (ph->active) {
- pfd[pfd_count].fd = ph->fd;
- pfd[pfd_count].events = ph->events;
- ph->pfd = &pfd[pfd_count];
- pfd_count++;
- }
- }
-
- tap_pfd = pfd_count++;
- pfd[tap_pfd].fd = fd;
- pfd[tap_pfd].events = POLLIN;
-
- store_pfd = pfd_count++;
- pfd[store_pfd].fd = xs_fileno(h);
- pfd[store_pfd].events = POLLIN;
-
- if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
- if (DEBUG_RING_IDXS)
- ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
- continue;
- }
-
- for (i=0; i < MAX_POLLFDS; i++) {
- if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
- pollhooks[i].func(pollhooks[i].pfd->fd);
- }
-
- if (pfd[store_pfd].revents) {
- ret = xs_fire_next_watch(h);
- }
-
- if (pfd[tap_pfd].revents)
- {
- /* empty the fe_ring */
- notify_fe = 0;
- notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
- rp = fe_ring.sring->req_prod;
- rmb();
- for (i = fe_ring.req_cons; i != rp; i++)
- {
- int done = 0;
-
- req = RING_GET_REQUEST(&fe_ring, i);
- memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
- req = &req_pending[ID_TO_IDX(req->id)];
-
- blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
-
- if (blkif != NULL)
- {
- req_hook = blkif->request_hook_chain;
- while (req_hook != NULL)
- {
- switch(req_hook->func(blkif, req, ((i+1) == rp)))
- {
- case BLKTAP_RESPOND:
- apply_rsp_hooks(blkif, (blkif_response_t *)req);
- write_rsp_to_fe_ring((blkif_response_t *)req);
- notify_fe = 1;
- done = 1;
- break;
- case BLKTAP_STOLEN:
- done = 1;
- break;
- case BLKTAP_PASS:
- break;
- default:
- printf("Unknown request hook return value!\n");
- }
- if (done) break;
- req_hook = req_hook->next;
- }
- }
-
- if (done == 0)
- {
- /* this was: */
- /* write_req_to_be_ring(req); */
-
- unsigned long id = req->id;
- unsigned short operation = req->operation;
- printf("Unterminated request!\n");
- rsp = (blkif_response_t *)req;
- rsp->id = id;
- rsp->operation = operation;
- rsp->status = BLKIF_RSP_ERROR;
- write_rsp_to_fe_ring(rsp);
- notify_fe = 1;
- done = 1;
- }
-
- }
- fe_ring.req_cons = i;
-
- /* empty the be_ring */
-/*
- notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
- rp = be_ring.sring->rsp_prod;
- rmb();
- for (i = be_ring.rsp_cons; i != rp; i++)
- {
-
- rsp = RING_GET_RESPONSE(&be_ring, i);
- memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
- rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
-
- DPRINTF("copying a be request\n");
-
- apply_rsp_hooks(rsp);
- write_rsp_to_fe_ring(rsp);
- }
- be_ring.rsp_cons = i;
-*/
- /* notify the domains */
-/*
- if (notify_be) {
- DPRINTF("notifying be\n");
-pthread_mutex_lock(&push_mutex);
- RING_PUSH_REQUESTS(&be_ring);
- ioctl(fd, BLKTAP_IOCTL_KICK_BE);
-pthread_mutex_unlock(&push_mutex);
- }
-*/
- if (notify_fe) {
- DPRINTF("notifying fe\n");
- pthread_mutex_lock(&push_mutex);
- RING_PUSH_RESPONSES(&fe_ring);
- ioctl(fd, BLKTAP_IOCTL_KICK_FE);
- pthread_mutex_unlock(&push_mutex);
- }
- }
- }
-
-
- munmap(blktap_mem, PAGE_SIZE);
-
- mmap_failed:
- close(fd);
-
- open_failed:
- return 0;
-}
-
-void got_sig_bus() {
- printf("Attempted to access a page that isn't.\n");
- exit(-1);
-}
-
-void got_sig_int() {
- DPRINTF("quitting -- returning to passthrough mode.\n");
- if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
- close(fd);
- fd = 0;
- exit(0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/blktaplib.h
--- a/tools/blktap/blktaplib.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-/* blktaplib.h
- *
- * userland accessors to the block tap.
- *
- * Sept 2/05 -- I'm scaling this back to only support block remappings
- * to user in a backend domain. Passthrough and interposition can be readded
- * once transitive grants are available.
- */
-
-#ifndef __BLKTAPLIB_H__
-#define __BLKTAPLIB_H__
-
-#include <xenctrl.h>
-#include <sys/user.h>
-#include <xen/xen.h>
-#include <xen/io/blkif.h>
-#include <xen/io/ring.h>
-#include <xen/io/domain_controller.h>
-#include <xs.h>
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-/* /dev/xen/blktap resides at device number major=10, minor=202 */
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLK_RING_SIZE
-
-/* blktap IOCTLs: */
-#define BLKTAP_IOCTL_KICK_FE 1
-#define BLKTAP_IOCTL_KICK_BE 2
-#define BLKTAP_IOCTL_SETMODE 3
-#define BLKTAP_IOCTL_PRINT_IDXS 100
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
-#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
-#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
-#define BLKTAP_MODE_COPY_FE 0x00000004
-#define BLKTAP_MODE_COPY_BE 0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
- (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
- (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
- (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) );
-/*
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
- );
-*/
-}
-
-/* Return values for handling messages in hooks. */
-#define BLKTAP_PASS 0 /* Keep passing this request as normal. */
-#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */
-#define BLKTAP_STOLEN 2 /* Hook has stolen request. */
-
-//#define domid_t unsigned short
-
-inline unsigned int ID_TO_IDX(unsigned long id);
-inline domid_t ID_TO_DOM(unsigned long id);
-
-int blktap_attach_poll(int fd, short events, int (*func)(int));
-void blktap_detach_poll(int fd);
-int blktap_listen(void);
-
-struct blkif;
-
-typedef struct request_hook_st {
- char *name;
- int (*func)(struct blkif *, blkif_request_t *, int);
- struct request_hook_st *next;
-} request_hook_t;
-
-typedef struct response_hook_st {
- char *name;
- int (*func)(struct blkif *, blkif_response_t *, int);
- struct response_hook_st *next;
-} response_hook_t;
-
-struct blkif_ops {
- long int (*get_size)(struct blkif *blkif);
- long int (*get_secsize)(struct blkif *blkif);
- unsigned (*get_info)(struct blkif *blkif);
-};
-
-typedef struct blkif {
- domid_t domid;
- long int handle;
-
- long int pdev;
- long int readonly;
-
- enum { DISCONNECTED, CONNECTED } state;
-
- struct blkif_ops *ops;
- request_hook_t *request_hook_chain;
- response_hook_t *response_hook_chain;
-
- struct blkif *hash_next;
-
- void *prv; /* device-specific data */
-} blkif_t;
-
-void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-blkif_t *alloc_blkif(domid_t domid);
-int blkif_init(blkif_t *blkif, long int handle, long int pdev,
- long int readonly);
-void free_blkif(blkif_t *blkif);
-void __init_blkif(void);
-
-
-/* xenstore/xenbus: */
-extern int add_blockdevice_probe_watch(struct xs_handle *h,
- const char *domname);
-int xs_fire_next_watch(struct xs_handle *h);
-
-
-void blkif_print_hooks(blkif_t *blkif);
-void blkif_register_request_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_request_t *, int));
-void blkif_register_response_hook(blkif_t *blkif, char *name,
- int (*rh)(blkif_t *, blkif_response_t *, int));
-void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
-void blktap_kick_responses(void);
-
-/* this must match the underlying driver... */
-#define MAX_PENDING_REQS 64
-
-/* Accessing attached data page mappings */
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-
-extern unsigned long mmap_vstart;
-
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
- [BLKIF_OP_READ] = "READ",
- [BLKIF_OP_WRITE] = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
-
-#endif /* __BLKTAPLIB_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/list.h
--- a/tools/blktap/list.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * list.h
- *
- * This is a subset of linux's list.h intended to be used in user-space.
- *
- */
-
-#ifndef __LIST_H__
-#define __LIST_H__
-
-#define LIST_POISON1 ((void *) 0x00100100)
-#define LIST_POISON2 ((void *) 0x00200200)
-
-struct list_head {
- struct list_head *next, *prev;
-};
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
- struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void __list_add(struct list_head *new,
- struct list_head *prev,
- struct list_head *next)
-{
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
-}
-
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
- next->prev = prev;
- prev->next = next;
-}
-static inline void list_del(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
- entry->next = LIST_POISON1;
- entry->prev = LIST_POISON2;
-}
-#define list_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
-#endif /* __LIST_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/Makefile
--- a/tools/blktap/parallax/Makefile Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-PARALLAX_INSTALL_DIR = /usr/sbin
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
-
-INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
-
-LDFLAGS = -L.. -lpthread -lz -lblktap
-
-#PLX_SRCS :=
-PLX_SRCS := vdi.c
-PLX_SRCS += radix.c
-PLX_SRCS += snaplog.c
-PLX_SRCS += blockstore.c
-PLX_SRCS += block-async.c
-PLX_SRCS += requests-async.c
-VDI_SRCS := $(PLX_SRCS)
-PLX_SRCS += parallax.c
-
-#VDI_TOOLS :=
-VDI_TOOLS := vdi_create
-VDI_TOOLS += vdi_list
-VDI_TOOLS += vdi_snap
-VDI_TOOLS += vdi_snap_list
-VDI_TOOLS += vdi_snap_delete
-VDI_TOOLS += vdi_fill
-VDI_TOOLS += vdi_tree
-VDI_TOOLS += vdi_validate
-
-CFLAGS += -Werror
-CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing
-CFLAGS += $(INCLUDES)
-CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
-OBJS = $(patsubst %.c,%.o,$(SRCS))
-IBINS = parallax $(VDI_TOOLS)
-
-.PHONY: all
-all: $(VDI_TOOLS) parallax blockstored
-
-.PHONY: install
-install: all
- $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
-
-.PHONY: clean
-clean:
- rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
-
-parallax: $(PLX_SRCS)
- $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
-
-${VDI_TOOLS}: %: %.c $(VDI_SRCS)
- $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
-
--include $(DEPS)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/README
--- a/tools/blktap/parallax/README Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-Parallax Quick Overview
-March 3, 2005
-
-This is intended to provide a quick set of instructions to let you
-guys play with the current parallax source. In it's current form, the
-code will let you run an arbitrary number of VMs off of a single disk
-image, doing copy-on-write as they make updates. Each domain is
-assigned a virtual disk image (VDI), which may be based on a snapshot
-of an existing image. All of the VDI and snapshot management should
-currently work.
-
-The current implementation uses a single file as a blockstore for
-_everything_ this will soon be replaced by the fancier backend code
-and the local cache. As it stands, Parallax will create
-"blockstore.dat" in the directory that you run it from, and use
-largefile support to make this grow to unfathomable girth. So, you
-probably want to run the daemon off of a local disk, with a lot of
-free space.
-
-Here's how to get going:
-
-0. Setup:
----------
-
-Pick a local directory on a disk with lots of room. You should be
-running from a privileged domain (e.g. dom0) with the blocktap
-configured in and block backend NOT.
-
-For convenience (for the moment) copy all of the vdi tools (vdi_*) and
-the parallax daemon from tools/blktap into this directory.
-
-1. Populate the blockstore:
----------------------------
-
-First you need to put at least one image into the blockstore. You
-will need a disk image, either as a file or local partition. My
-general approach has been to
-
-(a) make a really big sparse file with
-
- dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
-
-(b) put a filesystem into it
-
- mkfs.ext3 ./image
-
-(c) mount it using loopback
-
- mkdir ./mnt
- mount -o loop ./image
-
-(d) cd into it and untar one of the image files from srg-roots.
-
- cd mnt
- tar ...
-
-NOTE: Beware if your system is FC3. mkfs is not compatible with old
-versions of fedora, and so you don't have much choice but to install
-further fc3 images if you have used the fc3 version of mkfs.
-
-(e) unmount the image
-
- cd ..
- umount mnt
-
-(f) now, create a new VDI to hold the image
-
- ./vdi_create "My new FC3 VDI"
-
-(g) get the id of the new VDI.
-
- ./vdi_list
-
- | 0 My new FC3 VDI
-
-(0 is the VDI id... create a few more if you want.)
-
-(h) hoover your image into the new VDI.
-
- ./vdi_fill 0 ./image
-
-This will pull the entire image into the blockstore and set up a
-mapping tree for it for VDI 0. Passing a device (i.e. /dev/sda3)
-should also work, but vdi_fill has NO notion of sparseness yet, so you
-are going to pump a block into the store for each block you read.
-
-vdi_fill will count up until it is done, and you should be ready to
-go. If you want to be anal, you can use vdi_validate to test the VDI
-against the original image.
-
-2. Create some extra VDIs
--------------------------
-
-VDIs are actually a list of snapshots, and each snapshot is a full
-image of mappings. So, to preserve an immutable copy of a current
-VDI, do this:
-
-(a) Snapshot your new VDI.
-
- ./vdi_snap 0
-
-Snapshotting writes the current radix root to the VDI's snapshot log,
-and assigns it a new writable root.
-
-(b) look at the VDI's snapshot log.
-
- ./vdi_snap_list 0
-
- | 16 0 Thu Mar 3 19:27:48 2005 565111 31
-
-The first two columns constitute a snapshot id and represent the
-(block, offset) of the snapshot record. The Date tells you when the
-snapshot was made, and 31 is the radix root node of the snapshot.
-
-(c) Create a new VDI, based on that snapshot, and look at the list.
-
- ./vdi_create "FC3 - Copy 1" 16 0
- ./vdi_list
-
- | 0 My new FC3 VDI
- | 1 FC3 - Copy 1
-
-NOTE: If you have Graphviz installed on your system, you can use
-vdi_tree to generate a postscript of your current set of VDIs and
-snapshots.
-
-
-Create as many VDIs as you need for the VMs that you want to run.
-
-3. Boot some VMs:
------------------
-
-Parallax currently uses a hack in xend to pass the VDI id, you need to
-modify the disk line of the VM config that is going to mount it.
-
-(a) set up your vm config, by using the following disk line:
-
- disk = ['parallax:1,sda1,w,0' ]
-
-This example uses VDI 1 (from vdi_list above), presents it as sda1
-(writable), and uses dom 0 as the backend. If you were running the
-daemon (and tap driver) in some domain other than 0, you would change
-this last parameter.
-
-NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
-
-(b) Run parallax in the backend domain.
-
- ./parallax
-
-(c) create your new domain.
-
- xm create ...
-
----
-
-That's pretty much all there is to it at the moment. Hope this is
-clear enough to get you going. Now, a few serious caveats that will
-be sorted out in the almost immediate future:
-
-WARNINGS:
----------
-
-1. There is NO locking in the VDI tools at the moment, so I'd avoid
-running them in parallel, or more importantly, running them while the
-daemon is running.
-
-2. I doubt that xend will be very happy about restarting if you have
-parallax-using domains. So if it dies while there are active parallax
-doms, you may need to reboot.
-
-3. I've turned off write-in-place. So at the moment, EVERY block
-write is a log append on the blockstore. I've been having some probs
-with the radix tree's marking of writable blocks after snapshots and
-will sort this out very soon.
-
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/block-async.c
--- a/tools/blktap/parallax/block-async.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* block-async.c
- *
- * Asynchronous block wrappers for parallax.
- */
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "block-async.h"
-#include "blockstore.h"
-#include "vdi.h"
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* We have a queue of outstanding I/O requests implemented as a
- * circular producer-consumer ring with free-running buffers.
- * to allow reordering, this ring indirects to indexes in an
- * ring of io_structs.
- *
- * the block_* calls may either add an entry to this ring and return,
- * or satisfy the request immediately and call the callback directly.
- * None of the io calls in parallax should be nested enough to worry
- * about stack problems with this approach.
- */
-
-struct read_args {
- uint64_t addr;
-};
-
-struct write_args {
- uint64_t addr;
- char *block;
-};
-
-struct alloc_args {
- char *block;
-};
-
-struct pending_io_req {
- enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
- union {
- struct read_args r;
- struct write_args w;
- struct alloc_args a;
- } u;
- io_cb_t cb;
- void *param;
-};
-
-void radix_lock_init(struct radix_lock *r)
-{
- int i;
-
- pthread_mutex_init(&r->lock, NULL);
- for (i=0; i < 1024; i++) {
- r->lines[i] = 0;
- r->waiters[i] = NULL;
- r->state[i] = ANY;
- }
-}
-
-/* maximum outstanding I/O requests issued asynchronously */
-/* must be a power of 2.*/
-#define MAX_PENDING_IO 1024
-
-/* how many threads to concurrently issue I/O to the disk. */
-#define IO_POOL_SIZE 10
-
-static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
-static int pending_io_list[MAX_PENDING_IO];
-static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
-#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
-#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
-#define PENDING_IO_ENT(_x) \
- (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
-#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
-#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
-static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER;
-
-static void init_pending_io(void)
-{
- int i;
-
- for (i=0; i<MAX_PENDING_IO; i++)
- pending_io_list[i] = i;
-
-}
-
-void block_read(uint64_t addr, io_cb_t cb, void *param)
-{
- struct pending_io_req *req;
-
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
- req->op = IO_READ;
- req->u.r.addr = addr;
- req->cb = cb;
- req->param = param;
-
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
-{
- struct pending_io_req *req;
-
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
- req->op = IO_WRITE;
- req->u.w.addr = addr;
- req->u.w.block = block;
- req->cb = cb;
- req->param = param;
-
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-
-void block_alloc(char *block, io_cb_t cb, void *param)
-{
- struct pending_io_req *req;
-
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- req->op = IO_ALLOC;
- req->u.a.block = block;
- req->cb = cb;
- req->param = param;
-
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
- pthread_mutex_lock(&r->lock);
-
- if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
- r->lines[row]++;
- r->state[row] = READ;
- DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row);
- pthread_mutex_unlock(&r->lock);
- ret.type = IO_INT_T;
- ret.u.i = 0;
- cb(ret, param);
- } else {
- struct radix_wait **rwc;
- struct radix_wait *rw =
- (struct radix_wait *) malloc (sizeof(struct radix_wait));
- DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
- rw->type = RLOCK;
- rw->param = param;
- rw->cb = cb;
- rw->next = NULL;
- /* append to waiters list. */
- rwc = &r->waiters[row];
- while (*rwc != NULL) rwc = &(*rwc)->next;
- *rwc = rw;
- pthread_mutex_unlock(&r->lock);
- return;
- }
-}
-
-
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
- pthread_mutex_lock(&r->lock);
-
- /* the second check here is redundant -- just here for debugging now. */
- if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
- r->state[row] = STOP;
- r->lines[row] = -1;
- DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row);
- pthread_mutex_unlock(&r->lock);
- ret.type = IO_INT_T;
- ret.u.i = 0;
- cb(ret, param);
- } else {
- struct radix_wait **rwc;
- struct radix_wait *rw =
- (struct radix_wait *) malloc (sizeof(struct radix_wait));
- DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
- rw->type = WLOCK;
- rw->param = param;
- rw->cb = cb;
- rw->next = NULL;
- /* append to waiters list. */
- rwc = &r->waiters[row];
- while (*rwc != NULL) rwc = &(*rwc)->next;
- *rwc = rw;
- pthread_mutex_unlock(&r->lock);
- return;
- }
-
-}
-
-/* called with radix_lock locked and lock count of zero. */
-static void wake_waiters(struct radix_lock *r, int row)
-{
- struct pending_io_req *req;
- struct radix_wait *rw;
-
- if (r->lines[row] != 0) return;
- if (r->waiters[row] == NULL) return;
-
- if (r->waiters[row]->type == WLOCK) {
-
- rw = r->waiters[row];
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- req->op = IO_WWAKE;
- req->cb = rw->cb;
- req->param = rw->param;
- r->lines[row] = -1; /* write lock the row. */
- r->state[row] = STOP;
- r->waiters[row] = rw->next;
- free(rw);
- pthread_mutex_unlock(&pending_io_lock);
-
- } else /* RLOCK */ {
-
- while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
- rw = r->waiters[row];
- pthread_mutex_lock(&pending_io_lock);
- assert(CAN_PRODUCE_PENDING_IO);
-
- req = PENDING_IO_ENT(io_prod++);
- req->op = IO_RWAKE;
- req->cb = rw->cb;
- req->param = rw->param;
- r->lines[row]++; /* read lock the row. */
- r->state[row] = READ;
- r->waiters[row] = rw->next;
- free(rw);
- pthread_mutex_unlock(&pending_io_lock);
- }
-
- if (r->waiters[row] != NULL) /* There is a write queued still */
- r->state[row] = STOP;
- }
-
- pthread_mutex_lock(&pending_io_lock);
- pthread_cond_signal(&pending_io_cond);
- pthread_mutex_unlock(&pending_io_lock);
-}
-
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
-
- pthread_mutex_lock(&r->lock);
- assert(r->lines[row] > 0); /* try to catch misuse. */
- r->lines[row]--;
- if (r->lines[row] == 0) {
- r->state[row] = ANY;
- wake_waiters(r, row);
- }
- pthread_mutex_unlock(&r->lock);
- cb(ret, param);
-}
-
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
-{
- struct io_ret ret;
-
- pthread_mutex_lock(&r->lock);
- assert(r->lines[row] == -1); /* try to catch misuse. */
- r->lines[row] = 0;
- r->state[row] = ANY;
- wake_waiters(r, row);
- pthread_mutex_unlock(&r->lock);
- cb(ret, param);
-}
-
-/* consumer calls */
-static void do_next_io_req(struct pending_io_req *req)
-{
- struct io_ret ret;
- void *param;
-
- switch (req->op) {
- case IO_READ:
- ret.type = IO_BLOCK_T;
- ret.u.b = readblock(req->u.r.addr);
- break;
- case IO_WRITE:
- ret.type = IO_INT_T;
- ret.u.i = writeblock(req->u.w.addr, req->u.w.block);
- DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
- break;
- case IO_ALLOC:
- ret.type = IO_ADDR_T;
- ret.u.a = allocblock(req->u.a.block);
- break;
- case IO_RWAKE:
- DPRINTF("WAKE DEFERRED RLOCK!\n");
- ret.type = IO_INT_T;
- ret.u.i = 0;
- break;
- case IO_WWAKE:
- DPRINTF("WAKE DEFERRED WLOCK!\n");
- ret.type = IO_INT_T;
- ret.u.i = 0;
- break;
- default:
- DPRINTF("Unknown IO operation on pending list!\n");
- return;
- }
-
- param = req->param;
- pthread_mutex_lock(&pending_io_lock);
- pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
- pthread_mutex_unlock(&pending_io_lock);
-
- assert(req->cb != NULL);
- req->cb(ret, param);
-
-}
-
-void *io_thread(void *param)
-{
- int tid;
- struct pending_io_req *req;
-
- /* Set this thread's tid. */
- tid = *(int *)param;
- free(param);
-
-start:
- pthread_mutex_lock(&pending_io_lock);
- while (io_prod == io_cons) {
- pthread_cond_wait(&pending_io_cond, &pending_io_lock);
- }
-
- if (io_prod == io_cons) {
- /* unnecessary wakeup. */
- pthread_mutex_unlock(&pending_io_lock);
- goto start;
- }
-
- req = PENDING_IO_ENT(io_cons++);
- pthread_mutex_unlock(&pending_io_lock);
-
- do_next_io_req(req);
-
- goto start;
-
-}
-
-static pthread_t io_pool[IO_POOL_SIZE];
-void start_io_threads(void)
-
-{
- int i, tid=0;
-
- for (i=0; i < IO_POOL_SIZE; i++) {
- int ret, *t;
- t = (int *)malloc(sizeof(int));
- *t = tid++;
- ret = pthread_create(&io_pool[i], NULL, io_thread, t);
- if (ret != 0) printf("Error starting thread %d\n", i);
- }
-
-}
-
-void init_block_async(void)
-{
- init_pending_io();
- start_io_threads();
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/block-async.h
--- a/tools/blktap/parallax/block-async.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-/* block-async.h
- *
- * Asynchronous block wrappers for parallax.
- */
-
-#ifndef _BLOCKASYNC_H_
-#define _BLOCKASYNC_H_
-
-#include <assert.h>
-#include <xenctrl.h>
-#include "vdi.h"
-
-struct io_ret
-{
- enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
- union {
- uint64_t a;
- char *b;
- int i;
- } u;
-};
-
-typedef void (*io_cb_t)(struct io_ret r, void *param);
-
-/* per-vdi lock structures to make sure requests run in a safe order. */
-struct radix_wait {
- enum {RLOCK, WLOCK} type;
- io_cb_t cb;
- void *param;
- struct radix_wait *next;
-};
-
-struct radix_lock {
- pthread_mutex_t lock;
- int lines[1024];
- struct radix_wait *waiters[1024];
- enum {ANY, READ, STOP} state[1024];
-};
-void radix_lock_init(struct radix_lock *r);
-
-void block_read(uint64_t addr, io_cb_t cb, void *param);
-void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
-void block_alloc(char *block, io_cb_t cb, void *param);
-void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
-void init_block_async(void);
-
-static inline uint64_t IO_ADDR(struct io_ret r)
-{
- assert(r.type == IO_ADDR_T);
- return r.u.a;
-}
-
-static inline char *IO_BLOCK(struct io_ret r)
-{
- assert(r.type == IO_BLOCK_T);
- return r.u.b;
-}
-
-static inline int IO_INT(struct io_ret r)
-{
- assert(r.type == IO_INT_T);
- return r.u.i;
-}
-
-
-#endif //_BLOCKASYNC_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstore.c
--- a/tools/blktap/parallax/blockstore.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1348 +0,0 @@
-/**************************************************************************
- *
- * blockstore.c
- *
- * Simple block store interface
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <stdarg.h>
-#include "blockstore.h"
-#include <pthread.h>
-
-//#define BLOCKSTORE_REMOTE
-//#define BSDEBUG
-
-#define RETRY_TIMEOUT 1000000 /* microseconds */
-
-/*****************************************************************************
- * Debugging
- */
-#ifdef BSDEBUG
-void DB(char *format, ...)
-{
- va_list args;
- fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
- va_start(args, format);
- vfprintf(stderr, format, args);
- va_end(args);
-}
-#else
-#define DB(format, ...) (void)0
-#endif
-
-#ifdef BLOCKSTORE_REMOTE
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-/*****************************************************************************
- * Network state *
- *****************************************************************************/
-
-/* The individual disk servers we talks to. These will be referenced by
- * an integer index into bsservers[].
- */
-bsserver_t bsservers[MAX_SERVERS];
-
-/* The cluster map. This is indexed by an integer cluster number.
- */
-bscluster_t bsclusters[MAX_CLUSTERS];
-
-/* Local socket.
- */
-struct sockaddr_in sin_local;
-int bssock = 0;
-
-/*****************************************************************************
- * Notification *
- *****************************************************************************/
-
-typedef struct pool_thread_t_struct {
- pthread_mutex_t ptmutex;
- pthread_cond_t ptcv;
- int newdata;
-} pool_thread_t;
-
-pool_thread_t pool_thread[READ_POOL_SIZE+1];
-
-#define RECV_NOTIFY(tid) { \
- pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
- pool_thread[tid].newdata = 1; \
- DB("CV Waking %u", tid); \
- pthread_cond_signal(&(pool_thread[tid].ptcv)); \
- pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-#define RECV_AWAIT(tid) { \
- pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
- if (pool_thread[tid].newdata) { \
- pool_thread[tid].newdata = 0; \
- DB("CV Woken %u", tid); \
- } \
- else { \
- DB("CV Waiting %u", tid); \
- pthread_cond_wait(&(pool_thread[tid].ptcv), \
- &(pool_thread[tid].ptmutex)); \
- } \
- pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
-
-/*****************************************************************************
- * Message queue management *
- *****************************************************************************/
-
-/* Protects the queue manipulation critcal regions.
- */
-pthread_mutex_t ptmutex_queue;
-#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
-#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
-
-pthread_mutex_t ptmutex_recv;
-#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
-#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
-
-/* A message queue entry. We allocate one of these for every request we send.
- * Asynchronous reply reception also used one of these.
- */
-typedef struct bsq_t_struct {
- struct bsq_t_struct *prev;
- struct bsq_t_struct *next;
- int status;
- int server;
- int length;
- struct msghdr msghdr;
- struct iovec iov[2];
- int tid;
- struct timeval tv_sent;
- bshdr_t message;
- void *block;
-} bsq_t;
-
-#define BSQ_STATUS_MATCHED 1
-
-pthread_mutex_t ptmutex_luid;
-#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
-#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
-
-static uint64_t luid_cnt = 0x1000ULL;
-uint64_t new_luid(void) {
- uint64_t luid;
- ENTER_LUID_CR;
- luid = luid_cnt++;
- LEAVE_LUID_CR;
- return luid;
-}
-
-/* Queue of outstanding requests.
- */
-bsq_t *bs_head = NULL;
-bsq_t *bs_tail = NULL;
-int bs_qlen = 0;
-
-/*
- */
-void queuedebug(char *msg) {
- bsq_t *q;
- ENTER_QUEUE_CR;
- fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
- for (q = bs_head; q; q = q->next) {
- fprintf(stderr, " luid=%016llx server=%u\n",
- q->message.luid, q->server);
- }
- LEAVE_QUEUE_CR;
-}
-
-int enqueue(bsq_t *qe) {
- ENTER_QUEUE_CR;
- qe->next = NULL;
- qe->prev = bs_tail;
- if (!bs_head)
- bs_head = qe;
- else
- bs_tail->next = qe;
- bs_tail = qe;
- bs_qlen++;
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("enqueue");
-#endif
- return 0;
-}
-
-int dequeue(bsq_t *qe) {
- bsq_t *q;
- ENTER_QUEUE_CR;
- for (q = bs_head; q; q = q->next) {
- if (q == qe) {
- if (q->prev)
- q->prev->next = q->next;
- else
- bs_head = q->next;
- if (q->next)
- q->next->prev = q->prev;
- else
- bs_tail = q->prev;
- bs_qlen--;
- goto found;
- }
- }
-
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("dequeue not found");
-#endif
- return 0;
-
- found:
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("dequeue not found");
-#endif
- return 1;
-}
-
-bsq_t *queuesearch(bsq_t *qe) {
- bsq_t *q;
- ENTER_QUEUE_CR;
- for (q = bs_head; q; q = q->next) {
- if ((qe->server == q->server) &&
- (qe->message.operation == q->message.operation) &&
- (qe->message.luid == q->message.luid)) {
-
- if ((q->message.operation == BSOP_READBLOCK) &&
- ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
- q->block = qe->block;
- qe->block = NULL;
- }
- q->length = qe->length;
- q->message.flags = qe->message.flags;
- q->message.id = qe->message.id;
- q->status |= BSQ_STATUS_MATCHED;
-
- if (q->prev)
- q->prev->next = q->next;
- else
- bs_head = q->next;
- if (q->next)
- q->next->prev = q->prev;
- else
- bs_tail = q->prev;
- q->next = NULL;
- q->prev = NULL;
- bs_qlen--;
- goto found;
- }
- }
-
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("queuesearch not found");
-#endif
- return NULL;
-
- found:
- LEAVE_QUEUE_CR;
-#ifdef BSDEBUG
- queuedebug("queuesearch found");
-#endif
- return q;
-}
-
-/*****************************************************************************
- * Network communication *
- *****************************************************************************/
-
-int send_message(bsq_t *qe) {
- int rc;
-
- qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
- qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
- qe->msghdr.msg_iov = qe->iov;
- if (qe->block)
- qe->msghdr.msg_iovlen = 2;
- else
- qe->msghdr.msg_iovlen = 1;
- qe->msghdr.msg_control = NULL;
- qe->msghdr.msg_controllen = 0;
- qe->msghdr.msg_flags = 0;
-
- qe->iov[0].iov_base = (void *)&(qe->message);
- qe->iov[0].iov_len = MSGBUFSIZE_ID;
-
- if (qe->block) {
- qe->iov[1].iov_base = qe->block;
- qe->iov[1].iov_len = BLOCK_SIZE;
- }
-
- qe->message.luid = new_luid();
-
- qe->status = 0;
- qe->tid = (int)pthread_getspecific(tid_key);
- if (enqueue(qe) < 0) {
- fprintf(stderr, "Error enqueuing request.\n");
- return -1;
- }
-
- gettimeofday(&(qe->tv_sent), NULL);
- DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
- rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
- //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
- // (struct sockaddr *)&(bsservers[qe->server].sin),
- // sizeof(struct sockaddr_in));
- if (rc < 0)
- return rc;
-
- return rc;
-}
-
-int recv_message(bsq_t *qe) {
- struct sockaddr_in from;
- //int flen = sizeof(from);
- int rc;
-
- qe->msghdr.msg_name = &from;
- qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
- qe->msghdr.msg_iov = qe->iov;
- if (qe->block)
- qe->msghdr.msg_iovlen = 2;
- else
- qe->msghdr.msg_iovlen = 1;
- qe->msghdr.msg_control = NULL;
- qe->msghdr.msg_controllen = 0;
- qe->msghdr.msg_flags = 0;
-
- qe->iov[0].iov_base = (void *)&(qe->message);
- qe->iov[0].iov_len = MSGBUFSIZE_ID;
- if (qe->block) {
- qe->iov[1].iov_base = qe->block;
- qe->iov[1].iov_len = BLOCK_SIZE;
- }
-
- rc = recvmsg(bssock, &(qe->msghdr), 0);
-
- //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
- // (struct sockaddr *)&from, &flen);
- return rc;
-}
-
-int get_server_number(struct sockaddr_in *sin) {
- int i;
-
-#ifdef BSDEBUG2
- fprintf(stderr,
- "get_server_number(%u.%u.%u.%u/%u)\n",
- (unsigned int)sin->sin_addr.s_addr & 0xff,
- ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
- ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
- ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
- (unsigned int)sin->sin_port);
-#endif
-
- for (i = 0; i < MAX_SERVERS; i++) {
- if (bsservers[i].hostname) {
-#ifdef BSDEBUG2
- fprintf(stderr,
- "get_server_number check %u.%u.%u.%u/%u\n",
- (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
- ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
- ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
- ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
- (unsigned int)bsservers[i].sin.sin_port);
-#endif
- if ((sin->sin_family == bsservers[i].sin.sin_family) &&
- (sin->sin_port == bsservers[i].sin.sin_port) &&
- (memcmp((void *)&(sin->sin_addr),
- (void *)&(bsservers[i].sin.sin_addr),
- sizeof(struct in_addr)) == 0)) {
- return i;
- }
- }
- }
-
- return -1;
-}
-
-void *rx_buffer = NULL;
-bsq_t rx_qe;
-bsq_t *recv_any(void) {
- struct sockaddr_in from;
- int rc;
-
- DB("ENTER recv_any\n");
-
- rx_qe.msghdr.msg_name = &from;
- rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
- rx_qe.msghdr.msg_iov = rx_qe.iov;
- if (!rx_buffer) {
- rx_buffer = malloc(BLOCK_SIZE);
- if (!rx_buffer) {
- perror("recv_any malloc");
- return NULL;
- }
- }
- rx_qe.block = rx_buffer;
- rx_buffer = NULL;
- rx_qe.msghdr.msg_iovlen = 2;
- rx_qe.msghdr.msg_control = NULL;
- rx_qe.msghdr.msg_controllen = 0;
- rx_qe.msghdr.msg_flags = 0;
-
- rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
- rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
- rx_qe.iov[1].iov_base = rx_qe.block;
- rx_qe.iov[1].iov_len = BLOCK_SIZE;
-
- rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
- if (rc < 0) {
- perror("recv_any");
- return NULL;
- }
-
- rx_qe.length = rc;
- rx_qe.server = get_server_number(&from);
-
- DB("recv_any from %d luid=%016llx len=%u\n",
- rx_qe.server, rx_qe.message.luid, rx_qe.length);
-
- return &rx_qe;
-}
-
-void recv_recycle_buffer(bsq_t *q) {
- if (q->block) {
- rx_buffer = q->block;
- q->block = NULL;
- }
-}
-
-// cycle through reading any incoming, searching for a match in the
-// queue, until we have all we need.
-int wait_recv(bsq_t **reqs, int numreqs) {
- bsq_t *q, *m;
- unsigned int x, i;
- int tid = (int)pthread_getspecific(tid_key);
-
- DB("ENTER wait_recv %u\n", numreqs);
-
- checkmatch:
- x = 0xffffffff;
- for (i = 0; i < numreqs; i++) {
- x &= reqs[i]->status;
- }
- if ((x & BSQ_STATUS_MATCHED)) {
- DB("LEAVE wait_recv\n");
- return numreqs;
- }
-
- RECV_AWAIT(tid);
-
- /*
- rxagain:
- ENTER_RECV_CR;
- q = recv_any();
- LEAVE_RECV_CR;
- if (!q)
- return -1;
-
- m = queuesearch(q);
- recv_recycle_buffer(q);
- if (!m) {
- fprintf(stderr, "Unmatched RX\n");
- goto rxagain;
- }
- */
-
- goto checkmatch;
-
-}
-
-/* retry
- */
-static int retry_count = 0;
-int retry(bsq_t *qe)
-{
- int rc;
- gettimeofday(&(qe->tv_sent), NULL);
- DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
- retry_count++;
- rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
- if (rc < 0)
- return rc;
- return 0;
-}
-
-/* queue runner
- */
-void *queue_runner(void *arg)
-{
- for (;;) {
- struct timeval now;
- long long nowus, sus;
- bsq_t *q;
- int r;
-
- sleep(1);
-
- gettimeofday(&now, NULL);
- nowus = now.tv_usec + now.tv_sec * 1000000;
- ENTER_QUEUE_CR;
- r = retry_count;
- for (q = bs_head; q; q = q->next) {
- sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
- if ((nowus - sus) > RETRY_TIMEOUT) {
- if (retry(q) < 0) {
- fprintf(stderr, "Error on sendmsg retry.\n");
- }
- }
- }
- if (r != retry_count) {
- fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
- }
- LEAVE_QUEUE_CR;
- }
-}
-
-/* receive loop
- */
-void *receive_loop(void *arg)
-{
- bsq_t *q, *m;
-
- for(;;) {
- q = recv_any();
- if (!q) {
- fprintf(stderr, "recv_any error\n");
- }
- else {
- m = queuesearch(q);
- recv_recycle_buffer(q);
- if (!m) {
- fprintf(stderr, "Unmatched RX\n");
- }
- else {
- DB("RX MATCH");
- RECV_NOTIFY(m->tid);
- }
- }
- }
-}
-pthread_t pthread_recv;
-
-/*****************************************************************************
- * Reading *
- *****************************************************************************/
-
-void *readblock_indiv(int server, uint64_t id) {
- void *block;
- bsq_t *qe;
- int len, rc;
-
- qe = (bsq_t *)malloc(sizeof(bsq_t));
- if (!qe) {
- perror("readblock qe malloc");
- return NULL;
- }
- qe->block = NULL;
-
- /*
- qe->block = malloc(BLOCK_SIZE);
- if (!qe->block) {
- perror("readblock qe malloc");
- free((void *)qe);
- return NULL;
- }
- */
-
- qe->server = server;
-
- qe->message.operation = BSOP_READBLOCK;
- qe->message.flags = 0;
- qe->message.id = id;
- qe->length = MSGBUFSIZE_ID;
-
- if (send_message(qe) < 0) {
- perror("readblock sendto");
- goto err;
- }
-
- /*len = recv_message(qe);
- if (len < 0) {
- perror("readblock recv");
- goto err;
- }*/
-
- rc = wait_recv(&qe, 1);
- if (rc < 0) {
- perror("readblock recv");
- goto err;
- }
-
- if ((qe->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "readblock server error\n");
- goto err;
- }
- if (qe->length < MSGBUFSIZE_BLOCK) {
- fprintf(stderr, "readblock recv short (%u)\n", len);
- goto err;
- }
- /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
- perror("readblock malloc");
- goto err;
- }
- memcpy(block, qe->message.block, BLOCK_SIZE);
- */
- block = qe->block;
-
- free((void *)qe);
- return block;
-
- err:
- free(qe->block);
- free((void *)qe);
- return NULL;
-}
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- *
- * @return: pointer to block, NULL on error
- */
-void *readblock(uint64_t id) {
- int map = (int)BSID_MAP(id);
- uint64_t xid;
- static int i = CLUSTER_MAX_REPLICAS - 1;
- void *block = NULL;
-
- /* special case for the "superblock" just use the first block on the
- * first replica. (extend to blocks < 6 for vdi bug)
- */
- if (id < 6) {
- block = readblock_indiv(bsclusters[map].servers[0], id);
- goto out;
- }
-
- i++;
- if (i >= CLUSTER_MAX_REPLICAS)
- i = 0;
- switch (i) {
- case 0:
- xid = BSID_REPLICA0(id);
- break;
- case 1:
- xid = BSID_REPLICA1(id);
- break;
- case 2:
- xid = BSID_REPLICA2(id);
- break;
- }
-
- block = readblock_indiv(bsclusters[map].servers[i], xid);
-
- out:
-#ifdef BSDEBUG
- if (block)
- fprintf(stderr, "READ: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
- id,
- (unsigned int)((unsigned char *)block)[0],
- (unsigned int)((unsigned char *)block)[1],
- (unsigned int)((unsigned char *)block)[2],
- (unsigned int)((unsigned char *)block)[3],
- (unsigned int)((unsigned char *)block)[4],
- (unsigned int)((unsigned char *)block)[5],
- (unsigned int)((unsigned char *)block)[6],
- (unsigned int)((unsigned char *)block)[7]);
- else
- fprintf(stderr, "READ: %016llx NULL\n", id);
-#endif
- return block;
-}
-
-/*****************************************************************************
- * Writing *
- *****************************************************************************/
-
-bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
-
- bsq_t *qe;
- int len;
-
- qe = (bsq_t *)malloc(sizeof(bsq_t));
- if (!qe) {
- perror("writeblock qe malloc");
- goto err;
- }
- qe->server = server;
-
- qe->message.operation = BSOP_WRITEBLOCK;
- qe->message.flags = 0;
- qe->message.id = id;
- //memcpy(qe->message.block, block, BLOCK_SIZE);
- qe->block = block;
- qe->length = MSGBUFSIZE_BLOCK;
-
- if (send_message(qe) < 0) {
- perror("writeblock sendto");
- goto err;
- }
-
- return qe;
-
- err:
- free((void *)qe);
- return NULL;
-}
-
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-
- int map = (int)BSID_MAP(id);
- int rep0 = bsclusters[map].servers[0];
- int rep1 = bsclusters[map].servers[1];
- int rep2 = bsclusters[map].servers[2];
- bsq_t *reqs[3];
- int rc;
-
- reqs[0] = reqs[1] = reqs[2] = NULL;
-
-#ifdef BSDEBUG
- fprintf(stderr,
- "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
- id,
- (unsigned int)((unsigned char *)block)[0],
- (unsigned int)((unsigned char *)block)[1],
- (unsigned int)((unsigned char *)block)[2],
- (unsigned int)((unsigned char *)block)[3],
- (unsigned int)((unsigned char *)block)[4],
- (unsigned int)((unsigned char *)block)[5],
- (unsigned int)((unsigned char *)block)[6],
- (unsigned int)((unsigned char *)block)[7]);
-#endif
-
- /* special case for the "superblock" just use the first block on the
- * first replica. (extend to blocks < 6 for vdi bug)
- */
- if (id < 6) {
- reqs[0] = writeblock_indiv(rep0, id, block);
- if (!reqs[0])
- return -1;
- rc = wait_recv(reqs, 1);
- return rc;
- }
-
- reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
- if (!reqs[0])
- goto err;
- reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
- if (!reqs[1])
- goto err;
- reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
- if (!reqs[2])
- goto err;
-
- rc = wait_recv(reqs, 3);
- if (rc < 0) {
- perror("writeblock recv");
- goto err;
- }
- if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "writeblock server0 error\n");
- goto err;
- }
- if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "writeblock server1 error\n");
- goto err;
- }
- if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "writeblock server2 error\n");
- goto err;
- }
-
-
- free((void *)reqs[0]);
- free((void *)reqs[1]);
- free((void *)reqs[2]);
- return 0;
-
- err:
- if (reqs[0]) {
- dequeue(reqs[0]);
- free((void *)reqs[0]);
- }
- if (reqs[1]) {
- dequeue(reqs[1]);
- free((void *)reqs[1]);
- }
- if (reqs[2]) {
- dequeue(reqs[2]);
- free((void *)reqs[2]);
- }
- return -1;
-}
-
-/*****************************************************************************
- * Allocation *
- *****************************************************************************/
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-uint64_t allocblock(void *block) {
- return allocblock_hint(block, 0);
-}
-
-bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
- bsq_t *qe;
- int len;
-
- qe = (bsq_t *)malloc(sizeof(bsq_t));
- if (!qe) {
- perror("allocblock_hint qe malloc");
- goto err;
- }
- qe->server = server;
-
- qe->message.operation = BSOP_ALLOCBLOCK;
- qe->message.flags = 0;
- qe->message.id = hint;
- //memcpy(qe->message.block, block, BLOCK_SIZE);
- qe->block = block;
- qe->length = MSGBUFSIZE_BLOCK;
-
- if (send_message(qe) < 0) {
- perror("allocblock_hint sendto");
- goto err;
- }
-
- return qe;
-
- err:
- free((void *)qe);
- return NULL;
-}
-
-/**
- * allocblock_hint: write a new block to disk
- * @block: pointer to block
- * @hint: allocation hint
- *
- * @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
- int map = (int)hint;
- int rep0 = bsclusters[map].servers[0];
- int rep1 = bsclusters[map].servers[1];
- int rep2 = bsclusters[map].servers[2];
- bsq_t *reqs[3];
- int rc;
- uint64_t id0, id1, id2;
-
- reqs[0] = reqs[1] = reqs[2] = NULL;
-
- DB("ENTER allocblock\n");
-
- reqs[0] = allocblock_hint_indiv(rep0, block, hint);
- if (!reqs[0])
- goto err;
- reqs[1] = allocblock_hint_indiv(rep1, block, hint);
- if (!reqs[1])
- goto err;
- reqs[2] = allocblock_hint_indiv(rep2, block, hint);
- if (!reqs[2])
- goto err;
-
- rc = wait_recv(reqs, 3);
- if (rc < 0) {
- perror("allocblock recv");
- goto err;
- }
- if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "allocblock server0 error\n");
- goto err;
- }
- if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "allocblock server1 error\n");
- goto err;
- }
- if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
- fprintf(stderr, "allocblock server2 error\n");
- goto err;
- }
-
- id0 = reqs[0]->message.id;
- id1 = reqs[1]->message.id;
- id2 = reqs[2]->message.id;
-
-#ifdef BSDEBUG
- fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
- BSID(map, id0, id1, id2),
- (unsigned int)((unsigned char *)block)[0],
- (unsigned int)((unsigned char *)block)[1],
- (unsigned int)((unsigned char *)block)[2],
- (unsigned int)((unsigned char *)block)[3],
- (unsigned int)((unsigned char *)block)[4],
- (unsigned int)((unsigned char *)block)[5],
- (unsigned int)((unsigned char *)block)[6],
- (unsigned int)((unsigned char *)block)[7]);
-#endif
-
- free((void *)reqs[0]);
- free((void *)reqs[1]);
- free((void *)reqs[2]);
- return BSID(map, id0, id1, id2);
-
- err:
- if (reqs[0]) {
- dequeue(reqs[0]);
- free((void *)reqs[0]);
- }
- if (reqs[1]) {
- dequeue(reqs[1]);
- free((void *)reqs[1]);
- }
- if (reqs[2]) {
- dequeue(reqs[2]);
- free((void *)reqs[2]);
- }
- return 0;
-}
-
-#else /* /BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Local storage version *
- *****************************************************************************/
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- *
- * @return: pointer to block, NULL on error
- */
-
-void *readblock(uint64_t id) {
- void *block;
- int block_fp;
-
-//printf("readblock(%llu)\n", id);
- block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return NULL;
- }
-
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- printf ("%Ld ", id);
- printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
- perror("readblock lseek");
- goto err;
- }
- if ((block = malloc(BLOCK_SIZE)) == NULL) {
- perror("readblock malloc");
- goto err;
- }
- if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("readblock read");
- free(block);
- goto err;
- }
- close(block_fp);
- return block;
-
-err:
- close(block_fp);
- return NULL;
-}
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
-
- int block_fp;
-
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return -1;
- }
-
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- perror("writeblock lseek");
- goto err;
- }
- if (write(block_fp, block, BLOCK_SIZE) < 0) {
- perror("writeblock write");
- goto err;
- }
- close(block_fp);
- return 0;
-
-err:
- close(block_fp);
- return -1;
-}
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-
-uint64_t allocblock(void *block) {
- uint64_t lb;
- off64_t pos;
- int block_fp;
-
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return 0;
- }
-
- pos = lseek64(block_fp, 0, SEEK_END);
- if (pos == (off64_t)-1) {
- perror("allocblock lseek");
- goto err;
- }
- if (pos % BLOCK_SIZE != 0) {
- fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
- goto err;
- }
- if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("allocblock write");
- goto err;
- }
- lb = pos / BLOCK_SIZE + 1;
-//printf("alloc(%Ld)\n", lb);
- close(block_fp);
- return lb;
-
-err:
- close(block_fp);
- return 0;
-
-}
-
-/**
- * allocblock_hint: write a new block to disk
- * @block: pointer to block
- * @hint: allocation hint
- *
- * @return: new id of block on disk
- */
-uint64_t allocblock_hint(void *block, uint64_t hint) {
- return allocblock(block);
-}
-
-#endif /* BLOCKSTORE_REMOTE */
-
-/*****************************************************************************
- * Memory management *
- *****************************************************************************/
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- * @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
- void *block = malloc(BLOCK_SIZE);
- if (block == NULL) {
- perror("newblock");
- return NULL;
- }
- memset(block, 0, BLOCK_SIZE);
- return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- * @id: block id (zero if this is only in-memory)
- * @block: block to be freed
- */
-void freeblock(void *block) {
- free(block);
-}
-
-static freeblock_t *new_freeblock(void)
-{
- freeblock_t *fb;
-
- fb = newblock();
-
- if (fb == NULL) return NULL;
-
- fb->magic = FREEBLOCK_MAGIC;
- fb->next = 0ULL;
- fb->count = 0ULL;
- memset(fb->list, 0, sizeof fb->list);
-
- return fb;
-}
-
-void releaseblock(uint64_t id)
-{
- blockstore_super_t *bs_super;
- freeblock_t *fl_current;
-
- /* get superblock */
- bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-
- /* get freeblock_current */
- if (bs_super->freelist_current == 0ULL)
- {
- fl_current = new_freeblock();
- bs_super->freelist_current = allocblock(fl_current);
- writeblock(BLOCKSTORE_SUPER, bs_super);
- } else {
- fl_current = readblock(bs_super->freelist_current);
- }
-
- /* if full, chain to superblock and allocate new current */
-
- if (fl_current->count == FREEBLOCK_SIZE) {
- fl_current->next = bs_super->freelist_full;
- writeblock(bs_super->freelist_current, fl_current);
- bs_super->freelist_full = bs_super->freelist_current;
- freeblock(fl_current);
- fl_current = new_freeblock();
- bs_super->freelist_current = allocblock(fl_current);
- writeblock(BLOCKSTORE_SUPER, bs_super);
- }
-
- /* append id to current */
- fl_current->list[fl_current->count++] = id;
- writeblock(bs_super->freelist_current, fl_current);
-
- freeblock(fl_current);
- freeblock(bs_super);
-
-
-}
-
-/* freelist debug functions: */
-void freelist_count(int print_each)
-{
- blockstore_super_t *bs_super;
- freeblock_t *fb;
- uint64_t total = 0, next;
-
- bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
-
- if (bs_super->freelist_current == 0ULL) {
- printf("freelist is empty!\n");
- return;
- }
-
- fb = readblock(bs_super->freelist_current);
- printf("%Ld entires on current.\n", fb->count);
- total += fb->count;
- if (print_each == 1)
- {
- int i;
- for (i=0; i< fb->count; i++)
- printf(" %Ld\n", fb->list[i]);
- }
-
- freeblock(fb);
-
- if (bs_super->freelist_full == 0ULL) {
- printf("freelist_full is empty!\n");
- return;
- }
-
- next = bs_super->freelist_full;
- for (;;) {
- fb = readblock(next);
- total += fb->count;
- if (print_each == 1)
- {
- int i;
- for (i=0; i< fb->count; i++)
- printf(" %Ld\n", fb->list[i]);
- }
- next = fb->next;
- freeblock(fb);
- if (next == 0ULL) break;
- }
- printf("Total of %Ld ids on freelist.\n", total);
-}
-
-/*****************************************************************************
- * Initialisation *
- *****************************************************************************/
-
-int __init_blockstore(void)
-{
- int i;
- blockstore_super_t *bs_super;
- uint64_t ret;
- int block_fp;
-
-#ifdef BLOCKSTORE_REMOTE
- struct hostent *addr;
-
- pthread_mutex_init(&ptmutex_queue, NULL);
- pthread_mutex_init(&ptmutex_luid, NULL);
- pthread_mutex_init(&ptmutex_recv, NULL);
- /*pthread_mutex_init(&ptmutex_notify, NULL);*/
- for (i = 0; i <= READ_POOL_SIZE; i++) {
- pool_thread[i].newdata = 0;
- pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
- pthread_cond_init(&(pool_thread[i].ptcv), NULL);
- }
-
- bsservers[0].hostname = "firebug.cl.cam.ac.uk";
- bsservers[1].hostname = "planb.cl.cam.ac.uk";
- bsservers[2].hostname = "simcity.cl.cam.ac.uk";
- bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
- bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
- bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
- bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
- bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
- bsservers[8].hostname = NULL;
- bsservers[9].hostname = NULL;
- bsservers[10].hostname = NULL;
- bsservers[11].hostname = NULL;
- bsservers[12].hostname = NULL;
- bsservers[13].hostname = NULL;
- bsservers[14].hostname = NULL;
- bsservers[15].hostname = NULL;
-
- for (i = 0; i < MAX_SERVERS; i++) {
- if (!bsservers[i].hostname)
- continue;
- addr = gethostbyname(bsservers[i].hostname);
- if (!addr) {
- perror("bad hostname");
- return -1;
- }
- bsservers[i].sin.sin_family = addr->h_addrtype;
- bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
- bsservers[i].sin.sin_addr.s_addr =
- ((struct in_addr *)(addr->h_addr))->s_addr;
- }
-
- /* Cluster map
- */
- bsclusters[0].servers[0] = 0;
- bsclusters[0].servers[1] = 1;
- bsclusters[0].servers[2] = 2;
- bsclusters[1].servers[0] = 1;
- bsclusters[1].servers[1] = 2;
- bsclusters[1].servers[2] = 3;
- bsclusters[2].servers[0] = 2;
- bsclusters[2].servers[1] = 3;
- bsclusters[2].servers[2] = 4;
- bsclusters[3].servers[0] = 3;
- bsclusters[3].servers[1] = 4;
- bsclusters[3].servers[2] = 5;
- bsclusters[4].servers[0] = 4;
- bsclusters[4].servers[1] = 5;
- bsclusters[4].servers[2] = 6;
- bsclusters[5].servers[0] = 5;
- bsclusters[5].servers[1] = 6;
- bsclusters[5].servers[2] = 7;
- bsclusters[6].servers[0] = 6;
- bsclusters[6].servers[1] = 7;
- bsclusters[6].servers[2] = 0;
- bsclusters[7].servers[0] = 7;
- bsclusters[7].servers[1] = 0;
- bsclusters[7].servers[2] = 1;
-
- /* Local socket set up
- */
- bssock = socket(AF_INET, SOCK_DGRAM, 0);
- if (bssock < 0) {
- perror("Bad socket");
- return -1;
- }
- memset(&sin_local, 0, sizeof(sin_local));
- sin_local.sin_family = AF_INET;
- sin_local.sin_port = htons(BLOCKSTORED_PORT);
- sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
- if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
- perror("bind");
- close(bssock);
- return -1;
- }
-
- pthread_create(&pthread_recv, NULL, receive_loop, NULL);
- pthread_create(&pthread_recv, NULL, queue_runner, NULL);
-
-#else /* /BLOCKSTORE_REMOTE */
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return -1;
- exit(-1);
- }
-
- if (lseek(block_fp, 0, SEEK_END) == 0) {
- bs_super = newblock();
- bs_super->magic = BLOCKSTORE_MAGIC;
- bs_super->freelist_full = 0LL;
- bs_super->freelist_current = 0LL;
-
- ret = allocblock(bs_super);
-
- freeblock(bs_super);
- } else {
- bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
- if (bs_super->magic != BLOCKSTORE_MAGIC)
- {
- printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
- exit(-1);
- }
- freeblock(bs_super);
- }
-
- close(block_fp);
-
-#endif /* BLOCKSTORE_REMOTE */
- return 0;
-}
-
-void __exit_blockstore(void)
-{
- int i;
-#ifdef BLOCKSTORE_REMOTE
- pthread_mutex_destroy(&ptmutex_recv);
- pthread_mutex_destroy(&ptmutex_luid);
- pthread_mutex_destroy(&ptmutex_queue);
- /*pthread_mutex_destroy(&ptmutex_notify);
- pthread_cond_destroy(&ptcv_notify);*/
- for (i = 0; i <= READ_POOL_SIZE; i++) {
- pthread_mutex_destroy(&(pool_thread[i].ptmutex));
- pthread_cond_destroy(&(pool_thread[i].ptcv));
- }
-#endif
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstore.h
--- a/tools/blktap/parallax/blockstore.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/**************************************************************************
- *
- * blockstore.h
- *
- * Simple block store interface
- *
- */
-
-#ifndef __BLOCKSTORE_H__
-#define __BLOCKSTORE_H__
-
-#include <netinet/in.h>
-#include <xenctrl.h>
-
-#define BLOCK_SIZE 4096
-#define BLOCK_SHIFT 12
-#define BLOCK_MASK 0xfffffffffffff000LL
-
-/* XXX SMH: where is the below supposed to be defined???? */
-#ifndef SECTOR_SHIFT
-#define SECTOR_SHIFT 9
-#endif
-
-#define FREEBLOCK_SIZE (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t))
-#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
-
-typedef struct {
- uint64_t magic;
- uint64_t next;
- uint64_t count;
- uint64_t list[FREEBLOCK_SIZE];
-} freeblock_t;
-
-#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
-#define BLOCKSTORE_SUPER 1ULL
-
-typedef struct {
- uint64_t magic;
- uint64_t freelist_full;
- uint64_t freelist_current;
-} blockstore_super_t;
-
-extern void *newblock();
-extern void *readblock(uint64_t id);
-extern uint64_t allocblock(void *block);
-extern uint64_t allocblock_hint(void *block, uint64_t hint);
-extern int writeblock(uint64_t id, void *block);
-
-/* Add this blockid to a freelist, to be recycled by the allocator. */
-extern void releaseblock(uint64_t id);
-
-/* this is a memory free() operation for block-sized allocations */
-extern void freeblock(void *block);
-extern int __init_blockstore(void);
-
-/* debug for freelist. */
-void freelist_count(int print_each);
-#define ALLOCFAIL (((uint64_t)(-1)))
-
-/* Distribution
- */
-#define BLOCKSTORED_PORT 9346
-
-struct bshdr_t_struct {
- uint32_t operation;
- uint32_t flags;
- uint64_t id;
- uint64_t luid;
-} __attribute__ ((packed));
-typedef struct bshdr_t_struct bshdr_t;
-
-struct bsmsg_t_struct {
- bshdr_t hdr;
- unsigned char block[BLOCK_SIZE];
-} __attribute__ ((packed));
-
-typedef struct bsmsg_t_struct bsmsg_t;
-
-#define MSGBUFSIZE_OP sizeof(uint32_t)
-#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
-#define MSGBUFSIZE_ID (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
-#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
-
-#define BSOP_READBLOCK 0x01
-#define BSOP_WRITEBLOCK 0x02
-#define BSOP_ALLOCBLOCK 0x03
-#define BSOP_FREEBLOCK 0x04
-
-#define BSOP_FLAG_ERROR 0x01
-
-#define BS_ALLOC_SKIP 10
-#define BS_ALLOC_HACK
-
-/* Remote hosts and cluster map - XXX need to generalise
- */
-
-/*
-
- Interim ID format is
-
- 63 60 59 40 39 20 19 0
- +----+--------------------+--------------------+--------------------+
- |map | replica 2 | replica 1 | replica 0 |
- +----+--------------------+--------------------+--------------------+
-
- The map is an index into a table detailing which machines form the
- cluster.
-
- */
-
-#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
-#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
-#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
-#define BSID_MAP(_id) (((_id)>>60)&0xfULL)
-
-#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
- (((uint64_t)(_rep2))<<40) | \
- (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
-
-typedef struct bsserver_t_struct {
- char *hostname;
- struct sockaddr_in sin;
-} bsserver_t;
-
-#define MAX_SERVERS 16
-
-#define CLUSTER_MAX_REPLICAS 3
-typedef struct bscluster_t_struct {
- int servers[CLUSTER_MAX_REPLICAS];
-} bscluster_t;
-
-#define MAX_CLUSTERS 16
-
-#endif /* __BLOCKSTORE_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/blockstored.c
--- a/tools/blktap/parallax/blockstored.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,275 +0,0 @@
-/**************************************************************************
- *
- * blockstored.c
- *
- * Block store daemon.
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <errno.h>
-#include "blockstore.h"
-
-//#define BSDEBUG
-
-int readblock_into(uint64_t id, void *block);
-
-int open_socket(uint16_t port) {
-
- struct sockaddr_in sn;
- int sock;
-
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- perror("Bad socket");
- return -1;
- }
- memset(&sn, 0, sizeof(sn));
- sn.sin_family = AF_INET;
- sn.sin_port = htons(port);
- sn.sin_addr.s_addr = htonl(INADDR_ANY);
- if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
- perror("bind");
- close(sock);
- return -1;
- }
-
- return sock;
-}
-
-static int block_fp = -1;
-static int bssock = -1;
-
-int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
-
- int rc;
-
-#ifdef BSDEBUG
- fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
- len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
-#endif
- rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
- if (rc < 0) {
- perror("send_reply");
- return 1;
- }
-
-
- return 0;
-}
-
-static bsmsg_t msgbuf;
-
-void service_loop(void) {
-
- for (;;) {
- int rc, len;
- struct sockaddr_in from;
- size_t slen = sizeof(from);
- uint64_t bid;
-
- len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
- (struct sockaddr *)&from, &slen);
-
- if (len < 0) {
- perror("recvfrom");
- continue;
- }
-
- if (len < MSGBUFSIZE_OP) {
- fprintf(stderr, "Short packet.\n");
- continue;
- }
-
-#ifdef BSDEBUG
- fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
- len, msgbuf.hdr.operation, msgbuf.hdr.id);
-#endif
-
- switch (msgbuf.hdr.operation) {
- case BSOP_READBLOCK:
- if (len < MSGBUFSIZE_ID) {
- fprintf(stderr, "Short packet (readblock %u).\n", len);
- continue;
- }
- rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
- if (rc < 0) {
- fprintf(stderr, "readblock error\n");
- msgbuf.hdr.flags = BSOP_FLAG_ERROR;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- continue;
- }
- msgbuf.hdr.flags = 0;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
- break;
- case BSOP_WRITEBLOCK:
- if (len < MSGBUFSIZE_BLOCK) {
- fprintf(stderr, "Short packet (writeblock %u).\n", len);
- continue;
- }
- rc = writeblock(msgbuf.hdr.id, msgbuf.block);
- if (rc < 0) {
- fprintf(stderr, "writeblock error\n");
- msgbuf.hdr.flags = BSOP_FLAG_ERROR;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- continue;
- }
- msgbuf.hdr.flags = 0;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- break;
- case BSOP_ALLOCBLOCK:
- if (len < MSGBUFSIZE_BLOCK) {
- fprintf(stderr, "Short packet (allocblock %u).\n", len);
- continue;
- }
- bid = allocblock(msgbuf.block);
- if (bid == ALLOCFAIL) {
- fprintf(stderr, "allocblock error\n");
- msgbuf.hdr.flags = BSOP_FLAG_ERROR;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- continue;
- }
- msgbuf.hdr.id = bid;
- msgbuf.hdr.flags = 0;
- send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
- break;
- }
-
- }
-}
-
-/**
- * readblock: read a block from disk
- * @id: block id to read
- * @block: pointer to buffer to receive block
- *
- * @return: 0 if OK, other on error
- */
-
-int readblock_into(uint64_t id, void *block) {
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
- perror("readblock lseek");
- return -1;
- }
- if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("readblock read");
- return -1;
- }
- return 0;
-}
-
-/**
- * writeblock: write an existing block to disk
- * @id: block id
- * @block: pointer to block
- *
- * @return: zero on success, -1 on failure
- */
-int writeblock(uint64_t id, void *block) {
- if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
- perror("writeblock lseek");
- return -1;
- }
- if (write(block_fp, block, BLOCK_SIZE) < 0) {
- perror("writeblock write");
- return -1;
- }
- return 0;
-}
-
-/**
- * allocblock: write a new block to disk
- * @block: pointer to block
- *
- * @return: new id of block on disk
- */
-static uint64_t lastblock = 0;
-
-uint64_t allocblock(void *block) {
- uint64_t lb;
- off64_t pos;
-
- retry:
- pos = lseek64(block_fp, 0, SEEK_END);
- if (pos == (off64_t)-1) {
- perror("allocblock lseek");
- return ALLOCFAIL;
- }
- if (pos % BLOCK_SIZE != 0) {
- fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
- return ALLOCFAIL;
- }
- if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
- perror("allocblock write");
- return ALLOCFAIL;
- }
- lb = pos / BLOCK_SIZE + 1;
-
-#ifdef BS_ALLOC_HACK
- if (lb < BS_ALLOC_SKIP)
- goto retry;
-#endif
-
- if (lb <= lastblock)
- printf("[*** %Ld alredy allocated! ***]\n", lb);
-
- lastblock = lb;
- return lb;
-}
-
-/**
- * newblock: get a new in-memory block set to zeros
- *
- * @return: pointer to new block, NULL on error
- */
-void *newblock(void) {
- void *block = malloc(BLOCK_SIZE);
- if (block == NULL) {
- perror("newblock");
- return NULL;
- }
- memset(block, 0, BLOCK_SIZE);
- return block;
-}
-
-
-/**
- * freeblock: unallocate an in-memory block
- * @id: block id (zero if this is only in-memory)
- * @block: block to be freed
- */
-void freeblock(void *block) {
- free(block);
-}
-
-
-int main(int argc, char **argv)
-{
- block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
-
- if (block_fp < 0) {
- perror("open");
- return -1;
- }
-
- bssock = open_socket(BLOCKSTORED_PORT);
- if (bssock < 0) {
- return -1;
- }
-
- service_loop();
-
- close(bssock);
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/bstest.c
--- a/tools/blktap/parallax/bstest.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-/**************************************************************************
- *
- * bstest.c
- *
- * Block store daemon test program.
- *
- * usage: bstest <host>|X {r|w|a} ID
- *
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <errno.h>
-#include "blockstore.h"
-
-int direct(char *host, uint32_t op, uint64_t id, int len) {
- struct sockaddr_in sn, peer;
- int sock;
- bsmsg_t msgbuf;
- int rc, slen;
- struct hostent *addr;
-
- addr = gethostbyname(host);
- if (!addr) {
- perror("bad hostname");
- exit(1);
- }
- peer.sin_family = addr->h_addrtype;
- peer.sin_port = htons(BLOCKSTORED_PORT);
- peer.sin_addr.s_addr = ((struct in_addr *)(addr->h_addr))->s_addr;
- fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
- (unsigned int)(unsigned char)addr->h_addr[0],
- (unsigned int)(unsigned char)addr->h_addr[1],
- (unsigned int)(unsigned char)addr->h_addr[2],
- (unsigned int)(unsigned char)addr->h_addr[3]);
-
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- perror("Bad socket");
- exit(1);
- }
- memset(&sn, 0, sizeof(sn));
- sn.sin_family = AF_INET;
- sn.sin_port = htons(BLOCKSTORED_PORT);
- sn.sin_addr.s_addr = htonl(INADDR_ANY);
- if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
- perror("bind");
- close(sock);
- exit(1);
- }
-
- memset((void *)&msgbuf, 0, sizeof(msgbuf));
- msgbuf.operation = op;
- msgbuf.id = id;
-
- rc = sendto(sock, (void *)&msgbuf, len, 0,
- (struct sockaddr *)&peer, sizeof(peer));
- if (rc < 0) {
- perror("sendto");
- exit(1);
- }
-
- slen = sizeof(peer);
- len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
- (struct sockaddr *)&peer, &slen);
- if (len < 0) {
- perror("recvfrom");
- exit(1);
- }
-
- printf("Reply %u bytes:\n", len);
- if (len >= MSGBUFSIZE_OP)
- printf(" operation: %u\n", msgbuf.operation);
- if (len >= MSGBUFSIZE_FLAGS)
- printf(" flags: 0x%x\n", msgbuf.flags);
- if (len >= MSGBUFSIZE_ID)
- printf(" id: %llu\n", msgbuf.id);
- if (len >= (MSGBUFSIZE_ID + 4))
- printf(" data: %02x %02x %02x %02x...\n",
- (unsigned int)msgbuf.block[0],
- (unsigned int)msgbuf.block[1],
- (unsigned int)msgbuf.block[2],
- (unsigned int)msgbuf.block[3]);
-
- if (sock > 0)
- close(sock);
-
- return 0;
-}
-
-int main (int argc, char **argv) {
-
- uint32_t op = 0;
- uint64_t id = 0;
- int len = 0, rc;
- void *block;
-
- if (argc < 3) {
- fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
- return 1;
- }
-
- switch (argv[2][0]) {
- case 'r':
- case 'R':
- op = BSOP_READBLOCK;
- len = MSGBUFSIZE_ID;
- break;
- case 'w':
- case 'W':
- op = BSOP_WRITEBLOCK;
- len = MSGBUFSIZE_BLOCK;
- break;
- case 'a':
- case 'A':
- op = BSOP_ALLOCBLOCK;
- len = MSGBUFSIZE_BLOCK;
- break;
- default:
- fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
- return 1;
- }
-
- if (argc >= 4)
- id = atoll(argv[3]);
-
- if (strcmp(argv[1], "X") == 0) {
- rc = __init_blockstore();
- if (rc < 0) {
- fprintf(stderr, "blockstore init failed.\n");
- return 1;
- }
- switch(op) {
- case BSOP_READBLOCK:
- block = readblock(id);
- if (block) {
- printf("data: %02x %02x %02x %02x...\n",
- (unsigned int)((unsigned char*)block)[0],
- (unsigned int)((unsigned char*)block)[1],
- (unsigned int)((unsigned char*)block)[2],
- (unsigned int)((unsigned char*)block)[3]);
- }
- break;
- case BSOP_WRITEBLOCK:
- block = malloc(BLOCK_SIZE);
- if (!block) {
- perror("bstest malloc");
- return 1;
- }
- memset(block, 0, BLOCK_SIZE);
- rc = writeblock(id, block);
- if (rc != 0) {
- printf("error\n");
- }
- else {
- printf("OK\n");
- }
- break;
- case BSOP_ALLOCBLOCK:
- block = malloc(BLOCK_SIZE);
- if (!block) {
- perror("bstest malloc");
- return 1;
- }
- memset(block, 0, BLOCK_SIZE);
- id = allocblock_hint(block, id);
- if (id == 0) {
- printf("error\n");
- }
- else {
- printf("ID: %llu\n", id);
- }
- break;
- }
- }
- else {
- direct(argv[1], op, id, len);
- }
-
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/parallax.c
--- a/tools/blktap/parallax/parallax.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,608 +0,0 @@
-/**************************************************************************
- *
- * parallax.c
- *
- * The Parallax Storage Server
- *
- */
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#include "blktaplib.h"
-#include "blockstore.h"
-#include "vdi.h"
-#include "block-async.h"
-#include "requests-async.h"
-
-#define PARALLAX_DEV 61440
-#define SECTS_PER_NODE 8
-
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* ------[ session records ]----------------------------------------------- */
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-#define VDI_HASHSZ 16
-#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
-
-typedef struct blkif {
- domid_t domid;
- unsigned int handle;
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- vdi_t *vdi_hash[VDI_HASHSZ];
- struct blkif *hash_next;
-} blkif_t;
-
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
- if ( handle != 0 )
- printf("blktap/parallax don't currently support non-0 dev handles!\n");
-
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif != NULL) &&
- ((blkif->domid != domid) || (blkif->handle != handle)) )
- blkif = blkif->hash_next;
- return blkif;
-}
-
-vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
-{
- vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
-
- while ((vdi != NULL) && (vdi->vdevice != device))
- vdi = vdi->next;
-
- return vdi;
-}
-
-/* ------[ control message handling ]-------------------------------------- */
-
-void blkif_create(blkif_be_create_t *create)
-{
- domid_t domid = create->domid;
- unsigned int handle = create->blkif_handle;
- blkif_t **pblkif, *blkif;
-
- DPRINTF("parallax (blkif_create): create is %p\n", create);
-
- if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
- {
- DPRINTF("Could not create blkif: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->handle = handle;
- blkif->status = DISCONNECTED;
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif != NULL )
- {
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- {
- DPRINTF("Could not create blkif: already exists (%d,%d)\n",
- domid, handle);
- create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
- free(blkif);
- return;
- }
- pblkif = &(*pblkif)->hash_next;
- }
-
- blkif->hash_next = *pblkif;
- *pblkif = blkif;
-
- DPRINTF("Successfully created blkif\n");
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
- domid_t domid = destroy->domid;
- unsigned int handle = destroy->blkif_handle;
- blkif_t **pblkif, *blkif;
-
- DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy);
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif = *pblkif) != NULL )
- {
- if ( (blkif->domid == domid) && (blkif->handle == handle) )
- {
- if ( blkif->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
- }
- pblkif = &blkif->hash_next;
- }
-
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
- *pblkif = blkif->hash_next;
- free(blkif);
- destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_create(blkif_be_vbd_create_t *create)
-{
- blkif_t *blkif;
- vdi_t *vdi, **vdip;
- blkif_vdev_t vdevice = create->vdevice;
-
- DPRINTF("parallax (vbd_create): create=%p\n", create);
-
- blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
- if ( blkif == NULL )
- {
- DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n",
- create->domid, create->blkif_handle);
- create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- /* VDI identifier is in grow->extent.sector_start */
- DPRINTF("vbd_create: create->dev_handle (id) is %lx\n",
- (unsigned long)create->dev_handle);
-
- vdi = vdi_get(create->dev_handle);
- if (vdi == NULL)
- {
- printf("parallax (vbd_create): VDI %lx not found.\n",
- (unsigned long)create->dev_handle);
- create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- return;
- }
-
- vdi->next = NULL;
- vdi->vdevice = vdevice;
- vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
- while (*vdip != NULL)
- vdip = &(*vdip)->next;
- *vdip = vdi;
-
- DPRINTF("blkif_create succeeded\n");
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
- blkif_t *blkif;
- vdi_t *vdi, **vdip;
- blkif_vdev_t vdevice = destroy->vdevice;
-
- blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
- if ( blkif == NULL )
- {
- DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
- destroy->domid, destroy->blkif_handle);
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
- while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
- vdip = &(*vdip)->next;
-
- if (*vdip != NULL)
- {
- vdi = *vdip;
- *vdip = vdi->next;
- vdi_put(vdi);
- }
-
-}
-
-int parallax_control(control_msg_t *msg)
-{
- domid_t domid;
- int ret;
-
- DPRINTF("parallax_control: msg is %p\n", msg);
-
- if (msg->type != CMSG_BLKIF_BE)
- {
- printf("Unexpected control message (%d)\n", msg->type);
- return 0;
- }
-
- switch(msg->subtype)
- {
- case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
- blkif_create((blkif_be_create_t *)msg->msg);
- break;
-
- case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
- blkif_destroy((blkif_be_destroy_t *)msg->msg);
- break;
-
- case CMSG_BLKIF_BE_VBD_CREATE:
- if ( msg->length != sizeof(blkif_be_vbd_create_t) )
- goto parse_error;
- vbd_create((blkif_be_vbd_create_t *)msg->msg);
- break;
-
- case CMSG_BLKIF_BE_VBD_DESTROY:
- if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
- goto parse_error;
- vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
- break;
-
- case CMSG_BLKIF_BE_CONNECT:
- case CMSG_BLKIF_BE_DISCONNECT:
- /* we don't manage the device channel, the tap does. */
- break;
-
- default:
- goto parse_error;
- }
- return 0;
-parse_error:
- printf("Bad control message!\n");
- return 0;
-
-}
-
-int parallax_probe(blkif_request_t *req, blkif_t *blkif)
-{
- blkif_response_t *rsp;
- vdisk_t *img_info;
- vdi_t *vdi;
- int i, nr_vdis = 0;
-
- DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif);
-
- /* We expect one buffer only. */
- if ( req->nr_segments != 1 )
- goto err;
-
- /* Make sure the buffer is page-sized. */
- if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) )
- goto err;
-
- /* fill the list of devices */
- for (i=0; i<VDI_HASHSZ; i++) {
- vdi = blkif->vdi_hash[i];
- while (vdi) {
- img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
- img_info[nr_vdis].device = vdi->vdevice;
- img_info[nr_vdis].info = 0;
- /* The -1 here accounts for the LSB in the radix tree */
- img_info[nr_vdis].capacity =
- ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
- nr_vdis++;
- vdi = vdi->next;
- }
- }
-
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_PROBE;
- rsp->status = nr_vdis; /* number of disks */
-
- DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
- return BLKTAP_RESPOND;
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_PROBE;
- rsp->status = BLKIF_RSP_ERROR;
-
- DPRINTF("parallax_probe: send error response\n");
- return BLKTAP_RESPOND;
-}
-
-typedef struct {
- blkif_request_t *req;
- int count;
- int error;
- pthread_mutex_t mutex;
-} pending_t;
-
-#define MAX_REQUESTS 64
-pending_t pending_list[MAX_REQUESTS];
-
-struct cb_param {
- pending_t *pent;
- int segment;
- uint64_t sector;
- uint64_t vblock; /* for debug printing -- can be removed. */
-};
-
-static void read_cb(struct io_ret r, void *in_param)
-{
- struct cb_param *param = (struct cb_param *)in_param;
- pending_t *p = param->pent;
- int segment = param->segment;
- blkif_request_t *req = p->req;
- unsigned long size, offset, start;
- char *dpage, *spage;
-
- spage = IO_BLOCK(r);
- if (spage == NULL) { p->error++; goto finish; }
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
-
- /* Calculate read size and offset within the read block. */
-
- offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
- size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) <<
- SECTOR_SHIFT;
- start = req->seg[segment].first_sect << SECTOR_SHIFT;
-
- DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), "
- "vblock %llx, "
- "size %lx\n",
- param->sector,
- p->req->seg[segment].first_sect,
- p->req->seg[segment].last_sect,
- param->vblock, size);
-
- memcpy(dpage + start, spage + offset, size);
- freeblock(spage);
-
- /* Done the read. Now update the pending record. */
- finish:
- pthread_mutex_lock(&p->mutex);
- p->count--;
-
- if (p->count == 0) {
- blkif_response_t *rsp;
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_READ;
- if (p->error == 0) {
- rsp->status = BLKIF_RSP_OKAY;
- } else {
- rsp->status = BLKIF_RSP_ERROR;
- }
- blktap_inject_response(rsp);
- }
-
- pthread_mutex_unlock(&p->mutex);
-
- free(param); /* TODO: replace with cached alloc/dealloc */
-}
-
-int parallax_read(blkif_request_t *req, blkif_t *blkif)
-{
- blkif_response_t *rsp;
- uint64_t vblock, gblock;
- vdi_t *vdi;
- uint64_t sector;
- int i;
- char *dpage, *spage;
- pending_t *pent;
-
- vdi = blkif_get_vdi(blkif, req->device);
-
- if ( vdi == NULL )
- goto err;
-
- pent = &pending_list[ID_TO_IDX(req->id)];
- pent->count = req->nr_segments;
- pent->req = req;
- pthread_mutex_init(&pent->mutex, NULL);
-
- for (i = 0; i < req->nr_segments; i++) {
- pthread_t tid;
- int ret;
- struct cb_param *p;
-
- /* Round the requested segment to a block address. */
- sector = req->sector_number + (8*i);
- vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-
- /* TODO: Replace this call to malloc with a cached allocation */
- p = (struct cb_param *)malloc(sizeof(struct cb_param));
- p->pent = pent;
- p->sector = sector;
- p->segment = i;
- p->vblock = vblock; /* dbg */
-
- /* Get that block from the store. */
- vdi_read(vdi, vblock, read_cb, (void *)p);
- }
-
- return BLKTAP_STOLEN;
-
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_READ;
- rsp->status = BLKIF_RSP_ERROR;
-
- return BLKTAP_RESPOND;
-}
-
-static void write_cb(struct io_ret r, void *in_param)
-{
- struct cb_param *param = (struct cb_param *)in_param;
- pending_t *p = param->pent;
- blkif_request_t *req = p->req;
-
- /* catch errors from the block code. */
- if (IO_INT(r) < 0) p->error++;
-
- pthread_mutex_lock(&p->mutex);
- p->count--;
-
- if (p->count == 0) {
- blkif_response_t *rsp;
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
- if (p->error == 0) {
- rsp->status = BLKIF_RSP_OKAY;
- } else {
- rsp->status = BLKIF_RSP_ERROR;
- }
- blktap_inject_response(rsp);
- }
-
- pthread_mutex_unlock(&p->mutex);
-
- free(param); /* TODO: replace with cached alloc/dealloc */
-}
-
-int parallax_write(blkif_request_t *req, blkif_t *blkif)
-{
- blkif_response_t *rsp;
- uint64_t sector;
- int i, writable = 0;
- uint64_t vblock, gblock;
- char *spage;
- unsigned long size, offset, start;
- vdi_t *vdi;
- pending_t *pent;
-
- vdi = blkif_get_vdi(blkif, req->device);
-
- if ( vdi == NULL )
- goto err;
-
- pent = &pending_list[ID_TO_IDX(req->id)];
- pent->count = req->nr_segments;
- pent->req = req;
- pthread_mutex_init(&pent->mutex, NULL);
-
- for (i = 0; i < req->nr_segments; i++) {
- struct cb_param *p;
-
- spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
-
- /* Round the requested segment to a block address. */
-
- sector = req->sector_number + (8*i);
- vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
-
- /* Calculate read size and offset within the read block. */
-
- offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
- size = (req->seg[i].last_sect - req->seg[i].first_sect + 1) <<
- SECTOR_SHIFT;
- start = req->seg[i].first_sect << SECTOR_SHIFT;
-
- DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld), "
- "vblock %llx, gblock %llx, "
- "size %lx\n",
- sector,
- req->seg[i].first_sect, req->seg[i].last_sect,
- vblock, gblock, size);
-
- /* XXX: For now we just freak out if they try to write a */
- /* non block-sized, block-aligned page. */
-
- if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
- printf("]\n] STRANGE WRITE!\n]\n");
- goto err;
- }
-
- /* TODO: Replace this call to malloc with a cached allocation */
- p = (struct cb_param *)malloc(sizeof(struct cb_param));
- p->pent = pent;
- p->sector = sector;
- p->segment = i;
- p->vblock = vblock; /* dbg */
-
- /* Issue the write to the store. */
- vdi_write(vdi, vblock, spage, write_cb, (void *)p);
- }
-
- return BLKTAP_STOLEN;
-
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_WRITE;
- rsp->status = BLKIF_RSP_ERROR;
-
- return BLKTAP_RESPOND;
-}
-
-int parallax_request(blkif_request_t *req)
-{
- blkif_response_t *rsp;
- domid_t dom = ID_TO_DOM(req->id);
- blkif_t *blkif = blkif_find_by_handle(dom, 0);
-
- if (blkif == NULL)
- goto err;
-
- if ( req->operation == BLKIF_OP_PROBE ) {
-
- return parallax_probe(req, blkif);
-
- } else if ( req->operation == BLKIF_OP_READ ) {
-
- return parallax_read(req, blkif);
-
- } else if ( req->operation == BLKIF_OP_WRITE ) {
-
- return parallax_write(req, blkif);
-
- } else {
- printf("Unknown request message type!\n");
- /* Unknown operation */
- goto err;
- }
-
-err:
- rsp = (blkif_response_t *)req;
- rsp->operation = req->operation;
- rsp->id = req->id;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-void __init_parallax(void)
-{
- memset(blkif_hash, 0, sizeof(blkif_hash));
-}
-
-
-
-int main(int argc, char *argv[])
-{
- DPRINTF("parallax: starting.\n");
- __init_blockstore();
- DPRINTF("parallax: initialized blockstore...\n");
- init_block_async();
- DPRINTF("parallax: initialized async blocks...\n");
- __init_vdi();
- DPRINTF("parallax: initialized vdi registry etc...\n");
- __init_parallax();
- DPRINTF("parallax: initialized local stuff..\n");
-
- blktap_register_ctrl_hook("parallax_control", parallax_control);
- blktap_register_request_hook("parallax_request", parallax_request);
- DPRINTF("parallax: added ctrl + request hooks, starting listen...\n");
- blktap_listen();
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/radix.c
--- a/tools/blktap/parallax/radix.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,631 +0,0 @@
-/*
- * Radix tree for mapping (up to) 63-bit virtual block IDs to
- * 63-bit global block IDs
- *
- * Pointers within the tree set aside the least significant bit to indicate
- * whther or not the target block is writable from this node.
- *
- * The block with ID 0 is assumed to be an empty block of all zeros
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <pthread.h>
-#include "blockstore.h"
-#include "radix.h"
-
-#define RADIX_TREE_MAP_SHIFT 9
-#define RADIX_TREE_MAP_MASK 0x1ff
-#define RADIX_TREE_MAP_ENTRIES 512
-
-/*
-#define DEBUG
-*/
-
-/* Experimental radix cache. */
-
-static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
-static int rcache_count = 0;
-#define RCACHE_MAX 1024
-
-typedef struct rcache_st {
- radix_tree_node *node;
- uint64_t id;
- struct rcache_st *hash_next;
- struct rcache_st *cache_next;
- struct rcache_st *cache_prev;
-} rcache_t;
-
-static rcache_t *rcache_head = NULL;
-static rcache_t *rcache_tail = NULL;
-
-#define RCHASH_SIZE 512ULL
-rcache_t *rcache[RCHASH_SIZE];
-#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1))
-
-void __rcache_init(void)
-{
- int i;
-
- for (i=0; i<RCHASH_SIZE; i++)
- rcache[i] = NULL;
-}
-
-
-void rcache_write(uint64_t id, radix_tree_node *node)
-{
- rcache_t *r, *tmp, **curs;
-
- pthread_mutex_lock(&rcache_mutex);
-
- /* Is it already in the cache? */
- r = rcache[RCACHE_HASH(id)];
-
- for (;;) {
- if (r == NULL)
- break;
- if (r->id == id)
- {
- memcpy(r->node, node, BLOCK_SIZE);
-
- /* bring to front. */
- if (r != rcache_head) {
-
- if (r == rcache_tail) {
- if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
- rcache_tail->cache_next = NULL;
- }
-
- tmp = r->cache_next;
- if (r->cache_next != NULL) r->cache_next->cache_prev
- = r->cache_prev;
- if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
-
- r->cache_prev = NULL;
- r->cache_next = rcache_head;
- if (rcache_head != NULL) rcache_head->cache_prev = r;
- rcache_head = r;
- }
-
-//printf("Update (%Ld)\n", r->id);
- goto done;
- }
- r = r->hash_next;
- }
-
- if ( rcache_count == RCACHE_MAX )
- {
- /* Remove an entry */
-
- r = rcache_tail;
- if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
- rcache_tail->cache_next = NULL;
- freeblock(r->node);
-
- curs = &rcache[RCACHE_HASH(r->id)];
- while ((*curs) != r)
- curs = &(*curs)->hash_next;
- *curs = r->hash_next;
-//printf("Evict (%Ld)\n", r->id);
-
- } else {
-
- r = (rcache_t *)malloc(sizeof(rcache_t));
- rcache_count++;
- }
-
- r->node = newblock();
- memcpy(r->node, node, BLOCK_SIZE);
- r->id = id;
-
- r->hash_next = rcache[RCACHE_HASH(id)];
- rcache[RCACHE_HASH(id)] = r;
-
- r->cache_prev = NULL;
- r->cache_next = rcache_head;
- if (rcache_head != NULL) rcache_head->cache_prev = r;
- rcache_head = r;
- if (rcache_tail == NULL) rcache_tail = r;
-
-//printf("Added (%Ld, %p)\n", id, r->node);
-done:
- pthread_mutex_unlock(&rcache_mutex);
-}
-
-radix_tree_node *rcache_read(uint64_t id)
-{
- rcache_t *r, *tmp;
- radix_tree_node *node = NULL;
-
- pthread_mutex_lock(&rcache_mutex);
-
- r = rcache[RCACHE_HASH(id)];
-
- for (;;) {
- if (r == NULL) {
-//printf("Miss (%Ld)\n", id);
- goto done;
- }
- if (r->id == id) break;
- r = r->hash_next;
- }
-
- /* bring to front. */
- if (r != rcache_head)
- {
- if (r == rcache_tail) {
- if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
- rcache_tail->cache_next = NULL;
- }
- tmp = r->cache_next;
- if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
- if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
-
- r->cache_prev = NULL;
- r->cache_next = rcache_head;
- if (rcache_head != NULL) rcache_head->cache_prev = r;
- rcache_head = r;
- }
-
- node = newblock();
- memcpy(node, r->node, BLOCK_SIZE);
-
-//printf("Hit (%Ld, %p)\n", id, r->node);
-done:
- pthread_mutex_unlock(&rcache_mutex);
-
- return(node);
-}
-
-
-void *rc_readblock(uint64_t id)
-{
- void *ret;
-
- ret = (void *)rcache_read(id);
-
- if (ret != NULL) return ret;
-
- ret = readblock(id);
-
- if (ret != NULL)
- rcache_write(id, ret);
-
- return(ret);
-}
-
-uint64_t rc_allocblock(void *block)
-{
- uint64_t ret;
-
- ret = allocblock(block);
-
- if (ret != ZERO)
- rcache_write(ret, block);
-
- return(ret);
-}
-
-int rc_writeblock(uint64_t id, void *block)
-{
- int ret;
-
- ret = writeblock(id, block);
- rcache_write(id, block);
-
- return(ret);
-}
-
-
-/*
- * block device interface and other helper functions
- * with these functions, block id is just a 63-bit number, with
- * no special consideration for the LSB
- */
-radix_tree_node cloneblock(radix_tree_node block);
-
-/*
- * main api
- * with these functions, the LSB of root always indicates
- * whether or not the block is writable, including the return
- * values of update and snapshot
- */
-uint64_t lookup(int height, uint64_t root, uint64_t key);
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
-uint64_t snapshot(uint64_t root);
-
-/**
- * cloneblock: clone an existing block in memory
- * @block: the old block
- *
- * @return: new block, with LSB cleared for every entry
- */
-radix_tree_node cloneblock(radix_tree_node block) {
- radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE);
- int i;
- if (node == NULL) {
- perror("cloneblock malloc");
- return NULL;
- }
- for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
- node[i] = block[i] & ONEMASK;
- return node;
-}
-
-/**
- * lookup: find a value given a key
- * @height: height in bits of the radix tree
- * @root: root node id, with set LSB indicating writable node
- * @key: key to lookup
- *
- * @return: value on success, zero on error
- */
-
-uint64_t lookup(int height, uint64_t root, uint64_t key) {
- radix_tree_node node;
- uint64_t mask = ONE;
-
- assert(key >> height == 0);
-
- /* the root block may be smaller to ensure all leaves are full */
- height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-
- /* now carve off equal sized chunks at each step */
- for (;;) {
- uint64_t oldroot;
-
-#ifdef DEBUG
- printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root,
- (int) ((key >> height) & RADIX_TREE_MAP_MASK),
- (iswritable(root) ? "" : " (readonly)"));
-#endif
-
- if (getid(root) == ZERO)
- return ZERO;
-
- oldroot = root;
- node = (radix_tree_node) rc_readblock(getid(root));
- if (node == NULL)
- return ZERO;
-
- root = node[(key >> height) & RADIX_TREE_MAP_MASK];
- mask &= root;
- freeblock(node);
-
- if (height == 0)
- return ( root & ONEMASK ) | mask;
-
- height -= RADIX_TREE_MAP_SHIFT;
- }
-
- return ZERO;
-}
-
-/*
- * update: set a radix tree entry, doing copy-on-write as necessary
- * @height: height in bits of the radix tree
- * @root: root node id, with set LSB indicating writable node
- * @key: key to set
- * @val: value to set, s.t. radix(key)=val
- *
- * @returns: (possibly new) root id on success (with LSB=1), 0 on failure
- */
-
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val) {
- int offset;
- uint64_t child;
- radix_tree_node node;
-
- /* base case--return val */
- if (height == 0)
- return val;
-
- /* the root block may be smaller to ensure all leaves are full */
- height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
- offset = (key >> height) & RADIX_TREE_MAP_MASK;
-
-#ifdef DEBUG
- printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
- offset, (iswritable(root)?"":" (clone)"));
-#endif
-
- /* load a block, or create a new one */
- if (root == ZERO) {
- node = (radix_tree_node) newblock();
- } else {
- node = (radix_tree_node) rc_readblock(getid(root));
-
- if (!iswritable(root)) {
- /* need to clone this node */
- radix_tree_node oldnode = node;
- node = cloneblock(node);
- freeblock(oldnode);
- root = ZERO;
- }
- }
-
- if (node == NULL) {
-#ifdef DEBUG
- printf("update: node is null!\n");
-#endif
- return ZERO;
- }
-
- child = update(height, node[offset], key, val);
-
- if (child == ZERO) {
- freeblock(node);
- return ZERO;
- } else if (child == node[offset]) {
- /* no change, so we already owned the child */
- assert(iswritable(root));
-
- freeblock(node);
- return root;
- }
-
- node[offset] = child;
-
- /* new/cloned blocks need to be saved */
- if (root == ZERO) {
- /* mark this as an owned block */
- root = rc_allocblock(node);
- if (root)
- root = writable(root);
- } else if (rc_writeblock(getid(root), node) < 0) {
- freeblock(node);
- return ZERO;
- }
-
- freeblock(node);
- return root;
-}
-
-/**
- * snapshot: create a snapshot
- * @root: old root node
- *
- * @return: new root node, 0 on error
- */
-uint64_t snapshot(uint64_t root) {
- radix_tree_node node, newnode;
-
- if ((node = rc_readblock(getid(root))) == NULL)
- return ZERO;
-
- newnode = cloneblock(node);
- freeblock(node);
- if (newnode == NULL)
- return ZERO;
-
- root = rc_allocblock(newnode);
- freeblock(newnode);
-
- if (root == ZERO)
- return ZERO;
- else
- return writable(root);
-}
-
-/**
- * collapse: collapse a parent onto a child.
- *
- * NOTE: This assumes that parent and child really are, and further that
- * there are no other children forked from this parent. (children of the
- * child are okay...)
- */
-
-int collapse(int height, uint64_t proot, uint64_t croot)
-{
- int i, numlinks, ret, total = 0;
- radix_tree_node pnode, cnode;
-
- if (height == 0) {
- height = -1; /* terminate recursion */
- } else {
- height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
- }
- numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
-
- /* Terminal cases: */
-
- if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
- return -1;
-
- /* get roots */
- if ((pnode = readblock(getid(proot))) == NULL)
- return -1;
-
- if ((cnode = readblock(getid(croot))) == NULL)
- {
- freeblock(pnode);
- return -1;
- }
-
- /* For each writable link in proot */
- for (i=0; i<numlinks; i++)
- {
- if ( pnode[i] == cnode[i] ) continue;
-
- /* collapse (next level) */
- /* if height != 0 and writable... */
- if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
- {
- //printf(" %Ld is writable (i=%d).\n", getid(pnode[i]), i);
- ret = collapse(height, pnode[i], cnode[i]);
- if (ret == -1)
- {
- total = -1;
- } else {
- total += ret;
- }
- }
-
-
- }
-
- /* if plink is writable, AND clink is writable -> free plink block */
- if ( ( iswritable(proot) ) && ( iswritable(croot) ) )
- {
- releaseblock(getid(proot));
- if (ret >=0) total++;
- //printf(" Delete %Ld\n", getid(proot));
- }
-//printf("done : %Ld\n", getid(proot));
- return total;
-
-}
-
-
-void print_root(uint64_t root, int height, FILE *dot_f)
-{
- FILE *f;
- int i;
- radix_tree_node node;
- char *style[2] = { "", "style=bold,color=blue," };
-
- if (dot_f == NULL) {
- f = fopen("radix.dot", "w");
- if (f == NULL) {
- perror("print_root: open");
- return;
- }
-
- /* write graph preamble */
- fprintf(f, "digraph G {\n");
-
- /* add a node for this root. */
- fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
- getid(root), style[iswritable(root)], getid(root));
- }
-
- printf("print_root(%Ld)\n", getid(root));
-
- /* base case */
- if (height == 0) {
- /* add a node and edge for each child root */
- node = (radix_tree_node) readblock(getid(root));
- if (node == NULL)
- return;
-
- for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
- if (node[i] != ZERO) {
- fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
- getid(node[i]), style[iswritable(node[i])],
- getid(node[i]));
- fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root),
- getid(node[i]), i);
- }
- }
- freeblock(node);
- return;
- }
-
- /* the root block may be smaller to ensure all leaves are full */
- height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
-
- if (getid(root) == ZERO)
- return;
-
- node = (radix_tree_node) readblock(getid(root));
- if (node == NULL)
- return;
-
- /* add a node and edge for each child root */
- for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
- if (node[i] != ZERO) {
- fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
- getid(node[i]), style[iswritable(node[i])],
- getid(node[i]));
-
- print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
- fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root),
- getid(node[i]), i);
- }
-
- freeblock(node);
-
- /* write graph postamble */
- if (dot_f == NULL) {
- fprintf(f, "}\n");
- fclose(f);
- }
-}
-
-#ifdef RADIX_STANDALONE
-
-int main(int argc, char **argv) {
- uint64_t key = ZERO, val = ZERO;
- uint64_t root = writable(2ULL);
- uint64_t p = ZERO, c = ZERO;
- int v;
- char buff[4096];
-
- __init_blockstore();
-
- memset(buff, 0, 4096);
- /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644);
-
- if (fp < 3) {
- perror("open");
- return -1;
- }
- if (lseek(fp, 0, SEEK_END) == 0) {
- write(fp, buff, 4096);
- }*/
-
- allocblock(buff);
-
- printf("Recognized commands:\n"
- "Note: the LSB of a node number indicates if it is writable\n"
- " root <node> set root to <node>\n"
- " snapshot take a snapshot of the root\n"
- " set <key> <val> set key=val\n"
- " get <key> query key\n"
- " c <proot> <croot> collapse\n"
- " pr print tree to dot\n"
- " pf <1=verbose> print freelist\n"
- " quit\n"
- "\nroot = %Ld\n", root);
- for (;;) {
- //print_root(root, 34, NULL);
- //system("dot radix.dot -Tps -o radix.ps");
-
- printf("> ");
- fflush(stdout);
- fgets(buff, 1024, stdin);
- if (feof(stdin))
- break;
- if (sscanf(buff, " root %Ld", &root) == 1) {
- printf("root set to %Ld\n", root);
- } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
- root = update(34, root, key, val);
- printf("root = %Ld\n", root);
- } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
- v = collapse(34, p, c);
- printf("reclaimed %d blocks.\n", v);
- } else if (sscanf(buff, " get %Ld", &key) == 1) {
- val = lookup(34, root, key);
- printf("value = %Ld\n", val);
- } else if (!strcmp(buff, "quit\n")) {
- break;
- } else if (!strcmp(buff, "snapshot\n")) {
- root = snapshot(root);
- printf("new root = %Ld\n", root);
- } else if (sscanf(buff, " pr %Ld", &root) == 1) {
- print_root(root, 34, NULL);
- } else if (sscanf(buff, " pf %d", &v) == 1) {
- freelist_count(v);
- } else if (!strcmp(buff, "pf\n")) {
- freelist_count(0);
- } else {
- printf("command not recognized\n");
- }
- }
- return 0;
-}
-
-#endif
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/radix.h
--- a/tools/blktap/parallax/radix.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-/*
- * Radix tree for mapping (up to) 63-bit virtual block IDs to
- * 63-bit global block IDs
- *
- * Pointers within the tree set aside the least significant bit to indicate
- * whther or not the target block is writable from this node.
- *
- * The block with ID 0 is assumed to be an empty block of all zeros
- */
-
-#ifndef __RADIX_H__
-#define __RADIX_H__
-
-/* I don't really like exposing these, but... */
-#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)
-#define putid(x) ((x)<<1)
-#define writable(x) (((x)<<1)|1LL)
-#define iswritable(x) ((x)&1LL)
-#define ZERO 0LL
-#define ONE 1LL
-#define ONEMASK 0xffffffffffffffeLL
-
-#define RADIX_TREE_MAP_SHIFT 9
-#define RADIX_TREE_MAP_MASK 0x1ff
-#define RADIX_TREE_MAP_ENTRIES 512
-
-typedef uint64_t *radix_tree_node;
-
-
-/*
- * main api
- * with these functions, the LSB of root always indicates
- * whether or not the block is writable, including the return
- * values of update and snapshot
- */
-uint64_t lookup(int height, uint64_t root, uint64_t key);
-uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
-uint64_t snapshot(uint64_t root);
-int collapse(int height, uint64_t proot, uint64_t croot);
-int isprivate(int height, uint64_t root, uint64_t key);
-
-
-void __rcache_init(void);
-
-#endif /* __RADIX_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/requests-async.c
--- a/tools/blktap/parallax/requests-async.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,762 +0,0 @@
-/* requests-async.c
- *
- * asynchronous request dispatcher for radix access in parallax.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <assert.h>
-#include <pthread.h>
-#include <err.h>
-#include <zlib.h> /* for crc32() */
-#include "requests-async.h"
-#include "vdi.h"
-#include "radix.h"
-
-/*
- * Split a virtual address into its three 9-bit radix indices:
- * bits 26..18 select the L1 slot, bits 17..9 the L2 slot and
- * bits 8..0 the L3 slot.  Callers in this file pass the address
- * after it has been shifted left by one (see vdi_read/vdi_write).
- */
-#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)
-#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)
-#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))
-
-
-/* Debug tracing: compiled out unless the condition below is changed to 1. */
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/*
- * Per-block metadata kept in the L3 slot that immediately follows a data
- * block pointer (read_cb/write_cb access it at index L3_IDX(vaddr) + 1).
- */
-struct block_info {
- /* CRC32 of the data block, computed in vdi_write and checked in read_cb. */
- uint32_t crc;
- /* Not used for I/O; write_cb stores a per-code-path tag (101..107) here. */
- uint32_t unused;
-};
-
-/* State carried across callbacks for one asynchronous read or write. */
-struct io_req {
- enum { IO_OP_READ, IO_OP_WRITE } op;
- /* Radix root copied from the VDI when the request was created. */
- uint64_t root;
- /* Virtual block address, already shifted left by one. */
- uint64_t vaddr;
- /* Current position in the state machine (a value of enum states). */
- int state;
- /* Completion callback and its opaque argument, supplied by the caller. */
- io_cb_t cb;
- void *param;
- /* The VDI's radix lock; locked per L1 index for the request's lifetime. */
- struct radix_lock *lock;
-
- /* internal stuff: */
- struct io_ret retval;/* holds the return while we unlock. */
- char *block; /* the block to write */
- /* Node buffers cached by write_cb, indexed by enum radix_offsets. */
- radix_tree_node radix[3];
- /* Block addresses of the cached nodes, indexed the same way. */
- uint64_t radix_addr[3];
- /* block_info read from (reads) or to be stored in (writes) the L3 node. */
- struct block_info bi;
-};
-
-/* Apply ONEMASK to every entry of a radix node, dropping the writable flag. */
-void clear_w_bits(radix_tree_node node)
-{
-    radix_tree_node slot = node;
-    radix_tree_node end = node + RADIX_TREE_MAP_ENTRIES;
-
-    while (slot < end) {
-        *slot &= ONEMASK;
-        slot++;
-    }
-}
-
-/*
- * Apply ONEMASK to the even-numbered entries of an L3 node.  Odd entries
- * are skipped: read_cb and write_cb treat them as struct block_info.
- */
-void clear_L3_w_bits(radix_tree_node node)
-{
-    radix_tree_node slot = node;
-    int remaining = RADIX_TREE_MAP_ENTRIES;
-
-    while (remaining > 0) {
-        *slot &= ONEMASK;
-        slot += 2;
-        remaining -= 2;
-    }
-}
-
-/*
- * States of the read_cb / write_cb state machines.  A state names the
- * operation whose completion the callback is waiting for.  In write_cb,
- * suffix "z" marks a path taken when a radix entry is zero (empty
- * subtree) and "f" a path taken when the entry is not writable (fault).
- */
-enum states {
- /* both */
- READ_L1,
- READ_L2,
- READ_L3,
-
- /* read */
- READ_LOCKED,
- READ_DATA,
- READ_UNLOCKED,
- RETURN_ZERO,
-
- /* write */
- WRITE_LOCKED,
- WRITE_DATA,
- WRITE_L3,
- WRITE_UNLOCKED,
-
- /* L3 Zero Path */
- ALLOC_DATA_L3z,
- WRITE_L3_L3z,
-
- /* L3 Fault Path */
- ALLOC_DATA_L3f,
- WRITE_L3_L3f,
-
- /* L2 Zero Path */
- ALLOC_DATA_L2z,
- WRITE_L2_L2z,
- ALLOC_L3_L2z,
- /* WRITE_L2_L3z is not referenced by the callbacks in this file. */
- WRITE_L2_L3z,
-
- /* L2 Fault Path */
- READ_L3_L2f,
- ALLOC_DATA_L2f,
- WRITE_L2_L2f,
- ALLOC_L3_L2f,
- /* WRITE_L2_L3f is not referenced by the callbacks in this file. */
- WRITE_L2_L3f,
-
- /* L1 Zero Path */
- ALLOC_DATA_L1z,
- ALLOC_L3_L1z,
- ALLOC_L2_L1z,
- WRITE_L1_L1z,
-
- /* L1 Fault Path */
- READ_L2_L1f,
- READ_L3_L1f,
- ALLOC_DATA_L1f,
- ALLOC_L3_L1f,
- ALLOC_L2_L1f,
- WRITE_L1_L1f,
-
-};
-
-/* Indices into io_req.radix[] and io_req.radix_addr[], one per tree level. */
-enum radix_offsets {
- L1 = 0,
- L2 = 1,
- L3 = 2
-};
-
-
-static void read_cb(struct io_ret ret, void *param);
-static void write_cb(struct io_ret ret, void *param);
-
-/*
- * Queue an asynchronous read of virtual block `vaddr` from `vdi`.
- *
- * Returns 0 once the request has been handed to the lock layer (the result
- * is then delivered through cb(ret, param)), ERR_BAD_VADDR if the address
- * is outside VADDR_MASK, or ERR_NOMEM if the request cannot be allocated.
- */
-int vdi_read(vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param)
-{
-    struct io_req *rq;
-    int level;
-
-    if (!VALID_VADDR(vaddr))
-        return ERR_BAD_VADDR;
-
-    rq = (struct io_req *)malloc(sizeof (struct io_req));
-    if (rq == NULL)
-        return ERR_NOMEM;
-
-    /* Every second slot of a bottom-level radix node holds crc32 data,  */
-    /* so data pointers live at even indices: double the address.        */
-    vaddr <<= 1;
-
-    for (level = 0; level < 3; level++)
-        rq->radix[level] = NULL;
-
-    rq->op    = IO_OP_READ;
-    rq->state = READ_LOCKED;
-    rq->vaddr = vaddr;
-    rq->root  = vdi->radix_root;
-    rq->lock  = vdi->radix_lock;
-    rq->cb    = cb;
-    rq->param = param;
-
-    block_rlock(rq->lock, L1_IDX(vaddr), read_cb, rq);
-
-    return 0;
-}
-
-
-/*
- * Queue an asynchronous write of `block` to virtual block `vaddr` of `vdi`.
- *
- * The CRC32 of the block is computed up front and travels with the request.
- * Returns 0 once the request has been handed to the lock layer (completion
- * is reported through cb(ret, param)), ERR_BAD_VADDR for an out-of-range
- * address, or ERR_NOMEM if the request cannot be allocated.
- */
-int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block,
-              io_cb_t cb, void *param)
-{
-    struct io_req *rq;
-    int level;
-
-    if (!VALID_VADDR(vaddr))
-        return ERR_BAD_VADDR;
-
-    rq = (struct io_req *)malloc(sizeof (struct io_req));
-    if (rq == NULL)
-        return ERR_NOMEM;
-
-    /* Every second slot of a bottom-level radix node holds crc32 data,  */
-    /* so data pointers live at even indices: double the address.        */
-    vaddr <<= 1;
-
-    for (level = 0; level < 3; level++)
-        rq->radix[level] = NULL;
-
-    rq->op    = IO_OP_WRITE;
-    rq->state = WRITE_LOCKED;
-    rq->vaddr = vaddr;
-    rq->block = block;
-    rq->root  = vdi->radix_root;
-    rq->lock  = vdi->radix_lock;
-    rq->cb    = cb;
-    rq->param = param;
-    rq->radix_addr[L1] = getid(rq->root); /* for consistency */
-
-    /* Todo: add a pseudoheader to the block to include some location */
-    /* information in the CRC as well. */
-    rq->bi.crc = (uint32_t) crc32(0L, Z_NULL, 0);
-    rq->bi.crc = (uint32_t) crc32(rq->bi.crc, block, BLOCK_SIZE);
-    rq->bi.unused = 0xdeadbeef;
-
-    block_wlock(rq->lock, L1_IDX(vaddr), write_cb, rq);
-
-    return 0;
-}
-
-/*
- * read_cb: completion callback driving the asynchronous read state machine.
- *
- * `ret` is the result of the operation started in the previous state and
- * `param` is the struct io_req.  The walk is
- *   READ_LOCKED -> READ_L1 -> READ_L2 -> READ_L3 -> READ_DATA
- *               -> READ_UNLOCKED (deliver the data block)
- * with a short-cut to RETURN_ZERO (deliver a fresh block from newblock())
- * as soon as a zero entry is found at any level.
- *
- * On any failure the L1 read lock taken by vdi_read is released before the
- * failing result is passed to the caller's callback.
- */
-static void read_cb(struct io_ret ret, void *param)
-{
-    struct io_req *req = (struct io_req *)param;
-    radix_tree_node node;
-    uint64_t idx;
-    char *block;
-    void *req_param;
-
-    DPRINTF("read_cb\n");
-    /* get record */
-    switch(req->state) {
-
-    case READ_LOCKED:
-
-        DPRINTF("READ_LOCKED\n");
-        req->state = READ_L1;
-        block_read(getid(req->root), read_cb, req);
-        break;
-
-    case READ_L1: /* block is the radix root */
-
-        DPRINTF("READ_L1\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx = getid( node[L1_IDX(req->vaddr)] );
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_L2;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_L2:
-
-        DPRINTF("READ_L2\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx = getid( node[L2_IDX(req->vaddr)] );
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_L3;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_L3:
-
-        DPRINTF("READ_L3\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-        node = (radix_tree_node) block;
-        idx = getid( node[L3_IDX(req->vaddr)] );
-        /* The slot after the data pointer holds the block's crc record. */
-        memcpy(&req->bi, &node[L3_IDX(req->vaddr) + 1], sizeof(req->bi));
-        free(block);
-        if ( idx == ZERO ) {
-            req->state = RETURN_ZERO;
-            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        } else {
-            req->state = READ_DATA;
-            block_read(idx, read_cb, req);
-        }
-        break;
-
-    case READ_DATA:
-    {
-        uint32_t crc;
-
-        DPRINTF("READ_DATA\n");
-        block = IO_BLOCK(ret);
-        if (block == NULL) goto fail;
-
-        /* crc check */
-        crc = (uint32_t) crc32(0L, Z_NULL, 0);
-        crc = (uint32_t) crc32(crc, block, BLOCK_SIZE);
-        if (crc != req->bi.crc) {
-            /* TODO: add a retry loop here. */
-            /* Do this after the cache is added -- make sure to */
-            /* invalidate the bad page before reissuing the read. */
-
-            warn("Bad CRC on vaddr (%llu:%u)\n",
-                 (unsigned long long)req->vaddr,
-                 (unsigned int)req->bi.unused);
-#ifdef PRINT_BADCRC_PAGES
-            {
-                int j;
-                for (j=0; j<BLOCK_SIZE; j++) {
-                    /* <ctype.h> functions need an unsigned char value. */
-                    if (isprint((unsigned char)block[j])) {
-                        printf("%c", block[j]);
-                    } else {
-                        printf(".");
-                    }
-                    if ((j % 64) == 0) printf("\n");
-                }
-            }
-#endif /* PRINT_BADCRC_PAGES */
-
-            /* fast and loose for the moment. */
-            /* goto fail; */
-        }
-
-        /* Keep the data result; the unlock callback gets a different ret. */
-        req->retval = ret;
-        req->state = READ_UNLOCKED;
-        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-        break;
-    }
-    case READ_UNLOCKED:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("READ_UNLOCKED\n");
-        req_param = req->param;
-        r = req->retval;
-        cb = req->cb;
-        free(req);
-        cb(r, req_param);
-        break;
-    }
-
-    case RETURN_ZERO:
-    {
-        struct io_ret r;
-        io_cb_t cb;
-        DPRINTF("RETURN_ZERO\n");
-        req_param = req->param;
-        cb = req->cb;
-        free(req);
-        r.type = IO_BLOCK_T;
-        r.u.b = newblock();
-        cb(r, req_param);
-        break;
-    }
-
-    default:
-        DPRINTF("*** Read: Bad state! (%d) ***\n", req->state);
-        goto fail;
-    }
-
-    return;
-
- fail:
-    /*
-     * Every state that can fail runs with the L1 read lock held, so drop
-     * it first; READ_UNLOCKED then hands the failing result to the caller.
-     */
-    DPRINTF("asyn_read had a read error.\n");
-    req->retval = ret;
-    req->state = READ_UNLOCKED;
-    block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
-}
-
-/* Tag the request's block_info and copy it into the L3 slot after the data pointer. */
-static void store_block_info(struct io_req *req, uint32_t tag)
-{
-    req->bi.unused = tag;
-    memcpy(&req->radix[L3][L3_IDX(req->vaddr) + 1], &req->bi, sizeof(req->bi));
-}
-
-/*
- * write_cb: completion callback driving the asynchronous write state machine.
- *
- * `r` is the result of the operation started in the previous state and
- * `param` is the struct io_req.  The radix tree is walked from the root;
- * at each level the entry is either
- *   - writable: descend (READ_L2, READ_L3) or overwrite in place (WRITE_DATA),
- *   - zero ("z" paths): allocate the data block and every missing node
- *     bottom-up, then patch the lowest existing node in place, or
- *   - read-only ("f" paths): copy the nodes below, clear their writable
- *     flags, allocate new blocks bottom-up and patch the lowest writable
- *     node in place.
- * Every path ends in one of the WRITE_* states, which frees the cached
- * nodes, releases the L1 write lock and reports the last result through
- * WRITE_UNLOCKED.
- *
- * On failure the cached nodes are freed, the write lock is released and the
- * caller's callback receives an IO_INT_T result of -1.
- *
- * NOTE(review): results of block_alloc are used without a failure check,
- * as in the original code -- confirm how block_alloc reports errors.
- */
-static void write_cb(struct io_ret r, void *param)
-{
-    struct io_req *req = (struct io_req *)param;
-    radix_tree_node node;
-    uint64_t a, addr;
-    void *req_param;
-    int i;
-
-    switch(req->state) {
-
-    case WRITE_LOCKED:
-
-        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
-        req->state = READ_L1;
-        block_read(getid(req->root), write_cb, req);
-        break;
-
-    case READ_L1: /* block is the radix root */
-
-        DPRINTF("READ_L1\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a = node[L1_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L2] = addr;
-        req->radix[L1] = node;
-
-        if ( addr == ZERO ) {
-            /* L1 empty subtree: */
-            req->state = ALLOC_DATA_L1z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L1 fault: */
-            req->state = READ_L2_L1f;
-            block_read( addr, write_cb, req );
-        } else {
-            req->state = READ_L2;
-            block_read( addr, write_cb, req );
-        }
-        break;
-
-    case READ_L2:
-
-        DPRINTF("READ_L2\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L3] = addr;
-        req->radix[L2] = node;
-
-        if ( addr == ZERO ) {
-            /* L2 empty subtree: */
-            req->state = ALLOC_DATA_L2z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L2 fault: */
-            req->state = READ_L3_L2f;
-            block_read( addr, write_cb, req );
-        } else {
-            req->state = READ_L3;
-            block_read( addr, write_cb, req );
-        }
-        break;
-
-    case READ_L3:
-
-        DPRINTF("READ_L3\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        if (node == NULL) goto fail;
-        a = node[L3_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix[L3] = node;
-
-        if ( addr == ZERO ) {
-            /* L3 empty slot: */
-            req->state = ALLOC_DATA_L3z;
-            block_alloc( req->block, write_cb, req );
-        } else if ( !iswritable(a) ) {
-            /* L3 fault: */
-            req->state = ALLOC_DATA_L3f;
-            block_alloc( req->block, write_cb, req );
-        } else {
-            req->state = WRITE_DATA;
-            block_write( addr, req->block, write_cb, req );
-        }
-        break;
-
-    case WRITE_DATA:
-
-        DPRINTF("WRITE_DATA\n");
-        /* The L3 radix points to the correct block, we just need to */
-        /* update the crc. */
-        if (IO_INT(r) < 0) goto fail;
-        store_block_info(req, 101);
-        req->state = WRITE_L3;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L3 Zero Path: */
-
-    case ALLOC_DATA_L3z:
-
-        DPRINTF("ALLOC_DATA_L3z\n");
-        addr = IO_ADDR(r);
-        req->radix[L3][L3_IDX(req->vaddr)] = writable(addr);
-        store_block_info(req, 102);
-        req->state = WRITE_L3_L3z;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L3 Fault Path: */
-
-    case ALLOC_DATA_L3f:
-
-        DPRINTF("ALLOC_DATA_L3f\n");
-        addr = IO_ADDR(r);
-        req->radix[L3][L3_IDX(req->vaddr)] = writable(addr);
-        store_block_info(req, 103);
-        req->state = WRITE_L3_L3f;
-        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
-        break;
-
-    /* L2 Zero Path: */
-
-    case ALLOC_DATA_L2z:
-
-        DPRINTF("ALLOC_DATA_L2z\n");
-        addr = IO_ADDR(r);
-        req->radix[L3] = newblock();
-        if (req->radix[L3] == NULL) goto fail;
-        req->radix[L3][L3_IDX(req->vaddr)] = writable(addr);
-        store_block_info(req, 104);
-        req->state = ALLOC_L3_L2z;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L2z:
-
-        DPRINTF("ALLOC_L3_L2z\n");
-        addr = IO_ADDR(r);
-        req->radix[L2][L2_IDX(req->vaddr)] = writable(addr);
-        req->state = WRITE_L2_L2z;
-        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
-        break;
-
-    /* L2 Fault Path: */
-
-    case READ_L3_L2f:
-
-        DPRINTF("READ_L3_L2f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        /* Check before touching the node: a failed read yields NULL. */
-        if (node == NULL) goto fail;
-        clear_L3_w_bits(node);
-
-        req->radix[L3] = node;
-        req->state = ALLOC_DATA_L2f;
-        block_alloc( req->block, write_cb, req );
-        break;
-
-    case ALLOC_DATA_L2f:
-
-        DPRINTF("ALLOC_DATA_L2f\n");
-        addr = IO_ADDR(r);
-        req->radix[L3][L3_IDX(req->vaddr)] = writable(addr);
-        store_block_info(req, 105);
-        req->state = ALLOC_L3_L2f;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L2f:
-
-        DPRINTF("ALLOC_L3_L2f\n");
-        addr = IO_ADDR(r);
-        req->radix[L2][L2_IDX(req->vaddr)] = writable(addr);
-        req->state = WRITE_L2_L2f;
-        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
-        break;
-
-    /* L1 Zero Path: */
-
-    case ALLOC_DATA_L1z:
-
-        DPRINTF("ALLOC_DATA_L1z\n");
-        addr = IO_ADDR(r);
-        req->radix[L3] = newblock();
-        if (req->radix[L3] == NULL) goto fail;
-        req->radix[L3][L3_IDX(req->vaddr)] = writable(addr);
-        store_block_info(req, 106);
-        req->state = ALLOC_L3_L1z;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L1z:
-
-        DPRINTF("ALLOC_L3_L1z\n");
-        addr = IO_ADDR(r);
-        req->radix[L2] = newblock();
-        if (req->radix[L2] == NULL) goto fail;
-        req->radix[L2][L2_IDX(req->vaddr)] = writable(addr);
-        req->state = ALLOC_L2_L1z;
-        block_alloc( (char*)req->radix[L2], write_cb, req );
-        break;
-
-    case ALLOC_L2_L1z:
-
-        DPRINTF("ALLOC_L2_L1z\n");
-        addr = IO_ADDR(r);
-        req->radix[L1][L1_IDX(req->vaddr)] = writable(addr);
-        req->state = WRITE_L1_L1z;
-        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
-        break;
-
-    /* L1 Fault Path: */
-
-    case READ_L2_L1f:
-
-        DPRINTF("READ_L2_L1f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        /* Check before touching the node: a failed read yields NULL. */
-        if (node == NULL) goto fail;
-        clear_w_bits(node);
-        a = node[L2_IDX(req->vaddr)];
-        addr = getid(a);
-
-        req->radix_addr[L3] = addr;
-        req->radix[L2] = node;
-
-        if (addr == ZERO) {
-            /* nothing below L2, create an empty L3 and alloc data. */
-            /* (So skip READ_L3_L1f.) */
-            req->radix[L3] = newblock();
-            if (req->radix[L3] == NULL) goto fail;
-            req->state = ALLOC_DATA_L1f;
-            block_alloc( req->block, write_cb, req );
-        } else {
-            req->state = READ_L3_L1f;
-            block_read( addr, write_cb, req );
-        }
-        break;
-
-    case READ_L3_L1f:
-
-        DPRINTF("READ_L3_L1f\n");
-        node = (radix_tree_node) IO_BLOCK(r);
-        /* Check before touching the node: a failed read yields NULL. */
-        if (node == NULL) goto fail;
-        clear_L3_w_bits(node);
-
-        req->radix[L3] = node;
-        req->state = ALLOC_DATA_L1f;
-        block_alloc( req->block, write_cb, req );
-        break;
-
-    case ALLOC_DATA_L1f:
-
-        DPRINTF("ALLOC_DATA_L1f\n");
-        addr = IO_ADDR(r);
-        req->radix[L3][L3_IDX(req->vaddr)] = writable(addr);
-        store_block_info(req, 107);
-        req->state = ALLOC_L3_L1f;
-        block_alloc( (char*)req->radix[L3], write_cb, req );
-        break;
-
-    case ALLOC_L3_L1f:
-
-        DPRINTF("ALLOC_L3_L1f\n");
-        addr = IO_ADDR(r);
-        req->radix[L2][L2_IDX(req->vaddr)] = writable(addr);
-        req->state = ALLOC_L2_L1f;
-        block_alloc( (char*)req->radix[L2], write_cb, req );
-        break;
-
-    case ALLOC_L2_L1f:
-
-        DPRINTF("ALLOC_L2_L1f\n");
-        addr = IO_ADDR(r);
-        req->radix[L1][L1_IDX(req->vaddr)] = writable(addr);
-        req->state = WRITE_L1_L1f;
-        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
-        break;
-
-    case WRITE_L3:
-    case WRITE_L3_L3z:
-    case WRITE_L3_L3f:
-    case WRITE_L2_L2z:
-    case WRITE_L2_L2f:
-    case WRITE_L1_L1z:
-    case WRITE_L1_L1f:
-
-        DPRINTF("DONE\n");
-        /* free any saved node vals. */
-        for (i=0; i<3; i++) {
-            free(req->radix[i]);
-            req->radix[i] = NULL;
-        }
-        /* Keep the write result; the unlock callback gets a different r. */
-        req->retval = r;
-        req->state = WRITE_UNLOCKED;
-        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
-        break;
-
-    case WRITE_UNLOCKED:
-    {
-        struct io_ret result;
-        io_cb_t cb;
-        DPRINTF("WRITE_UNLOCKED!\n");
-        req_param = req->param;
-        result = req->retval;
-        cb = req->cb;
-        free(req);
-        cb(result, req_param);
-        break;
-    }
-
-    default:
-        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
-        goto fail;
-    }
-
-    return;
-
- fail:
-    /*
-     * Every state that can fail runs with the L1 write lock held.  Free
-     * the cached nodes, release the lock, and let WRITE_UNLOCKED report
-     * the error to the caller.
-     */
-    DPRINTF("asyn_write had a read error mid-way.\n");
-    for (i=0; i<3; i++) {
-        free(req->radix[i]);
-        req->radix[i] = NULL;
-    }
-    req->retval.type = IO_INT_T;
-    req->retval.u.i = -1;
-    req->state = WRITE_UNLOCKED;
-    block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
-}
-
-/*
- * Synchronous wrapper around vdi_read: returns the block delivered to the
- * completion callback, or NULL if the request could not be queued.
- *
- * A mutex is used as a one-shot completion signal: it is locked before the
- * request is issued, the callback unlocks it, and the second lock below
- * waits for that to happen.  The callback is a GCC nested function.
- */
-char *vdi_read_s(vdi_t *vdi, uint64_t vaddr)
-{
-    pthread_mutex_t done = PTHREAD_MUTEX_INITIALIZER;
-    char *data = NULL;
-
-    void reads_cb(struct io_ret r, void *param)
-    {
-        data = IO_BLOCK(r);
-        pthread_mutex_unlock((pthread_mutex_t *)param);
-    }
-
-    pthread_mutex_lock(&done);
-
-    if (vdi_read(vdi, vaddr, reads_cb, &done) != 0)
-        return data;
-
-    /* Wait for reads_cb to release the mutex. */
-    pthread_mutex_lock(&done);
-    return data;
-}
-
-
-/*
- * Synchronous wrapper around vdi_write.
- *
- * Returns the integer result delivered to the completion callback.  If the
- * request cannot be queued the callback never runs, so vdi_write's own
- * error code (ERR_BAD_VADDR or ERR_NOMEM) is returned instead.
- *
- * A mutex is used as a one-shot completion signal: it is locked before the
- * request is issued, the callback unlocks it, and the second lock below
- * waits for that to happen.  The callback is a GCC nested function.
- */
-int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block)
-{
-    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
-    int ret, result = 0;
-
-    void writes_cb(struct io_ret r, void *param)
-    {
-        result = IO_INT(r);
-        pthread_mutex_unlock((pthread_mutex_t *)param);
-    }
-
-    pthread_mutex_lock(&m);
-    ret = vdi_write(vdi, vaddr, block, writes_cb, &m);
-
-    /* Nothing was queued: there is no callback result to wait for. */
-    if (ret != 0)
-        return ret;
-
-    /* Wait for writes_cb to release the mutex. */
-    pthread_mutex_lock(&m);
-
-    return result;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/requests-async.h
--- a/tools/blktap/parallax/requests-async.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#ifndef _REQUESTSASYNC_H_
-#define _REQUESTSASYNC_H_
-
-#include "block-async.h"
-#include "blockstore.h" /* for newblock etc. */
-
-/*
-#define BLOCK_SIZE 4096
-#define ZERO 0ULL
-#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
-#define iswritable(x) (((x) & 1LLU) != 0)
-#define writable(x) (((x) << 1) | 1LLU)
-#define readonly(x) ((uint64_t)((x) << 1))
-*/
-
-/* Virtual block addresses are limited to the low 26 bits. */
-#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */
-#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x))
-
-/*
- * Asynchronous interface: a return of 0 means the request was queued and
- * its result will be passed to cb(ret, param); a negative ERR_* value
- * means nothing was queued and cb will not be called.
- */
-int vdi_read (vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param);
-int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, io_cb_t cb, void *param);
-
-/* synchronous versions: */
-char *vdi_read_s (vdi_t *vdi, uint64_t vaddr);
-int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block);
-
-/* Error codes returned by vdi_read/vdi_write when a request is rejected. */
-#define ERR_BAD_VADDR (-1)
-#define ERR_NOMEM (-2)
-
-#endif //_REQUESTSASYNC_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/snaplog.c
--- a/tools/blktap/parallax/snaplog.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,238 +0,0 @@
-/**************************************************************************
- *
- * snaplog.c
- *
- * Snapshot log on-disk data structure.
- *
- */
-
- /* VDI histories are made from chains of snapshot logs. These logs record
- * the (radix) root and timestamp of individual snapshots.
- *
- * creation of a new VDI involves 'forking' a snapshot log, by creating a
- * new, empty log (in a new VDI) and parenting it off of a record in an
- * existing snapshot log.
- *
- * snapshot log blocks have at most one writer.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "snaplog.h"
-
-
-
-/*
- * Read block `block` and return it as a snapshot-log block.
- * Returns NULL if the read fails or the block does not carry SNAP_MAGIC;
- * otherwise the caller owns the result and releases it with freeblock().
- */
-snap_block_t *snap_get_block(uint64_t block)
-{
-    snap_block_t *candidate = (snap_block_t *)readblock(block);
-
-    if ( candidate != NULL && candidate->hdr.magic != SNAP_MAGIC ) {
-        freeblock(candidate);
-        candidate = NULL;
-    }
-
-    return candidate;
-}
-
-/*
- * Copy the snapshot record named by `id` into `*target`.
- *
- * Returns 0 on success and -1 if either pointer is NULL, the block cannot
- * be read as a snapshot block, or the index is out of range.
- *
- * NOTE(review): index == nr_entries (the next unused slot) is still
- * accepted, as before; only indices past the end of snaps[] are newly
- * rejected.  Confirm whether callers rely on reading the unused slot.
- */
-int snap_get_id(snap_id_t *id, snap_rec_t *target)
-{
-    snap_block_t *blk;
-    int rc = -1;
-
-    if ( id == NULL || target == NULL )
-        return -1;
-
-    blk = snap_get_block(id->block);
-
-    if ( blk == NULL )
-        return -1;
-
-    /* Stay inside snaps[] even when the block is completely full. */
-    if ( id->index <= blk->hdr.nr_entries && id->index < SNAPS_PER_BLOCK ) {
-        *target = blk->snaps[id->index];
-        rc = 0;
-    }
-
-    freeblock(blk);
-    return rc;
-}
-
-/*
- * Allocate a new, empty snapshot-log block.
- *
- * parent_id  record the new block is chained after, or NULL for a root log
- * fork_id    record the log was forked from (required when parent_id is set)
- * new_id     receives the id of the new block, with index 0
- *
- * When the parent lives in a different block from the fork point, the
- * running log_entries count is inherited from the parent block.
- * Returns 0 on success, -1 on any allocation or read failure.
- */
-int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
-                        snap_id_t *new_id)
-{
-    snap_block_t *blk, *pblk;
-
-    if ( (parent_id != NULL) && (fork_id == NULL) )
-        return -1;
-
-    blk = (snap_block_t *)newblock();
-    if ( blk == NULL )
-        return -1;
-
-    blk->hdr.magic = SNAP_MAGIC;
-    blk->hdr.nr_entries = 0;
-    blk->hdr.log_entries = 0;
-    blk->hdr.immutable = 0;
-
-    if ( (parent_id != NULL)
-         && (parent_id->block != fork_id->block)
-         && (parent_id->block != 0)) {
-
-        pblk = snap_get_block(parent_id->block);
-        if ( pblk == NULL ) {
-            /* Parent unreadable or not a snapshot block. */
-            freeblock(blk);
-            return -1;
-        }
-        blk->hdr.log_entries = pblk->hdr.log_entries;
-        freeblock(pblk);
-    }
-
-    if (parent_id != NULL) {
-        blk->hdr.parent_block = *parent_id;
-        blk->hdr.fork_block = *fork_id;
-    } else {
-        blk->hdr.parent_block = null_snap_id;
-        blk->hdr.fork_block = null_snap_id;
-    }
-
-    new_id->index = 0;
-    new_id->block = allocblock(blk);
-    freeblock(blk);
-    if (new_id->block == 0)
-        return -1;
-
-    return 0;
-}
-
-/*
- * Create a new snapshot-log block whose parent and fork point are both
- * `parent_id`; see __snap_block_create for the return value.
- */
-int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
-{
-    snap_id_t *fork_id = parent_id;
-
-    return __snap_block_create(parent_id, fork_id, new_id);
-}
-
-/*
- * Append snapshot record `rec` to the log whose next free slot is `old_id`.
- *
- * If the current block is full it is marked immutable and a new block is
- * chained after it.  On success `*new_id` names the slot after the
- * appended record and 0 is returned.  On failure -1 is returned and
- * `*new_id` is left untouched (old_id and new_id may be the same object).
- */
-int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
-{
-    snap_id_t id = *old_id;
-    snap_id_t next;
-    snap_block_t *blk;
-
-    if ( rec->deleted == 1 ) {
-        printf("Attempt to append a deleted snapshot!\n");
-        return -1;
-    }
-
-    blk = snap_get_block(id.block);
-    if ( blk == NULL )
-        return -1;
-
-    if ( blk->hdr.immutable != 0 ) {
-        printf("Attempt to snap an immutable snap block!\n");
-        freeblock(blk);
-        return -1;
-    }
-
-    next.block = id.block;
-
-    if (blk->hdr.nr_entries >= SNAPS_PER_BLOCK) {
-        int ret;
-
-        id.index--; /* make id point to the last full record */
-
-        ret = __snap_block_create(&id, &blk->hdr.fork_block, &next);
-        if ( ret != 0 ) {
-            freeblock(blk);
-            return -1;
-        }
-
-        /* Seal the full block, then continue in the freshly made one. */
-        blk->hdr.immutable = 1;
-        writeblock(id.block, blk);
-        freeblock(blk);
-        blk = snap_get_block(next.block);
-        if ( blk == NULL )
-            return -1;
-        id = next;
-    }
-
-    blk->snaps[blk->hdr.nr_entries] = *rec;
-    blk->hdr.nr_entries++;
-    blk->hdr.log_entries++;
-    next.index = blk->hdr.nr_entries;
-    //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries);
-    writeblock(id.block, blk);
-    freeblock(blk);
-
-    *new_id = next;
-    return 0;
-}
-
-/*
- * Collapse parent snapshot `p_id` into its child `c_id`.
- *
- * Only consecutive snapshots in the same log block are supported.  The
- * parent record is marked deleted and written back, then the radix trees
- * are merged with collapse().  Returns collapse()'s result, or -1 if a
- * block cannot be read or the snapshots are not consecutive.
- */
-int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
-{
-    snap_block_t *p_blk, *c_blk;
-    snap_rec_t *p_rec, *c_rec;
-    int ret = -1;
-
-    p_blk = snap_get_block(p_id->block);
-
-    if (p_blk == NULL) return(-1);
-
-    if (c_id->block == p_id->block)
-    {
-        c_blk = p_blk;
-    } else {
-        c_blk = snap_get_block(c_id->block);
-    }
-
-    /* The child block is the one that may have failed to load here. */
-    if (c_blk == NULL) {
-        freeblock(p_blk);
-        return(-1);
-    }
-
-    /* parent and child must not be deleted. */
-    p_rec = &p_blk->snaps[p_id->index];
-    c_rec = &c_blk->snaps[c_id->index];
-    /*
-    if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
-        printf("One of those snaps is already deleted.\n");
-        goto done;
-    }
-    */
-    /* first non-deleted thing in the log before child must be parent. */
-
-    /* XXX todo: test the range here for delete (and eventually fork) bits) */
-    /* for now, snaps must be consecutive, on the same log page: */
-
-    if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
-    {
-        printf("Deleting non-consecutive snaps is not done yet.\n");
-        goto done;
-    }
-
-    /* mark parent as deleted XXX: may need to lock parent block here.*/
-    p_rec->deleted = 1;
-    writeblock(p_id->block, p_blk);
-
-    /* delete the parent */
-    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
-    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
-
-    /* return the number of blocks reclaimed. */
-
-done:
-    if (c_blk != p_blk) freeblock(c_blk);
-    freeblock(p_blk);
-
-    return(ret);
-}
-
-/*
- * Print the snapshot history reachable from `snap_id`: every record from
- * the given index down to 0 in its block, then the same for each parent
- * block, starting at the index stored in the parent link.
- */
-void snap_print_history(snap_id_t *snap_id)
-{
-    snap_id_t id = *snap_id;
-    unsigned int idx = id.index;
-    snap_block_t *new_blk, *blk = snap_get_block(id.block);
-
-    while ( blk ) {
-        printf("[Snap block %Ld]:\n", id.block);
-        do {
-            printf(" %03u: root: %Ld ts: %ld.%ld\n", idx,
-                   blk->snaps[idx].radix_root,
-                   blk->snaps[idx].timestamp.tv_sec,
-                   blk->snaps[idx].timestamp.tv_usec);
-        } while (idx-- != 0);
-
-        id = blk->hdr.parent_block;
-        /* A zero parent block ends the chain. */
-        new_blk = (id.block != 0) ? snap_get_block(id.block) : NULL;
-        /* Restart from the parent's own index; idx wrapped in the loop. */
-        idx = id.index;
-        freeblock(blk);
-        blk = new_blk;
-    }
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/snaplog.h
--- a/tools/blktap/parallax/snaplog.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-/**************************************************************************
- *
- * snaplog.h
- *
- * Snapshot log on-disk data structure.
- *
- */
-
-#include "radix.h"
-#include "blockstore.h" /* for BLOCK_SIZE */
-
-#ifndef __SNAPLOG_H__
-#define __SNAPLOG_H__
-
-/* Names one record in a snapshot log: the log block and a slot in it. */
-typedef struct snap_id {
- uint64_t block;
- unsigned int index;
-} snap_id_t;
-
-/* One snapshot: the radix root at snapshot time plus a timestamp. */
-typedef struct snap_rec {
- uint64_t radix_root;
- struct timeval timestamp;
- /* flags: */
- unsigned deleted:1;
-} snap_rec_t;
-
-
-int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
-int snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
-int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
-void snap_print_history(snap_id_t *snap_id);
-int snap_get_id(snap_id_t *id, snap_rec_t *target);
-
-
-/* exported for vdi debugging */
-#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
-
-/* Id used for "no parent" / "no fork point" links. */
-static const snap_id_t null_snap_id = { 0, 0 };
-
-/* Header at the start of every snapshot-log block. */
-typedef struct snap_block_hdr {
- uint64_t magic;
- snap_id_t parent_block; /* parent block within this chain */
- snap_id_t fork_block; /* where this log was forked */
- unsigned log_entries; /* total entries since forking */
- unsigned short nr_entries; /* entries in snaps[] */
- unsigned short immutable; /* has this snap page become immutable? */
-} snap_block_hdr_t;
-
-
-/* Number of records that fit in one block after the header. */
-#define SNAPS_PER_BLOCK \
- ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
-
-/* On-disk layout of a snapshot-log block: header followed by records. */
-typedef struct snap_block {
- snap_block_hdr_t hdr;
- snap_rec_t snaps[SNAPS_PER_BLOCK];
-} snap_block_t;
-
-
-/* Returns NULL when the block cannot be read or lacks SNAP_MAGIC. */
-snap_block_t *snap_get_block(uint64_t block);
-
-#endif /* __SNAPLOG_H__ */
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi.c
--- a/tools/blktap/parallax/vdi.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-/**************************************************************************
- *
- * vdi.c
- *
- * Virtual Disk Image (VDI) Interfaces
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <sys/time.h>
-#include <pthread.h>
-#include "blockstore.h"
-#include "block-async.h"
-#include "requests-async.h"
-#include "radix.h"
-#include "vdi.h"
-
-/* Block that holds the VDI registry (a plain block id). */
-#define VDI_REG_BLOCK 2LL
-/* Root of the registry radix tree, in encoded form: block 3, writable. */
-#define VDI_RADIX_ROOT writable(3)
-
-/* Debug tracing: compiled out unless the condition below is changed to 1. */
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* I haven't decided about this registry stuff, so this is just a really
- * quick lash-up so that there is some way to track VDIs.
- *
- * (Most vdi access should be with a direct handle to the block, so this
- * registry is just for start-of-day lookup and other control operations.)
- */
-
-/*
- * Create an empty VDI registry: zero-fill the registry's radix root block
- * and write a registry header with no VDIs to VDI_REG_BLOCK.
- * Returns the in-memory registry block (caller releases it with
- * freeblock()), or NULL if no block could be allocated.
- */
-vdi_registry_t *create_vdi_registry(void)
-{
-    vdi_registry_t *reg = (vdi_registry_t *)newblock();
-
-    if (reg == NULL)
-        return NULL;
-
-    /* zero-fill the vdi radix root while we have an empty block. */
-    /* VDI_RADIX_ROOT is an encoded tree pointer; writeblock wants the id. */
-    writeblock(getid(VDI_RADIX_ROOT), (void *)reg);
-
-
-    DPRINTF("[vdi.c] Creating VDI registry!\n");
-    reg->magic = VDI_REG_MAGIC;
-    reg->nr_vdis = 0;
-
-    writeblock(VDI_REG_BLOCK, (void *)reg);
-
-    return reg;
-}
-
-/*
- * Load the VDI registry, creating it when the registry block cannot be
- * read.  Returns NULL if creation fails or the block read does not carry
- * VDI_REG_MAGIC; otherwise the caller releases the result with freeblock().
- */
-vdi_registry_t *get_vdi_registry(void)
-{
-    vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
-
-    if ( vdi_reg == NULL ) {
-        vdi_reg = create_vdi_registry();
-        if ( vdi_reg == NULL )
-            return NULL;
-    }
-
-    if ( vdi_reg->magic != VDI_REG_MAGIC ) {
-        freeblock(vdi_reg);
-        return NULL;
-    }
-
-    return vdi_reg;
-}
-
-
-/*
- * Create a new VDI called `name`.
- *
- * If `parent_snap` names a readable snapshot record, the new VDI starts
- * from a snapshot of that record's radix root; otherwise it gets a fresh
- * radix root.  The VDI is given its own snapshot log, written to disk and
- * entered in the registry.  `name` is truncated to VDI_NAME_SZ - 1 bytes.
- *
- * Returns the in-memory VDI (release with vdi_put), or NULL on failure.
- */
-vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
-{
-    int ret;
-    vdi_t *vdi;
-    vdi_registry_t *vdi_reg;
-    snap_rec_t snap_rec;
-
-    /* create a vdi struct */
-    vdi = newblock();
-    if (vdi == NULL)
-        return NULL;
-
-    if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
-        vdi->radix_root = snapshot(snap_rec.radix_root);
-    } else {
-        vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
-        vdi->radix_root = writable(vdi->radix_root); /* grr. */
-    }
-
-    /* create a snapshot log, and add it to the vdi struct */
-
-    ret = snap_block_create(parent_snap, &vdi->snap);
-    if ( ret != 0 ) {
-        DPRINTF("Error getting snap block in vdi_create.\n");
-        freeblock(vdi);
-        return NULL;
-    }
-
-    /* append the vdi to the registry, fill block and id. */
-    /* implicit allocation means we have to write the vdi twice here. */
-    vdi_reg = get_vdi_registry();
-    if ( vdi_reg == NULL ) {
-        freeblock(vdi);
-        return NULL;
-    }
-
-    vdi->block = allocblock((void *)vdi);
-    vdi->id = vdi_reg->nr_vdis++;
-    /* name[] has VDI_NAME_SZ bytes: the terminator goes in the last one. */
-    strncpy(vdi->name, name, VDI_NAME_SZ - 1);
-    vdi->name[VDI_NAME_SZ - 1] = '\0';
-    vdi->radix_lock = NULL; /* for tidiness */
-    writeblock(vdi->block, (void *)vdi);
-
-    update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
-    writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
-    freeblock(vdi_reg);
-
-    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
-    if (vdi->radix_lock == NULL)
-    {
-        perror("couldn't malloc radix_lock for new vdi!");
-        freeblock(vdi);
-        return NULL;
-    }
-    radix_lock_init(vdi->radix_lock);
-
-    return vdi;
-}
-
-/* vdi_get and vdi_put currently act more like alloc/free -- they don't
- * do refcount-based allocation.
- */
-/*
- * Load the VDI with id `vdi_id` from the registry and attach a freshly
- * initialised radix lock.  Returns NULL if the id is not registered, the
- * VDI block cannot be read, or the lock cannot be allocated.
- */
-vdi_t *vdi_get(uint64_t vdi_id)
-{
-    uint64_t vdi_blk;
-    vdi_t *vdi;
-
-    vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
-
-    if ( vdi_blk == 0 )
-        return NULL;
-
-    vdi = (vdi_t *)readblock(vdi_blk);
-    if ( vdi == NULL )
-        return NULL;
-
-    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
-    if (vdi->radix_lock == NULL)
-    {
-        perror("couldn't malloc radix_lock for new vdi!");
-        freeblock(vdi);
-        return NULL;
-    }
-    radix_lock_init(vdi->radix_lock);
-
-    return vdi;
-}
-
-/* Release a VDI obtained from vdi_get/vdi_create: its lock, then its block. */
-void vdi_put(vdi_t *vdi)
-{
-    struct radix_lock *lock = vdi->radix_lock;
-
-    free(lock);
-    freeblock(vdi);
-}
-
-/*
- * Take a snapshot of `vdi`: record the current radix root with a
- * timestamp, switch the VDI to a snapshot()-derived root, append the
- * record to the VDI's snapshot log and write the VDI block back.  If the
- * append fails a message is printed and the VDI block is not written.
- */
-void vdi_snapshot(vdi_t *vdi)
-{
-    snap_rec_t rec;
-
-    rec.radix_root = vdi->radix_root;
-    gettimeofday(&rec.timestamp, NULL);
-    rec.deleted = 0;
-
-    vdi->radix_root = snapshot(rec.radix_root);
-
-    if ( snap_append(&vdi->snap, &rec, &vdi->snap) == 0 ) {
-        writeblock(vdi->block, vdi);
-    } else {
-        printf("snap_append returned failure\n");
-    }
-}
-
-/*
- * Start-of-day initialisation: set up the radix cache and make sure a VDI
- * registry exists.  Returns 0 on success, -1 if no registry is available.
- */
-int __init_vdi()
-{
-    vdi_registry_t *registry;
-
-    /* sneak this in here for the moment. */
-    __rcache_init();
-
-    /* force the registry to be created if it doesn't exist. */
-    registry = get_vdi_registry();
-    if (registry != NULL) {
-        freeblock(registry);
-        return 0;
-    }
-
-    printf("[vdi.c] Couldn't get/create a VDI registry!\n");
-    return -1;
-}
-
-#ifdef VDI_STANDALONE
-
-/* Sizing of the standalone stress test below. */
-#define TEST_VDIS 50
-#define NR_ITERS 50000
-#define FORK_POINTS 200
-#define INIT_VDIS 3
-#define INIT_SNAPS 40
-
-/* Thresholds compared against rand(); anything below the last one */
-/* results in a plain snapshot. */
-/* These must be of decreasing size: */
-#define NEW_FORK (RAND_MAX-(RAND_MAX/1000))
-#define NEW_ROOT_VDI (RAND_MAX-((RAND_MAX/1000)*2))
-#define NEW_FORK_VDI (RAND_MAX-((RAND_MAX/1000)*3))
-
-/* Output files: the dot graph and the postscript rendered from it. */
-#define GRAPH_DOT_FILE "vdi.dot"
-#define GRAPH_PS_FILE "vdi.ps"
-
-
-/* Chained hash set of snapshot ids already emitted to the graph. */
-typedef struct sh_st {
-    snap_id_t id;
-    struct sh_st *next;
-} sh_t;
-
-#define SNAP_HASHSZ 1024
-sh_t *node_hash[SNAP_HASHSZ];
-#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
-
-#define SNAPID_EQUAL(_a,_b) \
-    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
-
-/*
- * Return 1 if `id` is already in the set; otherwise add it and return 0.
- * Exits the program if memory for a new entry cannot be allocated.
- */
-int sh_check_and_add(snap_id_t *id)
-{
-    sh_t **s = &node_hash[SNAP_HASH(id)];
-
-    while (*s != NULL) {
-        if (SNAPID_EQUAL(&((*s)->id), id))
-            return 1;
-        /* Advance the cursor; assigning through it would drop entries. */
-        s = &(*s)->next;
-    }
-
-    *s = (sh_t *)malloc(sizeof(sh_t));
-    if (*s == NULL) {
-        perror("sh_check_and_add: malloc");
-        exit(1);
-    }
-    (*s)->id = *id;
-    (*s)->next = NULL;
-
-    return 0;
-}
-
-/*
- * Standalone stress test: create a few seed VDIs, run a random mix of
- * snapshot / fork / create operations, dump the resulting snapshot graph
- * to GRAPH_DOT_FILE and render it to GRAPH_PS_FILE with dot(1).
- * Returns 0 on success, 1 if set-up or the graph file cannot be created.
- */
-int main(int argc, char *argv[])
-{
-    vdi_t *vdi_list[TEST_VDIS];
-    snap_id_t id, fork_points[FORK_POINTS];
-    int nr_vdis = 0, nr_forks = 0;
-    int i, j, r;
-    FILE *f;
-    char name[VDI_NAME_SZ];
-
-    (void)argc;
-    (void)argv;
-
-    __init_blockstore();
-    if (__init_vdi() != 0)
-        return 1;
-
-    printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
-
-    for (i=0; i<INIT_VDIS; i++) {
-        r=rand();
-
-        snprintf(name, sizeof(name), "VDI Number %d", nr_vdis);
-        vdi_list[i] = vdi_create(NULL, name);
-        if (vdi_list[i] == NULL) {
-            fprintf(stderr, "vdi_create failed for seed VDI %d\n", i);
-            return 1;
-        }
-        for (j=0; j<(r%INIT_SNAPS); j++)
-            vdi_snapshot(vdi_list[i]);
-        fork_points[i] = vdi_list[i]->snap;
-        nr_vdis++;
-        nr_forks++;
-    }
-
-    printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
-
-    for (i=0; i<NR_ITERS; i++) {
-        vdi_t *created;
-
-        r = rand();
-
-        if ( r > NEW_FORK ) {
-            /* fork_points[] has FORK_POINTS slots: stop once it is full. */
-            if ( nr_forks >= FORK_POINTS )
-                continue;
-            id = vdi_list[r%nr_vdis]->snap;
-            if ( ( id.block == 0 ) || ( id.index == 0 ) )
-                continue;
-            id.index--;
-            fork_points[nr_forks++] = id;
-
-        } else if ( r > NEW_ROOT_VDI ) {
-
-            if ( nr_vdis == TEST_VDIS )
-                continue;
-
-            snprintf(name, sizeof(name), "VDI Number %d.", nr_vdis);
-            created = vdi_create(NULL, name);
-            if ( created != NULL )
-                vdi_list[nr_vdis++] = created;
-
-        } else if ( r > NEW_FORK_VDI ) {
-
-            if ( nr_vdis == TEST_VDIS )
-                continue;
-
-            snprintf(name, sizeof(name), "VDI Number %d.", nr_vdis);
-            created = vdi_create(&fork_points[r%nr_forks], name);
-            if ( created != NULL )
-                vdi_list[nr_vdis++] = created;
-
-        } else /* SNAPSHOT */ {
-
-            vdi_snapshot(vdi_list[r%nr_vdis]);
-
-        }
-    }
-
-    /* now dump it out to a dot file. */
-    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
-
-    f = fopen(GRAPH_DOT_FILE, "w");
-    if (f == NULL) {
-        perror(GRAPH_DOT_FILE);
-        return 1;
-    }
-
-    /* write graph preamble */
-    fprintf(f, "digraph G {\n");
-    fprintf(f, " rankdir=LR\n");
-
-    for (i=0; i<nr_vdis; i++) {
-        char oldnode[255];
-        snap_block_t *blk;
-        snap_id_t cur = vdi_list[i]->snap;
-        int nr_snaps, done=0;
-
-        /* add a node for the id */
-        printf("vdi: %d\n", i);
-        fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n",
-                cur.block, cur.index, vdi_list[i]->name,
-                cur.block, cur.index);
-        snprintf(oldnode, sizeof(oldnode), "n%Ld%d", cur.block, cur.index);
-
-        while (cur.block != 0) {
-            blk = snap_get_block(cur.block);
-            if (blk == NULL)
-                break; /* unreadable log block: stop following this chain */
-            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - cur.index);
-            cur = blk->hdr.fork_block;
-
-            done = sh_check_and_add(&cur);
-
-            /* add a node for the fork_id */
-            if (!done) {
-                fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n",
-                        cur.block, cur.index,
-                        cur.block, cur.index);
-            }
-
-            /* add an edge between them */
-            fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n",
-                    cur.block, cur.index, oldnode, nr_snaps);
-            snprintf(oldnode, sizeof(oldnode), "n%Ld%d", cur.block, cur.index);
-            freeblock(blk);
-
-            if (done) break;
-        }
-    }
-
-    /* write graph postamble */
-    fprintf(f, "}\n");
-    fclose(f);
-
-    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
-    {
-        char cmd[255];
-        snprintf(cmd, sizeof(cmd), "dot %s -Tps -o %s",
-                 GRAPH_DOT_FILE, GRAPH_PS_FILE);
-        if (system(cmd) != 0)
-            fprintf(stderr, "'%s' did not complete successfully\n", cmd);
-    }
-    return 0;
-}
-
-#endif
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi.h
--- a/tools/blktap/parallax/vdi.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#ifndef _VDI_H_
-#define _VDI_H_
-/**************************************************************************
- *
- * vdi.h
- *
- * Virtual Disk Image (VDI) Interfaces
- *
- */
-
-#ifndef __VDI_H__
-#define __VDI_H__
-
-#include "blktaplib.h"
-#include "snaplog.h"
-
-#define VDI_HEIGHT 27 /* Note that these are now hard-coded */
-#define VDI_REG_HEIGHT 27 /* in the async lookup code */
-
-#define VDI_NAME_SZ 256
-
-
-typedef struct vdi {
- uint64_t id; /* unique vdi id -- used by the registry */
- uint64_t block; /* block where this vdi lives (also unique)*/
- uint64_t radix_root; /* radix root node for block mappings */
- snap_id_t snap; /* next snapshot slot for this VDI */
- struct vdi *next; /* used to hash-chain in blkif. */
- blkif_vdev_t vdevice; /* currently mounted as... */
- struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs */
- char name[VDI_NAME_SZ];/* human readable vdi name */
-} vdi_t;
-
-#define VDI_REG_MAGIC 0xff00ff0bb0ff00ffLL
-
-typedef struct vdi_registry {
- uint64_t magic;
- uint64_t nr_vdis;
-} vdi_registry_t;
-
-
-int __init_vdi(void);
-
-vdi_t *vdi_get(uint64_t vdi_id);
-void vdi_put(vdi_t *vdi);
-vdi_registry_t *get_vdi_registry(void);
-vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
-uint64_t vdi_lookup_block(vdi_t *vdi, uint64_t vdi_block, int *writable);
-void vdi_update_block(vdi_t *vdi, uint64_t vdi_block, uint64_t g_block);
-void vdi_snapshot(vdi_t *vdi);
-
-
-#endif /* __VDI_H__ */
-
-#endif //_VDI_H_
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_create.c
--- a/tools/blktap/parallax/vdi_create.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/**************************************************************************
- *
- * vdi_create.c
- *
- * Create a new vdi.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
- vdi_t *vdi;
- char name[VDI_NAME_SZ] = "";
- snap_id_t id;
- int from_snap = 0;
-
- __init_blockstore();
- __init_vdi();
-
- if ( argc == 1 ) {
- printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
- exit(-1);
- }
-
- strncpy( name, argv[1], VDI_NAME_SZ);
- name[VDI_NAME_SZ] = '\0';
-
- if ( argc > 3 ) {
- id.block = (uint64_t) atoll(argv[2]);
- id.index = (unsigned int) atol (argv[3]);
- from_snap = 1;
- }
-
- vdi = vdi_create( from_snap ? &id : NULL, name);
-
- if ( vdi == NULL ) {
- printf("Failed to create VDI!\n");
- freeblock(vdi);
- exit(-1);
- }
-
- freeblock(vdi);
-
- return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_fill.c
--- a/tools/blktap/parallax/vdi_fill.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-/**************************************************************************
- *
- * vdi_fill.c
- *
- * Hoover a file or device into a vdi.
- * You must first create the vdi with vdi_create.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "requests-async.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
- vdi_t *vdi;
- uint64_t id;
- int fd;
- struct stat st;
- uint64_t tot_size;
- char spage[BLOCK_SIZE];
- char *dpage;
- uint64_t vblock = 0, count=0;
-
- __init_blockstore();
- init_block_async();
- __init_vdi();
-
- if ( argc < 3 ) {
- printf("usage: %s <VDI id> <filename>\n", argv[0]);
- exit(-1);
- }
-
- id = (uint64_t) atoll(argv[1]);
-
- vdi = vdi_get( id );
-
- if ( vdi == NULL ) {
- printf("Failed to retreive VDI %Ld!\n", id);
- exit(-1);
- }
-
- fd = open(argv[2], O_RDONLY | O_LARGEFILE);
-
- if (fd < 0) {
- printf("Couldn't open %s!\n", argv[2]);
- exit(-1);
- }
-
- if ( fstat(fd, &st) != 0 ) {
- printf("Couldn't stat %s!\n", argv[2]);
- exit(-1);
- }
-
- tot_size = (uint64_t) st.st_size;
- printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
-
- printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);
- printf(" ");
- while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
- vdi_write_s(vdi, vblock, spage);
-
- vblock++;
- if ((vblock % 512) == 0)
- printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
- fflush(stdout);
- }
- printf("\n");
-
- freeblock(vdi);
-
- return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_list.c
--- a/tools/blktap/parallax/vdi_list.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-/**************************************************************************
- *
- * vdi_list.c
- *
- * Print a list of VDIs on the block store.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
- vdi_registry_t *reg;
- vdi_t *vdi;
- int i;
-
- __init_blockstore();
- __init_vdi();
-
- reg = get_vdi_registry();
-
- if ( reg == NULL ) {
- printf("couldn't get VDI registry.\n");
- exit(-1);
- }
-
- for (i=0; i < reg->nr_vdis; i++) {
- vdi = vdi_get(i);
-
- if ( vdi != NULL ) {
-
- printf("%10Ld %60s\n", vdi->id, vdi->name);
- freeblock(vdi);
-
- }
- }
-
- freeblock(reg);
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap.c
--- a/tools/blktap/parallax/vdi_snap.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-/**************************************************************************
- *
- * vdi_snap.c
- *
- * Snapshot a vdi.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
- vdi_t *vdi;
- uint64_t id;
-
- __init_blockstore();
- __init_vdi();
-
- if ( argc == 1 ) {
- printf("usage: %s <VDI id>\n", argv[0]);
- exit(-1);
- }
-
- id = (uint64_t) atoll(argv[1]);
-
- vdi = vdi_get(id);
-
- if ( vdi == NULL ) {
- printf("couldn't find the requested VDI.\n");
- freeblock(vdi);
- exit(-1);
- }
-
- vdi_snapshot(vdi);
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap_delete.c
--- a/tools/blktap/parallax/vdi_snap_delete.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/**************************************************************************
- *
- * vdi_snap_delete.c
- *
- * Delete a snapshot.
- *
- * This is not finished: right now it takes a snap n and calls
- * snap_collapse(n,n+1).
- *
- * TODO: support for non-consecutive, non-same-block snaps
- * Avoid forking probs.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "snaplog.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
- snap_id_t id, c_id;
- int ret;
-
- __init_blockstore();
- __init_vdi();
-
- if ( argc != 3 ) {
- printf("usage: %s <snap block> <snap idx>\n", argv[0]);
- exit(-1);
- }
-
- id.block = (uint64_t) atoll(argv[1]);
- id.index = (unsigned int) atol (argv[2]);
-
- c_id = id;
- c_id.index++;
-
- ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
-
- printf("Freed %d blocks.\n", ret);
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_snap_list.c
--- a/tools/blktap/parallax/vdi_snap_list.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-/**************************************************************************
- *
- * vdi_snap_list.c
- *
- * Print a list of snapshots for the specified vdi.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-int main(int argc, char *argv[])
-{
- vdi_t *vdi;
- uint64_t id;
- int i, max_snaps = -1;
- snap_block_t *blk;
- snap_id_t sid;
- char *t;
-
- __init_blockstore();
- __init_vdi();
-
- if ( argc == 1 ) {
- printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
- exit(-1);
- }
-
- id = (uint64_t) atoll(argv[1]);
-
- if ( argc > 2 ) {
- max_snaps = atoi(argv[2]);
- }
-
- vdi = vdi_get(id);
-
- if ( vdi == NULL ) {
- printf("couldn't find the requested VDI.\n");
- freeblock(vdi);
- exit(-1);
- }
-
- sid = vdi->snap;
- sid.index--;
-
- //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp",
- // "radix root", "d");
- printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp",
- "radix root", "d");
-
- while (sid.block != 0) {
- blk = snap_get_block(sid.block);
- for (i = sid.index; i >= 0; i--) {
- if ( max_snaps == 0 ) {
- freeblock(blk);
- goto done;
- }
- t = ctime(&blk->snaps[i].timestamp.tv_sec);
- t[strlen(t)-1] = '\0';
- //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
- printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
- sid.block, i,
- //blk->snaps[i].timestamp.tv_sec,
- t,
- blk->snaps[i].timestamp.tv_usec,
- blk->snaps[i].radix_root,
- blk->snaps[i].deleted ? "*" : " ");
- if ( max_snaps != -1 )
- max_snaps--;
- }
- sid = blk->hdr.parent_block;
- freeblock(blk);
- }
-done:
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_tree.c
--- a/tools/blktap/parallax/vdi_tree.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-/**************************************************************************
- *
- * vdi_tree.c
- *
- * Output current vdi tree to dot and postscript.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-#define GRAPH_DOT_FILE "vdi.dot"
-#define GRAPH_PS_FILE "vdi.ps"
-
-typedef struct sh_st {
- snap_id_t id;
- struct sh_st *next;
-} sh_t;
-
-#define SNAP_HASHSZ 1024
-sh_t *node_hash[SNAP_HASHSZ];
-#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
-
-#define SNAPID_EQUAL(_a,_b) \
- (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
-int sh_check_and_add(snap_id_t *id)
-{
- sh_t **s = &node_hash[SNAP_HASH(id)];
-
- while (*s != NULL) {
- if (SNAPID_EQUAL(&((*s)->id), id))
- return 1;
- *s = (*s)->next;
- }
-
- *s = (sh_t *)malloc(sizeof(sh_t));
- (*s)->id = *id;
- (*s)->next = NULL;
-
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- FILE *f;
- char dot_file[255] = GRAPH_DOT_FILE;
- char ps_file[255] = GRAPH_PS_FILE;
- int nr_vdis = 0, nr_forks = 0;
- vdi_registry_t *reg;
- vdi_t *vdi;
- int i;
-
- __init_blockstore();
- __init_vdi();
-
- reg = get_vdi_registry();
-
- if ( reg == NULL ) {
- printf("couldn't get VDI registry.\n");
- exit(-1);
- }
-
- if ( argc > 1 ) {
- strncpy(ps_file, argv[1], 255);
- ps_file[255] = '\0';
- }
-
- /* now dump it out to a dot file. */
- printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
-
- f = fopen(dot_file, "w");
-
- /* write graph preamble */
- fprintf(f, "digraph G {\n");
- fprintf(f, " rankdir=LR\n");
-
- for (i=0; i<reg->nr_vdis; i++) {
- char oldnode[255];
- snap_block_t *blk;
- snap_id_t id;
- int nr_snaps, done=0;
-
- vdi = vdi_get(i);
- id = vdi->snap;
- /* add a node for the id */
-printf("vdi: %d\n", i);
- fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n",
- id.block, id.index, vdi->name,
- id.block, id.index);
- sprintf(oldnode, "n%Ld%d", id.block, id.index);
-
- while (id.block != 0) {
- blk = snap_get_block(id.block);
- nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
- id = blk->hdr.fork_block;
-
- done = sh_check_and_add(&id);
-
- /* add a node for the fork_id */
- if (!done) {
- fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n",
- id.block, id.index,
- id.block, id.index);
- }
-
- /* add an edge between them */
- fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n",
- id.block, id.index, oldnode, nr_snaps);
- sprintf(oldnode, "n%Ld%d", id.block, id.index);
- freeblock(blk);
-
- if (done) break;
- }
- }
-
- /* write graph postamble */
- fprintf(f, "}\n");
- fclose(f);
-
- printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
- {
- char cmd[255];
- sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file);
- system(cmd);
- }
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_unittest.c
--- a/tools/blktap/parallax/vdi_unittest.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-/**************************************************************************
- *
- * vdi_unittest.c
- *
- * Run a small test workload to ensure that data access through a vdi
- * is (at least superficially) correct.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "requests-async.h"
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-
-#define TEST_PAGES 32
-static char *zero_page;
-static char pages[TEST_PAGES][BLOCK_SIZE];
-static int next_page = 0;
-
-void fill_test_pages(void)
-{
- int i, j;
- long *page;
-
- for (i=0; i< TEST_PAGES; i++) {
- page = (unsigned long *)pages[i];
- for (j=0; j<(BLOCK_SIZE/4); j++) {
- page[j] = random();
- }
- }
-
- zero_page = newblock();
-}
-
-inline uint64_t make_vaddr(uint64_t L1, uint64_t L2, uint64_t L3)
-{
- uint64_t ret = L1;
-
- ret = (ret << 9) | L2;
- ret = (ret << 9) | L3;
-
- return ret;
-}
-
-void touch_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3)
-{
- uint64_t vaddr;
- char *page = pages[next_page++];
- char *rpage = NULL;
-
- printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
-
- vaddr = make_vaddr(L1, L2, L3);
- vdi_write_s(vdi, vaddr, page);
- rpage = vdi_read_s(vdi, vaddr);
-
- if (rpage == NULL)
- {
- printf( "read %Lu returned NULL\n", vaddr);
- return;
- }
-
- if (memcmp(page, rpage, BLOCK_SIZE) != 0)
- {
- printf( "read %Lu returned a different page\n", vaddr);
- return;
- }
-
- freeblock(rpage);
-}
-
-void test_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3, char *page)
-{
- uint64_t vaddr;
- char *rpage = NULL;
-
- printf("TEST (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
-
- vaddr = make_vaddr(L1, L2, L3);
- rpage = vdi_read_s(vdi, vaddr);
-
- if (rpage == NULL)
- {
- printf( "read %Lu returned NULL\n", vaddr);
- return;
- }
-
- if (memcmp(page, rpage, BLOCK_SIZE) != 0)
- {
- printf( "read %Lu returned a different page\n", vaddr);
- return;
- }
-
- freeblock(rpage);
-}
-
-void coverage_test(vdi_t *vdi)
-{
- uint64_t vaddr;
- int i, j, k;
-
- /* Do a series of writes and reads to test all paths through the
- * async radix code. The radix request code will dump CRC warnings
- * if there are data problems here as well.
- */
-
- /* L1 Zero */
- touch_block(vdi, 0, 0, 0);
-
- /* L2 Zero */
- i = next_page;
- touch_block(vdi, 0, 1, 0);
-
- /* L3 Zero */
- j = next_page;
- touch_block(vdi, 0, 0, 1);
- k = next_page;
- touch_block(vdi, 0, 1, 1);
-
- /* Direct write */
- touch_block(vdi, 0, 0, 0);
-
- vdi_snapshot(vdi);
-
- /* L1 fault */
- touch_block(vdi, 0, 0, 0);
- /* test the read-only branches that should have been copied over. */
- test_block(vdi, 0, 1, 0, pages[i]);
- test_block(vdi, 0, 0, 1, pages[j]);
-
- /* L2 fault */
- touch_block(vdi, 0, 1, 0);
- test_block(vdi, 0, 1, 1, pages[k]);
-
- /* L3 fault */
- touch_block(vdi, 0, 0, 1);
-
- /* read - L1 zero */
- test_block(vdi, 1, 0, 0, zero_page);
-
- /* read - L2 zero */
- test_block(vdi, 0, 2, 0, zero_page);
-
- /* read - L3 zero */
- test_block(vdi, 0, 0, 2, zero_page);
-}
-
-int main(int argc, char *argv[])
-{
- vdi_t *vdi;
- uint64_t id;
- int fd;
- struct stat st;
- uint64_t tot_size;
- char spage[BLOCK_SIZE];
- char *dpage;
- uint64_t vblock = 0, count=0;
-
- __init_blockstore();
- init_block_async();
- __init_vdi();
-
- vdi = vdi_create( NULL, "UNIT TEST VDI");
-
- if ( vdi == NULL ) {
- printf("Failed to create VDI!\n");
- freeblock(vdi);
- exit(-1);
- }
-
- fill_test_pages();
- coverage_test(vdi);
-
- freeblock(vdi);
-
- return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/parallax/vdi_validate.c
--- a/tools/blktap/parallax/vdi_validate.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-/**************************************************************************
- *
- * vdi_validate.c
- *
- * Intended to sanity-check vm_fill and the underlying vdi code.
- *
- * Block-by-block compare of a vdi with a file/device on the disk.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "blockstore.h"
-#include "radix.h"
-#include "vdi.h"
-#include "requests-async.h"
-
-int main(int argc, char *argv[])
-{
- vdi_t *vdi;
- uint64_t id;
- int fd;
- struct stat st;
- uint64_t tot_size;
- char spage[BLOCK_SIZE], *dpage;
- char *vpage;
- uint64_t vblock = 0, count=0;
-
- __init_blockstore();
- init_block_async();
- __init_vdi();
-
- if ( argc < 3 ) {
- printf("usage: %s <VDI id> <filename>\n", argv[0]);
- exit(-1);
- }
-
- id = (uint64_t) atoll(argv[1]);
-
- vdi = vdi_get( id );
-
- if ( vdi == NULL ) {
- printf("Failed to retreive VDI %Ld!\n", id);
- exit(-1);
- }
-
- fd = open(argv[2], O_RDONLY | O_LARGEFILE);
-
- if (fd < 0) {
- printf("Couldn't open %s!\n", argv[2]);
- exit(-1);
- }
-
- if ( fstat(fd, &st) != 0 ) {
- printf("Couldn't stat %s!\n", argv[2]);
- exit(-1);
- }
-
- tot_size = (uint64_t) st.st_size;
- printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
-
- printf(" ");
- while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
-
- dpage = vdi_read_s(vdi, vblock);
-
- if (dpage == NULL) {
- printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
- exit(0);
- }
-
- if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
- printf("\n\nblocks don't match! (%Ld)\n", vblock);
- exit(0);
- }
-
- freeblock(dpage);
-
- vblock++;
- if ((vblock % 1024) == 0) {
- printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
- fflush(stdout);
- }
- }
- printf("\n");
-
- printf("VDI %Ld looks good!\n", id);
-
- freeblock(vdi);
-
- return (0);
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/Makefile
--- a/tools/blktap/ublkback/Makefile Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-
-XEN_ROOT = ../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-INCLUDES += -I..
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-IBIN = ublkback
-INSTALL_DIR = /usr/sbin
-
-CFLAGS += -Werror
-CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing
-CFLAGS += -I $(XEN_LIBXC)
-CFLAGS += $(INCLUDES) -I.
-CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
-OBJS = $(patsubst %.c,%.o,$(SRCS))
-
-.PHONY: all
-all: $(IBIN)
-
-LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
-
-.PHONY: install
-install:
- $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)
-
-.PHONY: clean
-clean:
- rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN)
-
-ublkback:
- $(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L.. \
- -lblktap -laio ublkback.c ublkbacklib.c -pg
-
--include $(DEPS)
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkback.c
--- a/tools/blktap/ublkback/ublkback.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/* ublkback.c
- *
- * libaio-based userlevel backend.
- */
-
-#include "blktaplib.h"
-#include "ublkbacklib.h"
-
-
-int main(int argc, char *argv[])
-{
- ublkback_init();
-
- register_new_blkif_hook(ublkback_new_blkif);
- blktap_listen();
-
- return 0;
-}
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkbacklib.c
--- a/tools/blktap/ublkback/ublkbacklib.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,473 +0,0 @@
-/* ublkbacklib.c
- *
- * file/device image-backed block device -- using linux libaio.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- *
- * NOTE: This doesn't work. Grrr.
- */
-
-#define _GNU_SOURCE
-#define __USE_LARGEFILE64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <string.h>
-#include <db.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/poll.h>
-#include <unistd.h>
-#include <errno.h>
-#include <libaio.h>
-#include <pthread.h>
-#include <time.h>
-#include <err.h>
-#include "blktaplib.h"
-
-/* XXXX: */
-/* Current code just mounts this file/device to any requests that come in. */
-//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
-#define TMP_IMAGE_FILE_NAME "fc3.image"
-
-#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */
-#define MAX_SEGMENTS_PER_REQ 11
-#define SECTOR_SHIFT 9
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-/* Note on pending_reqs: I assume all reqs are queued before they start to
- * get filled. so count of 0 is an unused record.
- */
-typedef struct {
- blkif_request_t req;
- blkif_t *blkif;
- int count;
-} pending_req_t;
-
-static pending_req_t pending_list[MAX_REQUESTS];
-static io_context_t ctx;
-static struct iocb *iocb_free[MAX_AIO_REQS];
-static int iocb_free_count;
-
-/* ---[ Notification mecahnism ]--------------------------------------- */
-
-enum {
- READ = 0,
- WRITE = 1
-};
-
-static int aio_notify[2];
-static volatile int aio_listening = 0;
-static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER;
-
-static struct io_event aio_events[MAX_AIO_REQS];
-static int aio_event_count = 0;
-
-/* this is commented out in libaio.h for some reason. */
-extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
-
-static void *notifier_thread(void *arg)
-{
- int ret;
- int msg = 0x00feeb00;
-
- DPRINTF("Notifier thread started.\n");
- for (;;) {
- pthread_mutex_lock(&notifier_sem);
- if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) {
- aio_event_count = ret;
- write(aio_notify[WRITE], &msg, sizeof(msg));
- } else {
- printf("[io_queue_wait error! %d]\n", errno);
- pthread_mutex_unlock(&notifier_sem);
- }
- }
-}
-
-/* --- Talking to xenstore: ------------------------------------------- */
-
-int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
-int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done);
-
-typedef struct image {
- /* These need to turn into an array/rbtree for multi-disk support. */
- int fd;
- uint64_t fsid;
- blkif_vdev_t vdevice;
- long int size;
- long int secsize;
- long int info;
-} image_t;
-
-long int ublkback_get_size(blkif_t *blkif)
-{
- image_t *img = (image_t *)blkif->prv;
- return img->size;
-}
-
-long int ublkback_get_secsize(blkif_t *blkif)
-{
- image_t *img = (image_t *)blkif->prv;
- return img->secsize;
-}
-
-unsigned ublkback_get_info(blkif_t *blkif)
-{
- image_t *img = (image_t *)blkif->prv;
- return img->info;
-}
-
-static struct blkif_ops ublkback_ops = {
- get_size: ublkback_get_size,
- get_secsize: ublkback_get_secsize,
- get_info: ublkback_get_info,
-};
-
-int ublkback_new_blkif(blkif_t *blkif)
-{
- image_t *image;
- struct stat stat;
- int ret;
-
- image = (image_t *)malloc(sizeof(image_t));
- if (image == NULL) {
- printf("error allocating image record.\n");
- return -ENOMEM;
- }
-
- /* Open it. */
- image->fd = open(TMP_IMAGE_FILE_NAME,
- O_RDWR | O_DIRECT | O_LARGEFILE);
-
- if ((image->fd < 0) && (errno == EINVAL)) {
- /* Maybe O_DIRECT isn't supported. */
- warn("open() failed on '%s', trying again without O_DIRECT",
- TMP_IMAGE_FILE_NAME);
- image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE);
- }
-
- if (image->fd < 0) {
- warn("Couldn't open image file!");
- free(image);
- return -EINVAL;
- }
-
- /* Size it. */
- ret = fstat(image->fd, &stat);
- if (ret != 0) {
- printf("Couldn't stat image in PROBE!");
- return -EINVAL;
- }
-
- image->size = (stat.st_size >> SECTOR_SHIFT);
-
- /* TODO: IOCTL to get size of raw device. */
-/*
- ret = ioctl(img->fd, BLKGETSIZE, &blksize);
- if (ret != 0) {
- printf("Couldn't ioctl image in PROBE!\n");
- goto err;
- }
-*/
- if (image->size == 0)
- image->size =((uint64_t) 16836057);
- image->secsize = 512;
- image->info = 0;
-
- /* Register the hooks */
- blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request);
- blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response);
-
-
- printf(">X<Created a new blkif! pdev was %ld, but you got %s\n",
- blkif->pdev, TMP_IMAGE_FILE_NAME);
-
- blkif->ops = &ublkback_ops;
- blkif->prv = (void *)image;
-
- return 0;
-}
-
-
-/* --- Moving the bits: ----------------------------------------------- */
-
-static int batch_count = 0;
-int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done)
-{
- int fd;
- uint64_t sector;
- char *spage, *dpage;
- int ret, i, idx;
- blkif_response_t *rsp;
- domid_t dom = ID_TO_DOM(req->id);
- static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS];
- static int io_idx = 0;
- struct iocb *io;
- image_t *img;
-
- img = (image_t *)blkif->prv;
- fd = img->fd;
-
- switch (req->operation)
- {
- case BLKIF_OP_WRITE:
- {
- unsigned long size;
-
- batch_count++;
-
- idx = ID_TO_IDX(req->id);
- ASSERT(pending_list[idx].count == 0);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- pending_list[idx].count = req->nr_segments;
- pending_list[idx].blkif = blkif;
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-
- if (req->seg[i].first_sect != 0)
- DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
- "pos: %15lu\n",
- req->sector_number, sector,
- req->seg[i].first_sect, req->seg[i].last_sect,
- (long)(sector << SECTOR_SHIFT));
-
- spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- spage += req->seg[i].first_sect << SECTOR_SHIFT;
-
- /*convert size and sector to byte offsets */
- size <<= SECTOR_SHIFT;
- sector <<= SECTOR_SHIFT;
-
- io = iocb_free[--iocb_free_count];
- io_prep_pwrite(io, fd, spage, size, sector);
- io->data = (void *)idx;
- //ioq[i] = io;
- ioq[io_idx++] = io;
- }
-
- if (batch_done) {
- ret = io_submit(ctx, io_idx, ioq);
- batch_count = 0;
- if (ret < 0)
- printf("BADNESS: io_submit error! (%d)\n", errno);
- io_idx = 0;
- }
-
- return BLKTAP_STOLEN;
-
- }
- case BLKIF_OP_READ:
- {
- unsigned long size;
-
- batch_count++;
- idx = ID_TO_IDX(req->id);
- ASSERT(pending_list[idx].count == 0);
- memcpy(&pending_list[idx].req, req, sizeof(*req));
- pending_list[idx].count = req->nr_segments;
- pending_list[idx].blkif = blkif;
-
- for (i = 0; i < req->nr_segments; i++) {
-
- sector = req->sector_number + (8*i);
-
- size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-
- dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
- dpage += req->seg[i].first_sect << SECTOR_SHIFT;
-
- if (req->seg[i].first_sect != 0)
- DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
- "pos: %15lu dpage: %p\n",
- req->sector_number, sector,
- req->seg[i].first_sect, req->seg[i].last_sect,
- (long)(sector << SECTOR_SHIFT), dpage);
-
- /*convert size and sector to byte offsets */
- size <<= SECTOR_SHIFT;
- sector <<= SECTOR_SHIFT;
-
-
- /*
- * NB: Looks like AIO now has non-page aligned support, this path
- * can probably be removed... Only really used for hunting
- * superblocks anyway... ;)
- */
- if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) {
- /* AIO to raw devices must be page aligned, so do this read
- * synchronously. The OS is probably just looking for
- * a superblock or something, so this won't hurt performance.
- */
- int ret;
-
- printf("Slow path block read.\n");
- /* Question: do in-progress aio ops modify the file cursor? */
- ret = lseek(fd, sector, SEEK_SET);
- if (ret == (off_t)-1)
- printf("lseek failed!\n");
- ret = read(fd, dpage, size);
- if (ret < 0)
- printf("read problem (%d)\n", ret);
- printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret);
-
- /* not an async request any more... */
- pending_list[idx].count--;
-
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = BLKIF_OP_READ;
- rsp->status = BLKIF_RSP_OKAY;
- return BLKTAP_RESPOND;
- /* Doh -- need to flush aio if this is end-of-batch */
- }
-
- io = iocb_free[--iocb_free_count];
-
- io_prep_pread(io, fd, dpage, size, sector);
- io->data = (void *)idx;
-
- ioq[io_idx++] = io;
- //ioq[i] = io;
- }
-
- if (batch_done) {
- ret = io_submit(ctx, io_idx, ioq);
- batch_count = 0;
- if (ret < 0)
- printf("BADNESS: io_submit error! (%d)\n", errno);
- io_idx = 0;
- }
-
- return BLKTAP_STOLEN;
-
- }
- }
-
- printf("Unknown block operation!\n");
-err:
- rsp = (blkif_response_t *)req;
- rsp->id = req->id;
- rsp->operation = req->operation;
- rsp->status = BLKIF_RSP_ERROR;
- return BLKTAP_RESPOND;
-}
-
-
-int ublkback_pollhook(int fd)
-{
- struct io_event *ep;
- int n, ret, idx;
- blkif_request_t *req;
- blkif_response_t *rsp;
- int responses_queued = 0;
- int pages=0;
-
- for (ep = aio_events; aio_event_count-- > 0; ep++) {
- struct iocb *io = ep->obj;
- idx = (int) ep->data;
-
- if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
- printf("invalid index returned(%u)!\n", idx);
- break;
- }
-
- if ((int)ep->res < 0)
- printf("***\n***aio request error! (%d,%d)\n***\n",
- (int)ep->res, (int)ep->res2);
-
- pending_list[idx].count--;
- iocb_free[iocb_free_count++] = io;
- pages++;
-
- if (pending_list[idx].count == 0) {
- blkif_request_t tmp = pending_list[idx].req;
- rsp = (blkif_response_t *)&pending_list[idx].req;
- rsp->id = tmp.id;
- rsp->operation = tmp.operation;
- rsp->status = BLKIF_RSP_OKAY;
- blkif_inject_response(pending_list[idx].blkif, rsp);
- responses_queued++;
- }
- }
-
- if (responses_queued) {
- blktap_kick_responses();
- }
-
- read(aio_notify[READ], &idx, sizeof(idx));
- aio_listening = 1;
- pthread_mutex_unlock(&notifier_sem);
-
- return 0;
-}
-
-/* the image library terminates the request stream. _resp is a noop. */
-int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done)
-{
- return BLKTAP_PASS;
-}
-
-void ublkback_init(void)
-{
- int i, rc;
- pthread_t p;
-
- for (i = 0; i < MAX_REQUESTS; i++)
- pending_list[i].count = 0;
-
- memset(&ctx, 0, sizeof(ctx));
- rc = io_queue_init(MAX_AIO_REQS, &ctx);
- if (rc != 0) {
- printf("queue_init failed! (%d)\n", rc);
- exit(0);
- }
-
- for (i=0; i<MAX_AIO_REQS; i++) {
- if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
- printf("error allocating iocb array\n");
- exit(0);
- }
- iocb_free_count = i;
- }
-
- rc = pipe(aio_notify);
- if (rc != 0) {
- printf("pipe failed! (%d)\n", errno);
- exit(0);
- }
-
- rc = pthread_create(&p, NULL, notifier_thread, NULL);
- if (rc != 0) {
- printf("pthread_create failed! (%d)\n", errno);
- exit(0);
- }
-
- aio_listening = 1;
-
- blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook);
-}
-
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/ublkback/ublkbacklib.h
--- a/tools/blktap/ublkback/ublkbacklib.h Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-/* blkaiolib.h
- *
- * aio image-backed block device.
- *
- * (c) 2004 Andrew Warfield.
- *
- * Xend has been modified to use an amorfs:[fsid] disk tag.
- * This will show up as device type (maj:240,min:0) = 61440.
- *
- * The fsid is placed in the sec_start field of the disk extent.
- */
-
-int ublkback_request(blkif_request_t *req, int batch_done);
-int ublkback_response(blkif_response_t *rsp); /* noop */
-int ublkback_new_blkif(blkif_t *blkif);
-void ublkback_init(void);
diff -r 533bad7c0883 -r 840f33e54054 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c Fri Jun 16 18:19:40 2006 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,568 +0,0 @@
-/*
- * xenbus.c
- *
- * xenbus interface to the blocktap.
- *
- * this handles the top-half of integration with block devices through the
- * store -- the tap driver negotiates the device channel etc, while the
- * userland tap clinet needs to sort out the disk parameters etc.
- *
- * A. Warfield 2005 Based primarily on the blkback and xenbus driver code.
- * Comments there apply here...
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <err.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <xs.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <poll.h>
-#include "blktaplib.h"
-#include "list.h"
-
-#if 0
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
-#else
-#define DPRINTF(_f, _a...) ((void)0)
-#endif
-
-/* --- Xenstore / Xenbus helpers ---------------------------------------- */
-/*
- * These should all be pulled out into the xenstore API. I'm faulting commands
- * in from the xenbus interface as i need them.
- */
-
-
-/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xs_gather(struct xs_handle *xs, const char *dir, ...)
-{
- va_list ap;
- const char *name;
- char *path;
- int ret = 0;
-
- va_start(ap, dir);
- while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
- const char *fmt = va_arg(ap, char *);
- void *result = va_arg(ap, void *);
- char *p;
-
- if (asprintf(&path, "%s/%s", dir, name) == -1)
- {
- warn("allocation error in xs_gather!\n");
- ret = ENOMEM;
- break;
- }
- p = xs_read(xs, path, NULL);
- free(path);
- if (p == NULL) {
- ret = ENOENT;
- break;
- }
- if (fmt) {
- if (sscanf(p, fmt, result) == 0)
- ret = EINVAL;
- free(p);
- } else
- *(char **)result = p;
- }
- va_end(ap);
- return ret;
-}
-
-/* Single printf and write: returns -errno or 0. */
-int xs_printf(struct xs_handle *h, const char *dir, const char *node,
- const char *fmt, ...)
-{
- char *buf, *path;
- va_list ap;
- int ret;
-
- va_start(ap, fmt);
- ret = vasprintf(&buf, fmt, ap);
- va_end(ap);
-
- asprintf(&path, "%s/%s", dir, node);
-
- if ((path == NULL) || (buf == NULL))
- return 0;
-
- ret = xs_write(h, path, buf, strlen(buf)+1);
-
- free(buf);
- free(path);
-
- return ret;
-}
-
-
-int xs_exists(struct xs_handle *h, const char *path)
-{
- char **d;
- int num;
-
- d = xs_directory(h, path, &num);
- if (d == NULL)
- return 0;
- free(d);
- return 1;
-}
-
-
-
-/* This assumes that the domain name we are looking for is unique! */
-char *get_dom_domid(struct xs_handle *h, const char *name)
-{
- char **e, *val, *domid = NULL;
- int num, i, len;
- char *path;
-
- e = xs_directory(h, "/local/domain", &num);
-
- i=0;
- while (i < num) {
- asprintf(&path, "/local/domain/%s/name", e[i]);
- val = xs_read(h, path, &len);
- free(path);
- if (val == NULL)
- continue;
- if (strcmp(val, name) == 0) {
- /* match! */
- asprintf(&path, "/local/domain/%s/domid", e[i]);
- domid = xs_read(h, path, &len);
- free(val);
- free(path);
- break;
- }
- free(val);
- i++;
- }
-
- free(e);
- return domid;
-}
-
-static int strsep_len(const char *str, char c, unsigned int len)
-{
- unsigned int i;
-
- for (i = 0; str[i]; i++)
- if (str[i] == c) {
- if (len == 0)
- return i;
- len--;
- }
- return (len == 0) ? i : -ERANGE;
-}
-
-
-/* xenbus watches: */
-/* Register callback to watch this node. */
-struct xenbus_watch
-{
- struct list_head list;
- char *node;
- void (*callback)(struct xs_handle *h,
- struct xenbus_watch *,
- const char *node);
-};
-
-static LIST_HEAD(watches);
-
-/* A little paranoia: we don't just trust token. */
-static struct xenbus_watch *find_watch(const char *token)
-{
- struct xenbus_watch *i, *cmp;
-
- cmp = (void *)strtoul(token, NULL, 16);
-
- list_for_each_entry(i, &watches, list)
- if (i == cmp)
- return i;
- return NULL;
-}
-
-/* Register callback to watch this node. like xs_watch, return 0 on failure */
-int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
-{
- /* Pointer in ascii is the token. */
- char token[sizeof(watch) * 2 + 1];
- int er;
-
- sprintf(token, "%lX", (long)watch);
- if (find_watch(token))
- {
- warn("watch collision!");
- return -EINVAL;
- }
-
- er = xs_watch(h, watch->node, token);
- if (er != 0) {
- list_add(&watch->list, &watches);
- }
-
- return er;
-}
-
-int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
-{
- char token[sizeof(watch) * 2 + 1];
- int er;
-
- sprintf(token, "%lX", (long)watch);
- if (!find_watch(token))
- {
- warn("no such watch!");
- return -EINVAL;
- }
-
-
- er = xs_unwatch(h, watch->node, token);
- list_del(&watch->list);
-
- if (er == 0)
- warn("XENBUS Failed to release watch %s: %i",
- watch->node, er);
- return 0;
-}
-
-/* Re-register callbacks to all watches. */
-void reregister_xenbus_watches(struct xs_handle *h)
-{
- struct xenbus_watch *watch;
- char token[sizeof(watch) * 2 + 1];
-
- list_for_each_entry(watch, &watches, list) {
- sprintf(token, "%lX", (long)watch);
- xs_watch(h, watch->node, token);
- }
-}
-
-/* based on watch_thread() */
-int xs_fire_next_watch(struct xs_handle *h)
-{
- char **res;
- char *token;
- char *node = NULL;
- struct xenbus_watch *w;
- int er;
- unsigned int num;
-
- res = xs_read_watch(h, &num);
- if (res == NULL)
- return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
-
- node = res[XS_WATCH_PATH];
- token = res[XS_WATCH_TOKEN];
-
- w = find_watch(token);
- if (!w)
- {
- warn("unregistered watch fired");
- goto done;
- }
- w->callback(h, w, node);
-
- done:
- free(res);
- return 1;
-}
-
-
-
-
-/* ---------------------------------------------------------------------- */
-
-struct backend_info
-{
- /* our communications channel */
- blkif_t *blkif;
-
- long int frontend_id;
- long int pdev;
- long int readonly;
-
- /* watch back end for changes */
- struct xenbus_watch backend_watch;
- char *backpath;
-
- /* watch front end for changes */
- struct xenbus_watch watch;
- char *frontpath;
-
- struct list_head list;
-};
-
-static LIST_HEAD(belist);
-
-static struct backend_info *be_lookup_be(const char *bepath)
-{
- struct backend_info *be;
-
- list_for_each_entry(be, &belist, list)
- if (strcmp(bepath, be->backpath) == 0)
- return be;
- return (struct backend_info *)NULL;
-}
-
-static int be_exists_be(const char *bepath)
-{
- return ( be_lookup_be(bepath) != NULL );
-}
-
-static struct backend_info *be_lookup_fe(const char *fepath)
-{
- struct backend_info *be;
-
- list_for_each_entry(be, &belist, list)
- if (strcmp(fepath, be->frontpath) == 0)
- return be;
- return (struct backend_info *)NULL;
-}
-
-static int backend_remove(struct xs_handle *h, struct backend_info *be)
-{
- /* Turn off watches. */
- if (be->watch.node)
- unregister_xenbus_watch(h, &be->watch);
- if (be->backend_watch.node)
- unregister_xenbus_watch(h, &be->backend_watch);
-
- /* Unhook from be list. */
- list_del(&be->list);
-
- /* Free everything else. */
- if (be->blkif)
- free_blkif(be->blkif);
- free(be->frontpath);
- free(be->backpath);
- free(be);
- return 0;
-}
-
-static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w,
- const char *fepath_im)
-{
- struct backend_info *be;
- char *fepath = NULL;
- int er;
-
- be = be_lookup_fe(w->node);
- if (be == NULL)
- {
- warn("frontend changed called for nonexistent backend! (%s)", fepath);
- goto fail;
- }
-
- /* If other end is gone, delete ourself. */
- if (w->node && !xs_exists(h, be->frontpath)) {
- DPRINTF("DELETING BE: %s\n", be->backpath);
- backend_remove(h, be);
- return;
- }
-
- if (be->blkif == NULL || (be->blkif->state == CONNECTED))
- return;
-
- /* Supply the information about the device the frontend needs */
- er = xs_transaction_start(h, be->backpath);
- if (er == 0) {
- warn("starting transaction");
- goto fail;
- }
-
- er = xs_printf(h, be->backpath, "sectors", "%lu",
- be->blkif->ops->get_size(be->blkif));
- if (er == 0) {
- warn("writing sectors");
- goto fail;
- }
-
- er = xs_printf(h, be->backpath, "info", "%u",
- be->blkif->ops->get_info(be->blkif));
- if (er == 0) {
- warn("writing info");
- goto fail;
- }
-
- er = xs_printf(h, be->backpath, "sector-size", "%lu",
- be->blkif->ops->get_secsize(be->blkif));
- if (er == 0) {
- warn("writing sector-size");
- goto fail;
- }
-
- be->blkif->state = CONNECTED;
-
- xs_transaction_end(h, 0);
-
- return;
-
- fail:
- free(fepath);
-}
-
-
-static void backend_changed(struct xs_handle *h, struct xenbus_watch *w,
- const char *bepath_im)
-{
- struct backend_info *be;
- char *path = NULL, *p;
- int len, er;
- long int pdev = 0, handle;
-
- be = be_lookup_be(w->node);
- if (be == NULL)
- {
- warn("backend changed called for nonexistent backend! (%s)", w->node);
- goto fail;
- }
-
- er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL);
- if (er != 0)
- goto fail;
-
- if (be->pdev && be->pdev != pdev) {
- warn("changing physical-device not supported");
- goto fail;
- }
- be->pdev = pdev;
-
- asprintf(&path, "%s/%s", w->node, "read-only");
- if (xs_exists(h, path))
- be->readonly = 1;
-
- if (be->blkif == NULL) {
- /* Front end dir is a number, which is used as the handle. */
- p = strrchr(be->frontpath, '/') + 1;
- handle = strtoul(p, NULL, 0);
-
- be->blkif = alloc_blkif(be->frontend_id);
- if (be->blkif == NULL)
- goto fail;
-
- er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
- if (er)
- goto fail;
-
- DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node);
-
- /* Pass in NULL node to skip exist test. */
- frontend_changed(h, &be->watch, NULL);
- }
-
- fail:
- free(path);
-}
-
-static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w,
- const char *bepath_im)
-{
- struct backend_info *be = NULL;
- char *frontend = NULL, *bepath = NULL;
- int er, len;
-
- bepath = strdup(bepath_im);
- if (!bepath)
- return;
- len = strsep_len(bepath, '/', 6);
- if (len < 0)
- goto free_be;
-
- bepath[len] = '\0'; /*truncate the passed-in string with predjudice. */
-
- be = malloc(sizeof(*be));
- if (!be) {
- warn("allocating backend structure");
- goto free_be;
- }
- memset(be, 0, sizeof(*be));
-
- frontend = NULL;
- er = xs_gather(h, bepath,
- "frontend-id", "%li", &be->frontend_id,
- "frontend", NULL, &frontend,
- NULL);
- if (er)
- goto free_be;
-
- if (strlen(frontend) == 0 || !xs_exists(h, frontend)) {
- /* If we can't get a frontend path and a frontend-id,
- * then our bus-id is no longer valid and we need to
- * destroy the backend device.
- */
- DPRINTF("No frontend (%s)\n", frontend);
- goto free_be;
- }
-
- /* Are we already tracking this device? */
- if (be_exists_be(bepath))
- goto free_be;
-
- be->backpath = bepath;
- be->backend_watch.node = be->backpath;
- be->backend_watch.callback = backend_changed;
- er = register_xenbus_watch(h, &be->backend_watch);
- if (er == 0) {
- be->backend_watch.node = NULL;
- warn("error adding backend watch on %s", bepath);
- goto free_be;
- }
-
- be->frontpath = frontend;
- be->watch.node = be->frontpath;
- be->watch.callback = frontend_changed;
- er = register_xenbus_watch(h, &be->watch);
- if (er == 0) {
- be->watch.node = NULL;
- warn("adding frontend watch on %s", be->frontpath);
- goto free_be;
- }
-
- list_add(&be->list, &belist);
-
- DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im);
-
- backend_changed(h, &be->backend_watch, bepath);
- return;
-
- free_be:
- if (be && (be->backend_watch.node))
- unregister_xenbus_watch(h, &be->backend_watch);
- free(frontend);
- free(bepath);
- free(be);
- return;
-}
-
-
-int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
-{
- char *domid, *path;
- struct xenbus_watch *vbd_watch;
- int er;
-
- domid = get_dom_domid(h, domname);
-
- DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
-
- asprintf(&path, "/local/domain/%s/backend/vbd", domid);
- if (path == NULL)
- return -ENOMEM;
-
- vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
- vbd_watch->node = path;
- vbd_watch->callback = blkback_probe;
- er = register_xenbus_watch(h, vbd_watch);
- if (er == 0) {
- warn("Error adding vbd probe watch %s", path);
- return -EINVAL;
- }
-
- return 0;
-}
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|