[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 4/4] (Refactored) Add libvdisk, and vdisk_tool



[PATCH 4/4] (Refactored) Add libvdisk, and vdisk_tool
vdisk-support.patch
provides libvdisk, and vdisk_tool, as described in [PATCH 0/4]
Signed-off-by: Boris Ostrovsky <bostrovsky@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>

diff -r 75c61490cc06 tools/Makefile
--- a/tools/Makefile    Thu Jun 21 13:05:29 2007 -0400
+++ b/tools/Makefile    Thu Jun 21 13:05:31 2007 -0400
@@ -17,6 +17,7 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm
 SUBDIRS-$(VTPM_TOOLS) += vtpm
 SUBDIRS-y += xenstat
 SUBDIRS-y += libaio
+SUBDIRS-y += vdisk
 SUBDIRS-y += blktap
 SUBDIRS-y += libfsimage
 SUBDIRS-$(XENFB_TOOLS) += xenfb
diff -r 75c61490cc06 tools/vdisk/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/Makefile      Thu Jun 21 13:05:45 2007 -0400
@@ -0,0 +1,65 @@
+#
+# Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+#
+# Portions have been modified by Virtual Iron Software, Inc.
+# (c) 2007. This file and the modifications can be redistributed and/or
+# modified under the terms and conditions of the GNU General Public
+# License, version 2.1 and not any later version of the GPL, as published
+# by the Free Software Foundation.
+#
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+LIBVHD_SRC     = vhd.c vhd_utils.c
+LIBVDISK_SRC   = vdisk_utils.c vdisk_common.c
+TOOL_SRC       = vdisk_tool.c
+
+LIBAIO_DIR   = ../libaio/src
+BLKTAP_DIR  = ../blktap/drivers
+
+CFLAGS         = -O2 -fno-strict-aliasing -fPIC -Wall -Werror -rdynamic \
+               -D_FILE_OFFSET_BITS=64 \
+               -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -I./ \
+               -I$(LIBAIO_DIR) \
+               -I$(BLKTAP_DIR)
+
+LIB_LDFLAGS    = -dy -shared -L$(LIBAIO_DIR) -laio
+
+INSTALL                = /usr/bin/install
+
+all: default
+default: vdisk_tool libvdisk_vhd.so libvdisk.so
+
+
+%.o: %.c
+       $(CC) $(CFLAGS) -rdynamic  -c $< -o $@
+
+# Link the tool from the objects produced by the %.o rule so that it is
+# compiled with the same CFLAGS as the libraries (notably
+# -D_FILE_OFFSET_BITS=64: structures in vdisk.h contain off_t members).
+vdisk_tool: $(TOOL_SRC:%.c=%.o) libvdisk_vhd.so libvdisk.so
+       $(CC) $(CFLAGS) -g -o $@ $(TOOL_SRC:%.c=%.o) \
+               -L. -L$(LIBAIO_DIR) -lvdisk -ldl -laio
+
+libvdisk_vhd.so: $(LIBVHD_SRC:%.c=%.o) libvdisk.so
+       $(LD) $(LIB_LDFLAGS) -o $@ $^
+
+libvdisk.so: $(LIBVDISK_SRC:%.c=%.o)
+       $(LD) $(LIB_LDFLAGS) -o $@ $^
+
+install: all
+       $(INSTALL) -d $(DESTDIR)/usr/bin
+       $(INSTALL) -d $(DESTDIR)/usr/lib64
+       $(INSTALL) vdisk_tool $(DESTDIR)/usr/bin
+       $(INSTALL) libvdisk_vhd.so libvdisk.so $(DESTDIR)/usr/lib64
+       $(INSTALL) -d $(DESTDIR)/usr/include
+       for header in *.h; do $(INSTALL) $$header $(DESTDIR)/usr/include; done
+
+clean:
+       /bin/rm -f *.o libvdisk_vhd.so vdisk_tool libvdisk.so
+
+depend .depend dep:
+       $(CC) $(CFLAGS) -M $(LIBVDISK_SRC) $(LIBVHD_SRC) $(TOOL_SRC)> .depend
+
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff -r 75c61490cc06 tools/vdisk/list.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/list.h        Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,168 @@
+// Copy of /usr/include/linux/list.h that does not
+// depend on __KERNEL__ and _LVM_H_INCLUDE
+
+#ifndef _LIST_H
+#define _LIST_H
+
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+       struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+       struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries. 
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add(struct list_head * new,
+       struct list_head * prev,
+       struct list_head * next)
+{
+       next->prev = new;
+       new->next = next;
+       new->prev = prev;
+       prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+       __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head, i.e. between the
+ * current last entry (head->prev) and head itself.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail(struct list_head *new,
+       struct list_head *head)
+{
+       __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_del(struct list_head * prev,
+                                 struct list_head * next)
+{
+       next->prev = prev;
+       prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this; the entry
+ * is in an undefined state.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+       entry->next = entry->prev = 0;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+       INIT_LIST_HEAD(entry); 
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+       return head->next == head;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are linked in right after @head. Nothing is done
+ * when @list is empty. @list itself is not reinitialized here.
+ */
+static __inline__ void list_splice(struct list_head *list,
+       struct list_head *head)
+{
+       struct list_head *first = list->next;
+
+       if (first != list) {
+               struct list_head *last = list->prev;
+               struct list_head *at = head->next;
+
+               first->prev = head;
+               head->next = first;
+
+               last->next = at;
+               at->prev = last;
+       }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:       the &struct list_head pointer.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each       -       iterate over a list
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ */
+#define list_for_each(pos, head) \
+       for (pos = (head)->next; pos != (head); \
+               pos = pos->next)
+               
+/**
+ * list_for_each_safe  -       iterate over a list safe against removal
+ *                             of list entry
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @n:         another &struct list_head to use as temporary storage
+ * @head:      the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+       for (pos = (head)->next, n = pos->next; pos != (head); \
+               pos = n, n = pos->next)
+
+
+
+#endif
diff -r 75c61490cc06 tools/vdisk/vdisk.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk.h       Thu Jun 21 13:05:48 2007 -0400
@@ -0,0 +1,215 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VDISK_H
+#define __VDISK_H
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/limits.h>
+#include <syslog.h>
+#include <libaio.h>
+#include "list.h"
+#include "tapaio.h"
+
+// vdisk_tool's operations
+#define VDISK_OP_CREATE    (1<<0)
+#define VDISK_OP_HEADERS   (1<<1)
+#define VDISK_OP_DUMP      (1<<2)
+#define VDISK_OP_MODIFY    (1<<3)
+
+// Return codes
+#define VID_BLOCK_MAPPED    (0)
+#define VID_BLOCK_NOTMAPPED (-1)
+#define VID_BLOCK_TOOBIG    (-2)
+#define VID_BLOCK_MAPERR    (-3)
+
+// IO operation codes
+#define VDISK_READ  (0)
+#define VDISK_WRITE (1)
+
+// Async IO macros
+#define VDISK_HASH_SZ        (2048)
+#define VDISK_HASH_IDX(x)    ((x) & (VDISK_HASH_SZ-1))
+#define VDISK_INVALID_HASH   (-1)
+#define REQUEST_ASYNC_FD     (1) // Should really be defined in kernel
+
+#define SECTOR_SIZE          (512)
+
+// vdisk device flags
+#define VDISK_SYNCIO_BUF     (1<<0)
+#define VDISK_RO             (1<<1)
+
+// vdisk file flags
+#define VDF_LEAF    (1<<0) // last COW child (writeable)
+
+// Statistics gathering
+#define        VDISK_STATS          (0)
+#define VDISK_SYNCIO_STATS   (0)
+
+#if VDISK_STATS
+#define        DO_STATS(x)     x
+#else
+#define        DO_STATS(x)
+#endif
+
+
+
+
+// Datatype for addressing host memory 
+#if defined __x86_64__
+typedef uint64_t addr_t;
+#else
+typedef uint32_t addr_t;
+#endif
+
+typedef        int file_t;
+
+// Forward declaration
+struct vdisk_dev;
+
+// Stores info about a pending async IO
+typedef struct pending_aio {
+       uint32_t block;
+       uint32_t num_blocks;
+       void *arg;
+       void *aiocb;
+       off_t off;
+       file_t fd;
+       int op;
+       int res;
+} pending_aio_t;
+
+// Hash that stores async IO data
+typedef struct vdisk_hash {
+       uint64_t key;
+       struct iocb io;
+       pending_aio_t pio;
+} vdisk_hash_t;
+
+// run data to allow coalescing of writes when doing posix_fadvise() sync/flush
+typedef struct vdisk_syncio {
+       int     is_set;
+       off_t   io_start;
+       off_t   io_len;
+#if VDISK_SYNCIO_STATS
+       unsigned long   total_writes;
+       unsigned long   contig_writes;
+       unsigned long   flush_size_sub1MB;
+       unsigned long   flush_size_sub2MB;
+       unsigned long   flush_size_sub4MB;
+       unsigned long   flush_size_sub8MB;
+       unsigned long   flush_size_ovr8MB;
+       unsigned long   flush_size_force;
+       time_t          last_dbg_print;
+#endif
+} vdisk_syncio_t;
+
+// Per-file structure
+typedef struct vd_file {
+       struct list_head vdf_list;
+       char name[PATH_MAX];
+       file_t fd;
+       int flags;
+       int batch_sz;           // number of blocks that are mapped sequentially
+       void *vdf;              // format-specific data
+       vdisk_syncio_t *syncio; // allows sync io to buffer in pagecache for 
+                               //  better io performance
+} vd_file_t;
+
+// Data describing format's properties (ops etc.)
+typedef struct vdf_data {
+       char ftype[8];                    // File name extension
+
+       int (*open)(struct vdisk_dev *vdisk, char *filename);
+       void (*close)(struct vdisk_dev *vdisk);
+       int (*map_block)(vd_file_t *vf, uint32_t *blockno, int num_blocks, 
+                        int op, void **arg);
+       int (*xfer_commit)(void *arg, int err);
+       int (*print_header)(vd_file_t *vf);
+       int (*parse_args)(int argc, int operations, char *argv[], void **optp);
+       int (*create_vdisk)(char *filename, void *optp);
+       int (*modify_vdisk)(struct vdisk_dev *vdisk, void *optp);
+       struct list_head vdfd_list; // connects to global format list
+} vdf_data_t;
+
+// Top-level datastructure
+typedef struct vdisk_dev {
+
+       struct vdisk_geom {
+               int cyls;
+               int heads;
+               int secs;
+       } geom;
+
+        ssize_t sz;      // Device size (bytes)
+       
+       int flags;
+
+       // head of vdisk files (vd_file_t) list
+       struct list_head vdf_head;
+
+       vdf_data_t *vdfd;
+
+       // AIO data
+       vdisk_hash_t hash[VDISK_HASH_SZ];
+       struct iocb *aio_submit[VDISK_HASH_SZ];
+       struct io_event aio_events[VDISK_HASH_SZ];
+       tap_aio_context_t   aio_ctx;
+       int use_aio;
+       int aio_fd;
+       int aio_cnt;
+
+       // Stats
+       uint64_t busyio;
+       uint64_t syncio;
+       uint64_t asyncio;
+       uint64_t tot_io;
+} vdisk_dev_t;
+
+struct program_props {
+       void *alloc_func;
+       void *free_func;
+       int out_target;
+};
+
+
+#define VDISK_OUT_STDERR (0)
+#define VDISK_OUT_SYSLOG (1)
+extern int vdisk_dbg_level;
+extern int vdisk_out_target;
+// Log a message at debug level 'n', tagged with the calling file and line.
+// 'fmt' and the optional arguments are passed through to vdisk_log_error().
+#define VIDDBG(n, fmt, args...) \
+       vdisk_log_error(n, __FILE__, __LINE__, fmt, ##args)
+
+#define ASSERT(expr)                                                    \
+       ((expr) ? 0 :                                                   \
+        ({                                                             \
+                VIDDBG(0, "Assertion failed: %s\n", __STRING(expr));   \
+                abort();                                               \
+        }));
+
+extern int vdisk_pagesz; //4K
+
+extern void vdisk_log_error(int level, char *file, int line, char *fmt, ...);
+extern int vdf_read_state(vdisk_dev_t *vdisk, char *filename);
+extern int vdf_print_headers(vdisk_dev_t *vdisk, char *filename);
+extern int vdisk_register (vdf_data_t *vdfd);
+extern void vdisk_unregister (vdf_data_t *vdfd);
+extern int vdf_init(vdisk_dev_t *vdisk, char *fname);
+extern int vdisk_common_init(vdisk_dev_t *vdisk);
+extern int vdf_find_vdfd(vdisk_dev_t *vdisk, char *ftype);
+extern int vdisk_xfer_cb(vdisk_dev_t *vdisk, struct pending_aio *pio);
+extern int vdisk_rw(void *hdl, int64_t sector_num, 
+                   uint8_t *buf, int nb_sectors, int write, void *aiocb);
+extern void vdisk_alloc_init(void *alloc_func, void *free_func);
+extern int vdisk_init(vdisk_dev_t *vdisk, char *filename,
+                     struct program_props *props, uint8_t flags);
+extern void vdisk_fini(vdisk_dev_t *vdisk);
+
+#endif /* __VDISK_H */
diff -r 75c61490cc06 tools/vdisk/vdisk_common.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_common.c        Thu Jun 21 13:05:53 2007 -0400
@@ -0,0 +1,616 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+
+#include "vdisk.h"
+#include "vdisk_utils.h"
+
+
+static int vdisk_initialized = 0;
+int vdisk_pagesz = 0;
+
+// Tear down a vdisk: drop every file's sync-IO bookkeeping, then hand the
+// device to the format's close() handler. Does nothing when the device,
+// its format descriptor or the close handler is missing (e.g. the device
+// was already closed).
+void vdisk_fini(vdisk_dev_t *vdisk)
+{
+       struct list_head *node;
+
+       if (!vdisk || !vdisk->vdfd || !vdisk->vdfd->close)
+               return;
+
+       for (node = vdisk->vdf_head.next; node != &vdisk->vdf_head;
+            node = node->next) {
+               vd_file_t *file = list_entry(node, vd_file_t, vdf_list);
+
+               free(file->syncio);
+               file->syncio = NULL;
+       }
+
+       vdisk->vdfd->close(vdisk);
+}
+
+// Initialize a vdisk device for 'filename'.
+//
+// 'props' (may be NULL) selects the log target and the allocator hooks;
+// without it, output goes to stderr and vdisk_alloc_init() gets NULLs.
+// Anything up to and including the first ':' in 'filename' is skipped
+// before the name is passed to vdf_init().
+// Returns 0 on success or the error reported by vdf_init().
+int vdisk_init(vdisk_dev_t *vdisk, char *filename,
+              struct program_props *props, uint8_t flags)
+{
+       void *alloc_fn = NULL;
+       void *free_fn = NULL;
+       char *colon;
+       char *path;
+       int rc;
+
+       vdisk_common_init(NULL/*XXX: ?? */);
+
+       // Set where output is directed
+       vdisk_out_target = props ? props->out_target : VDISK_OUT_STDERR;
+       if (props) {
+               alloc_fn = props->alloc_func;
+               free_fn = props->free_func;
+       }
+       vdisk_alloc_init(alloc_fn, free_fn);
+
+       colon = strchr(filename, ':');
+       path = colon ? colon + 1 : filename;
+
+       vdisk->flags = flags;
+
+       rc = vdf_init(vdisk, path);
+       if (rc == 0)
+               return (0);
+
+       VIDDBG(0, "Can't initialize format's data for %s\n",
+               filename);
+       return (rc);
+}
+
+// Select the format handler from the extension of 'fname', read the
+// on-disk state and, when VDISK_SYNCIO_BUF is set, allocate per-file
+// sync-IO run data.
+// Returns 0 on success, EINVAL when 'fname' has no extension, the error
+// from vdf_find_vdfd(), or -1 when the state can't be read. A failed
+// sync-IO allocation is only logged.
+int
+vdf_init(vdisk_dev_t *vdisk, char *fname) 
+{
+       struct list_head *node;
+       char *dot;
+       int rc;
+
+       dot = strrchr(fname, '.');
+       if (!dot) {
+               VIDDBG(0, "Can't determine file type for %s\n", fname);
+               return (EINVAL);
+       }
+
+       // The extension starts right after the '.'
+       rc = vdf_find_vdfd(vdisk, dot + 1);
+       if (rc != 0) {
+               VIDDBG(0, "Can't find format's data\n");
+               return (rc);
+       }
+
+       if (vdf_read_state(vdisk, fname) != 0) {
+               VIDDBG(0, "failed to read headers\n");
+               return (-1);
+       }
+
+       if (!(vdisk->flags & VDISK_SYNCIO_BUF))
+               return (0);
+
+       for (node = vdisk->vdf_head.next; node != &vdisk->vdf_head;
+            node = node->next) {
+               vd_file_t *file = list_entry(node, vd_file_t, vdf_list);
+
+               file->syncio = calloc(1, sizeof(vdisk_syncio_t));
+               if (file->syncio == NULL) {
+                       VIDDBG(0, "vdisk_alloc_syncio_run_data() "
+                              "failed '%s', thus no speed up\n",
+                              strerror(errno));
+               }
+       }
+
+       return (0);
+}
+
+// Map a single block through the device's file list.
+//
+// Each file on dev->vdf_head is asked, in list order, to map '*blockno'
+// (IN/OUT); the walk stops at the first file that reports
+// VID_BLOCK_MAPPED. '*vf' is set to the file tried last, so on success it
+// is the file that holds the block.
+// Returns the last map_block() result, or VID_BLOCK_NOTMAPPED when the
+// list is empty. A failed write mapping is logged.
+int
+vdisk_map_block(struct vdisk_dev *dev, 
+               uint32_t *blockno,      /* IN/OUT */
+               int op,
+               vd_file_t **vf,
+               void **arg)
+{
+       struct list_head *ptr;
+       vd_file_t *vdf;
+       int res = VID_BLOCK_NOTMAPPED;
+
+       list_for_each(ptr, &dev->vdf_head) {
+
+               *vf = vdf = list_entry(ptr, vd_file_t, vdf_list);
+
+               res = dev->vdfd->map_block(vdf, blockno, 1, op, arg);
+               if (res == VID_BLOCK_MAPPED)
+                       return (res);
+       }
+
+       // Block numbers are unsigned 32-bit: print them as such
+       if (op == VDISK_WRITE)
+               VIDDBG(0, "Couldn't map block %" PRIu32 "\n", *blockno);
+
+       return (res);
+}
+
+// Reset the device's file list, prepare async IO when it is enabled and
+// open 'filename' through the format's open() handler.
+// If io_queue_init() fails, async IO is switched off for this device and
+// the open still proceeds.
+// Returns 0 on success or the error from the format's open().
+int
+vdf_read_state(vdisk_dev_t *vdisk, char *filename)
+{
+       int rc;
+       int slot;
+
+       INIT_LIST_HEAD(&vdisk->vdf_head);
+
+       if (vdisk->use_aio) {
+               // Mark every hash slot as free
+               slot = 0;
+               while (slot < VDISK_HASH_SZ) {
+                       vdisk->hash[slot].key = VDISK_INVALID_HASH;
+                       slot++;
+               }
+
+               memset(&vdisk->aio_ctx.aio_ctx, 0, sizeof(io_context_t));
+               rc = io_queue_init(100, &vdisk->aio_ctx.aio_ctx);
+               if (rc != 0) {
+                       VIDDBG(0, "io_queue_init() failed: %s. "
+                              " Async IO will not be available\n", 
+                              strerror(-rc));
+                       vdisk->use_aio = 0;
+               }
+       }
+
+       rc = vdisk->vdfd->open(vdisk, filename);
+       if (rc == 0)
+               return (0);
+
+       VIDDBG(0, "Problems opening vdisk %s (error %d)\n", 
+              filename, rc);
+       return (rc);
+}
+
+// Read the state of 'filename' and print the header of the first file on
+// the device's file list via the format's print_header() handler.
+// Returns 0 on success, the error from vdf_read_state(), or EINVAL when
+// opening succeeded but left the file list empty. The result of
+// print_header() itself is ignored.
+int
+vdf_print_headers(vdisk_dev_t *vdisk, char *filename)
+{
+       int err;
+       vd_file_t *vf;
+
+       err = vdf_read_state(vdisk, filename);
+       if (err) {
+               VIDDBG(0, "Failed to read state for %s\n", filename);
+               return (err);
+       }
+
+       // list_entry() on an empty list would yield a pointer computed from
+       // the list head itself, not a real vd_file_t.
+       if (list_empty(&vdisk->vdf_head)) {
+               VIDDBG(0, "No files opened for %s\n", filename);
+               return (EINVAL);
+       }
+
+       vf = list_entry(vdisk->vdf_head.next, vd_file_t, vdf_list);
+       (void)vdisk->vdfd->print_header(vf);
+
+       return (0);
+}
+
+// Completion handler for one pending IO.
+//
+// Commits the transfer through the format's xfer_commit(), fsync()s the
+// file after a write, asks the kernel to drop the affected page-cache
+// range and finally frees the hash slots of the blocks covered by 'pio'.
+// Returns 0 on success, otherwise the first error encountered (commit
+// result, fsync errno or posix_fadvise error number). The hash slots are
+// released regardless of errors.
+int
+vdisk_xfer_cb(vdisk_dev_t *vdisk, struct pending_aio *pio)
+{
+       uint32_t blk;
+       int err;
+       int ret = 0;
+
+       ASSERT(pio != NULL);
+
+       err = vdisk->vdfd->xfer_commit(pio->arg, pio->res);
+       if (err) {
+               VIDDBG(0, "Failed to commit transfer (error %d)\n", err);
+               ret = err;
+       }
+       
+       if (pio->op == VDISK_WRITE) {
+               if (fsync(pio->fd) != 0) {
+                       err = errno;
+                       VIDDBG(0, "fsync: %s\n", strerror(err));
+                       if (ret == 0)
+                               ret = err;
+               }
+       }
+       
+       /*
+        * posix_fadvise() (or, rather, kernel's sys_fadvise64_64())
+        * invalidates whole pages only.
+        * Unlike most calls it returns the error number directly and does
+        * not set errno.
+        */
+       err = posix_fadvise(pio->fd, (pio->off & (~((off_t)vdisk_pagesz-1))),
+                           (ssize_t)(pio->num_blocks<<9) + (off_t)vdisk_pagesz,
+                           POSIX_FADV_DONTNEED);
+       if (err) {
+               VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+               if (ret == 0)
+                       ret = err;
+       }
+
+       for (blk=pio->block; blk < (pio->block + pio->num_blocks); blk++)
+               vdisk->hash[VDISK_HASH_IDX(blk)].key = VDISK_INVALID_HASH;
+
+       return (ret);
+}
+
+
+// Give back the hash slots reserved for 'count' consecutive blocks
+// starting at 'first'. Every slot must currently be owned by its block.
+static void
+vdisk_release_hash(struct vdisk_dev *vdisk, uint32_t first, uint32_t count)
+{
+       uint32_t n;
+       uint32_t blk;
+       int idx;
+
+       for (n = 0, blk = first; n < count; n++, blk++) {
+               idx = VDISK_HASH_IDX(blk);
+               ASSERT(vdisk->hash[idx].key == blk);
+               vdisk->hash[idx].key = VDISK_INVALID_HASH;
+       }
+}
+
+// Read or write 'nb_blocks' 512-byte blocks starting at 'block'.
+//
+//  hdl       - the vdisk_dev the request is for
+//  block     - first block of the request
+//  buf       - caller's data buffer; if it is not 512-byte aligned the
+//              data goes through an aligned bounce buffer and the request
+//              is done with sync IO
+//  nb_blocks - number of blocks to transfer
+//  op        - VDISK_READ or VDISK_WRITE
+//  aiocb     - opaque cookie passed through to vdisk_asyncio()
+//
+// With async IO enabled, one hash slot per block is reserved up front. A
+// slot held by a different block makes the call fail with -EBUSY; a slot
+// already held for the same block makes the request fall back to sync IO.
+// While 'use_aio' is non-zero the slots of all not-yet-processed blocks
+// are reserved by this call; they are released whenever the call gives up
+// on them (fallback to sync IO, zero-filled reads, error returns).
+//
+// Returns the number of bytes processed synchronously (see the comment
+// before the final return), 0 when the caller has to wait for AIO
+// completions, or a negative errno (-ENOSPC, -EBUSY, -ENOMEM, -EINVAL).
+int vdisk_rw(void *hdl, int64_t block, 
+            uint8_t *buf, int nb_blocks,
+            int op, void *aiocb)
+{
+       off_t offset;
+       unsigned long bytes;
+       uint32_t real_block, blk;
+       vd_file_t *vdf = NULL;
+       void *arg = NULL;
+       struct vdisk_dev *vdisk = (struct vdisk_dev *)hdl;
+       int i;
+       struct list_head *ptr;
+       int res = 0;
+       char *b = (char *)buf;
+       char *pool = NULL;
+       int batch;
+       int use_aio = vdisk->use_aio;
+       int busy = 0;
+       int hash_index;
+       int zero_blocks = 0;
+
+       VIDDBG(50, "block=0x%" PRIx64 ", nb_blocks=%d\n", 
+              block, nb_blocks);
+
+       if (((block + (nb_blocks-1)) << 9) >= vdisk->sz) {
+               return (-ENOSPC);
+       }
+
+       vdisk->tot_io++;
+
+       if (use_aio) {
+               // Check whether the hash has available slots and reserve them
+               // We reserve them as we go because we want to make sure that
+               // the request fits in the hash.
+               for (i=0, blk=block; i<nb_blocks; i++, blk++) {
+                       hash_index = VDISK_HASH_IDX(blk);
+                       VIDDBG(50, "block=0x%" PRIx64 ", nb_blocks=%d i=%d "
+                              "blk=0x%x, vdisk->hash.key[%d]=0x%" PRIx64 "\n", 
+                              block, nb_blocks, i,
+                              blk, hash_index, 
+                              vdisk->hash[hash_index].key);
+                       if (vdisk->hash[hash_index].key != VDISK_INVALID_HASH) {
+                               vdisk->busyio++;
+                               if (vdisk->hash[hash_index].key != blk)
+                                       busy = 1;
+                               use_aio = 0;
+                               break;
+                       }
+                       vdisk->hash[hash_index].key = blk;
+                       VIDDBG(50, "hash_index=%d, blk=%d\n", 
+                              hash_index, blk);
+               }
+
+               // We need to free hash entries that we've just reserved.
+               if (!use_aio) {
+                       VIDDBG(50, "Freeing hash for block %" PRId64 "\n",
+                              block);
+                       // 'i' slots were reserved before the conflict.
+                       // Counting up avoids the unsigned wrap-around a
+                       // downward loop hits when 'block' is 0.
+                       vdisk_release_hash(vdisk, (uint32_t)block, i);
+                       VIDDBG(50, "Done\n");
+                       if (busy) {
+                               VIDDBG(50, "Busy\n");
+                               return (-EBUSY);
+                       }
+                       vdisk->syncio++;
+               }
+       }
+
+       // We can only transfer to/from an aligned buffer
+       if ((addr_t)buf & 511) {
+               // The bounce buffer forces sync IO, so no completion will
+               // ever free the reserved slots: give them back now.
+               if (use_aio) {
+                       vdisk_release_hash(vdisk, (uint32_t)block, nb_blocks);
+                       use_aio = 0;
+               }
+
+               b = pool = vdisk_malloc((nb_blocks+1) * 512);
+               if (pool == NULL) {
+                       VIDDBG(0, "Can't create buffer\n");
+                       return (-ENOMEM);
+               }
+               while ((addr_t)b & 511) b++;
+               VIDDBG(10, "Aligned buffer %p (pool %p, b %p)\n", buf, pool, b);
+       }
+
+       i = 0; // block in the buf[]
+       while (nb_blocks>0) {
+
+               // Find largest contiguous set of blocks that we
+               // we can access in a single IO.
+
+               batch = nb_blocks;
+       again:
+               arg = NULL;
+               list_for_each(ptr, &vdisk->vdf_head) {
+
+                       vdf = list_entry(ptr, vd_file_t, vdf_list);
+
+                       real_block = (uint32_t)block;
+
+                       // Make batch fit into a single vdf->batch_sz
+                       if ( ((block + batch - 1) & ~(vdf->batch_sz-1))
+                            != (block & ~(vdf->batch_sz-1)))
+                               batch = ( (block + vdf->batch_sz) & 
+                                         ~(vdf->batch_sz-1) )
+                                       - block;
+
+                       // Map the requested block set to address in the file
+                       res = vdisk->vdfd->map_block(vdf, &real_block, 
+                                                    batch, op, &arg);
+
+                       if (res == VID_BLOCK_TOOBIG) {
+                               // Some blocks are mapped and some are not.
+                               // Need to try a smaller batch
+
+                               batch >>= 1;
+                               if (!batch) {
+                                       // Free hash entries of the blocks
+                                       // that have not been processed; there
+                                       // are none unless AIO is in use.
+                                       if (use_aio)
+                                               vdisk_release_hash(vdisk,
+                                                       (uint32_t)block,
+                                                       nb_blocks);
+
+                                       VIDDBG(0, "Inconsistent mapping error\n");
+                                       if (pool)
+                                               vdisk_free(pool);
+                                       // Negative, like the other errno
+                                       // returns: a positive value would
+                                       // read as a byte count.
+                                       return (-EINVAL);
+                               }
+                               goto again;
+                       }
+
+                       if ((res != VID_BLOCK_NOTMAPPED) ||
+                           ((vdf->flags & VDF_LEAF) && (op == VDISK_WRITE)))
+                               break;
+               }
+
+               if (res != VID_BLOCK_MAPPED) {
+
+                       // Unallocated blocks return zeroes for reads
+                       if ((op == VDISK_READ) && (res == VID_BLOCK_NOTMAPPED)) {
+
+                               // Free up hash entries
+                               if (use_aio)
+                                       vdisk_release_hash(vdisk,
+                                                          (uint32_t)block,
+                                                          batch);
+
+                               memset(&buf[i*512], 0, batch*512);
+                               i += batch;
+                               b += batch * 512;
+                               block += batch;
+                               nb_blocks -= batch;
+                               zero_blocks += batch;
+                               VIDDBG(10, "Skipping %d blocks\n", batch);
+                               continue;
+                       }
+
+                       VIDDBG(0, "Couldn't map block %" PRId64 " (%d)\n", 
+                              block, res);
+                       // Nothing will complete the remaining blocks
+                       if (use_aio)
+                               vdisk_release_hash(vdisk, (uint32_t)block,
+                                                  nb_blocks);
+                       if (pool)
+                               vdisk_free(pool);
+                       // NOTE(review): the VID_BLOCK_* codes are negative,
+                       // so this returns a small positive value that a
+                       // caller can't tell from a byte count - confirm
+                       // what callers expect before changing it.
+                       return (-1*res);
+               }
+
+               VIDDBG(50, "mapped sector %" PRId64 " to block %d for read\n", 
+                      block, real_block);
+
+               // Offset in the file
+               offset = (uint64_t)real_block << 9;
+
+               if (use_aio)
+                       vdisk->asyncio++;
+
+               // Perform IO
+               if (op == VDISK_WRITE) {
+                       if (pool)
+                               memcpy(b, &buf[i*512], batch * 512); 
+                       if (!use_aio)
+                               bytes = vdisk_syncio(vdf->fd, b, batch * 512, 
+                                                    offset, VDISK_WRITE, 
+                                                    vdf->syncio);
+                       else
+                               bytes = vdisk_asyncio(vdisk, block, vdf->fd, 
+                                                     b, batch * 512, offset, 
+                                                     arg, aiocb, VDISK_WRITE);
+               } else /* VDISK_READ */ {
+                       if (!use_aio) {
+                               bytes = vdisk_syncio(vdf->fd, b, batch * 512, 
+                                                    offset, VDISK_READ, NULL);
+                               if (pool)
+                                       memcpy(&buf[i*512], b, batch * 512);
+                       } else {
+                               bytes = vdisk_asyncio(vdisk, block, vdf->fd,
+                                                     b, batch * 512, offset, 
+                                                     arg, aiocb, VDISK_READ);
+                       }
+               }
+
+               if (bytes != batch * 512) {
+                       VIDDBG(0, "%s %ld bytes (block %d) instead of "
+                              "%d (%s)\n", (op==VDISK_WRITE)?"Wrote":"Read", 
+                              bytes, real_block, batch * 512, vdf->name);
+                       if ((signed long)bytes == -1)
+                               res = errno;
+               }
+
+               // Sync transfers are committed right away; async ones are
+               // committed from vdisk_xfer_cb().
+               if (!use_aio)
+                       if (vdisk->vdfd->xfer_commit(arg, res))
+                               VIDDBG(0, "Couldn't commit transfer\n");
+
+               i += batch;
+               b += batch * 512;
+               block += batch;
+               nb_blocks -= batch;
+       }
+
+       if (pool)
+               vdisk_free(pool);
+
+       /*
+        * Returning number of processed bytes to caller who requested AIO 
+        * (vdisk->use_aio && aiocb) will tell him that there is no 
+        * need to wait for AIO completion
+        * There are two cases when this happens:
+        *  - We couldn't perform any AIOs (use_aio == 0)
+        *  - Some requests have been reads to unallocated blocks (and 
+        *    thus are read as zeroes). Note that if *some* blocks have been
+        *    sent as AIOs, the caller will need to wait for completions 
+        *    (and we return zero).
+        */
+       if (!use_aio)
+               return (i * 512); // 'i' is number of accessed sectors;
+       else if (vdisk->use_aio && aiocb && (zero_blocks != 0))
+               return (zero_blocks * 512); 
+       else
+               return (0);
+}
+
+LIST_HEAD(vdfd_head);
+
+// Register a new file format by putting its descriptor at the front of
+// the global format list.
+// Returns 0 on success, -1 when this descriptor is already on the list.
+int
+vdisk_register(vdf_data_t *new_vdfd)
+{
+       struct list_head *node = vdfd_head.next;
+
+       // Refuse to link the same descriptor twice
+       while (node != &vdfd_head) {
+               if (list_entry(node, vdf_data_t, vdfd_list) == new_vdfd)
+                       return (-1);
+               node = node->next;
+       }
+
+       list_add(&new_vdfd->vdfd_list, &vdfd_head);
+       VIDDBG(10, "Registered \"%s\" format\n", new_vdfd->ftype);
+       return (0);
+}
+
+// Unregister a file format: unlink its descriptor from the global format
+// list. Descriptors that are not on the list are left untouched.
+void
+vdisk_unregister(vdf_data_t *vdfd)
+{
+       struct list_head *node;
+
+       for (node = vdfd_head.next; node != &vdfd_head; node = node->next) {
+               if (list_entry(node, vdf_data_t, vdfd_list) != vdfd)
+                       continue;
+
+               list_del(&vdfd->vdfd_list);
+               return;
+       }
+}
+
+// Find format-specific library, load it and call its init routine.
+//
+// For a format called 'name' the library is "libvdisk_<name>.so" and the
+// routine is "<name>_init". 'name' may come from a file name extension,
+// so both strings are built with bounded formatting and an over-long
+// name is rejected instead of overflowing the buffers.
+// Returns 0 on success and -1 on any failure (bad name, library not
+// loadable, init routine not found).
+// On success the library handle is deliberately never closed: presumably
+// the init routine registers handlers that live inside the library.
+int
+vdisk_init_format(char *name)
+{
+       void *handle;
+       char libname[64];
+       char initfunc[32];
+       void (*init)(void);
+       char *err;
+       int len;
+
+       if (name == NULL) {
+               VIDDBG(0, "No format name given\n");
+               return (-1);
+       }
+
+       // Construct library name
+       len = snprintf(libname, sizeof(libname), "libvdisk_%s.so", name);
+       if ((len < 0) || ((size_t)len >= sizeof(libname))) {
+               VIDDBG(0, "Format name too long: %s\n", name);
+               return (-1);
+       }
+
+       // Construct init function name
+       len = snprintf(initfunc, sizeof(initfunc), "%s_init", name);
+       if ((len < 0) || ((size_t)len >= sizeof(initfunc))) {
+               VIDDBG(0, "Format name too long: %s\n", name);
+               return (-1);
+       }
+
+       handle = dlopen (libname, RTLD_LAZY);
+       if (!handle) {
+               VIDDBG(0, "%s\n", dlerror());
+               return (-1);
+       }
+
+       dlerror();    // Clear any existing error
+
+       *(void **) (&init) = dlsym(handle, initfunc);
+       if ((err = dlerror()) != NULL)  {
+               VIDDBG(0, "%s\n", err);
+               dlclose(handle);
+               return (-1);
+       }
+
+       // dlsym() may legitimately return NULL without setting an error
+       if (init == NULL) {
+               VIDDBG(0, "%s is NULL in %s\n", initfunc, libname);
+               dlclose(handle);
+               return (-1);
+       }
+
+       // Call format-specific init routine
+       (*init)();
+
+       return (0);
+}
+
+// Look up the format descriptor whose ftype equals 'ftype' and store it
+// in vdisk->vdfd.
+// If the first search of the global format list fails, the format's
+// library is loaded with vdisk_init_format() and the list is searched
+// once more.
+// Returns 0 on success, the error from vdisk_init_format(), or EINVAL
+// when the format is still unknown after loading.
+int
+vdf_find_vdfd(vdisk_dev_t *vdisk, char *ftype)
+{
+       struct list_head *node;
+       vdf_data_t *fmt;
+       int pass;
+       int rc;
+
+       for (pass = 0; pass < 2; pass++) {
+               for (node = vdfd_head.next; node != &vdfd_head;
+                    node = node->next) {
+                       fmt = list_entry(node, vdf_data_t, vdfd_list);
+                       if (strcmp(fmt->ftype, ftype) == 0) {
+                               vdisk->vdfd = fmt;
+                               return (0);
+                       }
+               }
+
+               // Second miss: the library was loaded but did not
+               // register this format
+               if (pass == 1)
+                       break;
+
+               // Didn't find vdfd for this extension, maybe we need
+               // to initialize it and try again.
+               rc = vdisk_init_format(ftype);
+               if (rc != 0) {
+                       VIDDBG(0, "Can't initialize format %s\n", ftype);
+                       return (rc);
+               }
+       }
+
+       VIDDBG(0, "Unknown format %s\n", ftype);
+       return (EINVAL);
+}
+
+// One-time library setup: (re)initializes the global format list and
+// caches the system page size in vdisk_pagesz. Later calls do nothing.
+// The 'vdisk' argument is not used. Always returns 0.
+int
+vdisk_common_init(vdisk_dev_t *vdisk)
+{
+       if (!vdisk_initialized) {
+               INIT_LIST_HEAD(&vdfd_head);
+               vdisk_pagesz = getpagesize();
+               vdisk_initialized = 1;
+       }
+
+       return (0);
+}
diff -r 75c61490cc06 tools/vdisk/vdisk_tool.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_tool.c  Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,338 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE  // for strndup()
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdint.h>
+#include <getopt.h>
+
+#include "vdisk.h"
+
+// Declared here because this file calls it directly (see init_tool());
+// it is passed a format name such as "vhd".
+extern int vdisk_init_format(char *);
+// NULL-terminated list of the format names this tool accepts
+static char *supported_formats[] = {"vhd", NULL};
+
+// Initialize the common vdisk layer, then every format listed in
+// supported_formats[].  Returns 0 on success, or the first non-zero
+// error code reported by vdisk_common_init()/vdisk_init_format().
+int
+init_tool()
+{
+       char **fmt;
+       int rc;
+
+       rc = vdisk_common_init(NULL/*XXX: ?? */);
+       if (rc != 0) {
+               VIDDBG(0, "Failed to initialize vdisk\n");
+               return (rc);
+       }
+
+       // Walk the NULL-terminated format table
+       for (fmt = supported_formats; *fmt != NULL; fmt++) {
+               rc = vdisk_init_format(*fmt);
+               if (rc != 0) {
+                       VIDDBG(0, "Failed to initialize %s format\n",
+                               *fmt);
+                       return (rc);
+               }
+       }
+
+       return (0);
+}
+
+// Print the command-line synopsis, the option descriptions and the list
+// of supported formats to stderr.  'prog' is the program name shown in
+// the synopsis line.
+static void
+print_usage(char *prog)
+{
+       int i;
+
+       fprintf(stderr, "Usage: %s OPTIONS -# <format-specific options> "
+               "<filename>\n", prog);
+       fprintf(stderr,
+               " OPTIONS:\n"
+               "          [-f <format>] [-C] [-H] [-M] "
+               "[-D <block> [-b <num_blocks>] [-o outfile]]\n"
+               "      -C              Create a vdisk\n"
+               "      -H              Read vdisk headers from file\n"
+               "      -M              Modify a vdisk\n"
+               "      -D              Dump a vhd\n"
+               "        block           first block to read (required)\n"
+               "        num_blocks      number of blocks to read. If not\n"
+               "                          specified, whole file will be "
+               "read\n"
+               "        outfile         output file. If not specified,\n"
+               "                          stdout is used\n"
+               " Supported formats: ");
+
+       // supported_formats[] is NULL-terminated
+       for (i = 0; supported_formats[i] != NULL; i++)
+               fprintf(stderr, "%s ", supported_formats[i]);
+       fprintf(stderr, "\n");
+}
+
+// vdisk_tool entry point.
+//
+// The last command-line argument is taken as the vdisk file name.  The
+// generic options (-f, -C, -H, -M, -D, -b, -o) are parsed here; a '-#'
+// option ends generic parsing and hands the command line to the
+// format's parse_args() hook.  The requested operations then run in
+// this order: create, open (vdisk_init), print headers, modify, dump.
+//
+// Returns 0 on success, EINVAL for usage errors, or the error code of
+// the failing step.  Some failures terminate the process with exit(1).
+int
+main(int argc, char *argv[])
+{
+       char filename[PATH_MAX];
+       char *outfile = NULL;
+       char format[16] = "vhd";
+       int operations = 0;
+       // getopt() returns an int; keeping it in a plain char makes the
+       // comparison with -1 fail on platforms where char is unsigned.
+       int c = 0;
+       extern char *optarg;
+       extern int optind, opterr, optopt;
+       vdisk_dev_t vdisk;
+       int err;
+       void *optp = NULL; // Format-specific options
+       char *file_fmt;
+       int i;
+       int n;
+       size_t len;
+       int first_block = 0, num_blocks = -1;
+       struct program_props props;
+       uint8_t flags;
+
+       //init_tool();
+
+       // Without at least one argument there is no file name to work on
+       if (argc < 2) {
+               print_usage(argv[0]);
+               return (EINVAL);
+       }
+
+       /*
+        * Read the filename argument first -- we may need
+        * it to determine format
+        */
+       if (strlen(argv[argc-1]) >= sizeof(filename)) {
+               VIDDBG(0, "File name too long\n");
+               return (ENAMETOOLONG);
+       }
+       strcpy(filename, argv[argc-1]);
+       file_fmt = strrchr(filename, '.');
+
+       // See whether what we think is file's format is supported
+       if (file_fmt) {
+               file_fmt++; // Skip '.'
+               for (i=0; ;i++) {
+                       if (supported_formats[i] == NULL) {
+                               // Not a supported format, ignore suffix
+                               file_fmt = NULL;
+                               break;
+                       }
+
+                       if (!strcmp(file_fmt, supported_formats[i]))
+                               break;  // Found it
+               }
+       }
+
+       vdisk.vdfd = NULL;
+
+       while (c != '#') {
+
+               c = getopt(argc, argv, "f:CHMD:b:o:#");
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 'f':
+                       // 'format' is a small fixed buffer
+                       if (strlen(optarg) >= sizeof(format)) {
+                               VIDDBG(0, "Format name %s is too long\n",
+                                      optarg);
+                               return (EINVAL);
+                       }
+                       strcpy(format, optarg);
+
+                       /*
+                        * If we either couldn't determine format from
+                        * filename argument or we thought we could but
+                        * '-f' specifies different format, we append
+                        * appropriate suffix
+                        */
+                       if (!file_fmt || strcmp(format, file_fmt)) {
+                               len = strlen(filename);
+                               n = snprintf(filename + len,
+                                            sizeof(filename) - len,
+                                            ".%s", format);
+                               if (n < 0 ||
+                                   (size_t)n >= sizeof(filename) - len) {
+                                       VIDDBG(0, "File name too long\n");
+                                       return (ENAMETOOLONG);
+                               }
+                               file_fmt = format;
+                       }
+
+                       break;
+               case 'C':
+                       operations |= VDISK_OP_CREATE;
+                       break;
+               case 'H':
+                       /* File to read headers from */
+                       operations |= VDISK_OP_HEADERS;
+                       break;
+               case 'M':
+                       /* Modify an existing vdisk */
+                       operations |= VDISK_OP_MODIFY;
+                       break;
+               case 'D':
+                       first_block = atol(optarg);
+                       operations |= VDISK_OP_DUMP;
+                       break;
+               case 'b':
+                       num_blocks = atol(optarg);
+                       if (num_blocks < 0) {
+                               VIDDBG(0, "Number of blocks must be a "
+                                      "non-negative number\n");
+                               exit(1);
+                       }
+                       break;
+               case 'o':
+                       // Don't confuse vdisk with output file: the
+                       // last argv element is reserved for the vdisk
+                       if (optarg == argv[argc-1]) {
+                               print_usage(argv[0]);
+                               exit(1);
+                       }
+                       outfile = strndup(optarg, strlen(optarg));
+                       if (outfile == NULL) {
+                               VIDDBG(0, "Out of memory\n");
+                               exit(1);
+                       }
+                       // Must not fall into the '#' case below, which
+                       // starts format-specific argument parsing
+                       break;
+               case '#':
+
+                       if (file_fmt) {
+                               err = vdf_find_vdfd(&vdisk, file_fmt);
+                               if (err) {
+                                       VIDDBG(0, "Fail to initialize "
+                                               "format data for %s\n",
+                                               file_fmt);
+                                       return (err);
+                               }
+                       } else {
+                               VIDDBG(0, "Unspecified or unsupported "
+                                      "format\n");
+                               print_usage(argv[0]);
+                               return (EINVAL);
+                       }
+
+                       if (vdisk.vdfd->parse_args(argc, operations,
+                                                   argv, &optp) != 0) {
+                               print_usage(argv[0]);
+                               return (EINVAL);
+                       }
+
+                       break;
+               default:
+                       print_usage(argv[0]);
+                       return (EINVAL);
+               }
+       }
+
+       /*
+        * At least one operation type is needed and
+        * filename needs to be specified
+        */
+       if (!operations || !file_fmt) {
+               print_usage(argv[0]);
+               return (EINVAL);
+       }
+
+       // XXX: We probably should have initialized by now
+       if (vdisk.vdfd == NULL) {
+               err = vdf_find_vdfd(&vdisk, file_fmt);
+               if (err) {
+                       VIDDBG(0, "Fail to initialize format data for %s\n",
+                               file_fmt);
+                       return (err);
+               }
+       }
+
+
+       // First create file, if requested
+       if (operations & VDISK_OP_CREATE) {
+               err = vdisk.vdfd->create_vdisk(filename, optp);
+               if (err) {
+                       VIDDBG(0, "Can't create file\n");
+                       return (err);
+               }
+       }
+
+       props.alloc_func = NULL;
+       props.free_func = NULL;
+       props.out_target = VDISK_OUT_STDERR;
+
+       // Open read-only unless the vdisk is being created or modified
+       if (!(operations & VDISK_OP_CREATE) &&
+           !(operations & VDISK_OP_MODIFY))
+               flags = VDISK_RO;
+       else
+               flags = 0;
+
+       err = vdisk_init(&vdisk, filename, &props, flags);
+       if (err) {
+               VIDDBG(0, "Fail to initialize from file %s\n",
+                       filename);
+               return (err);
+       }
+
+       if (operations & VDISK_OP_HEADERS) {
+               err = vdf_print_headers(&vdisk, filename);
+               if (err) {
+                       VIDDBG(0, "Can't read headers\n");
+                       return (err);
+               }
+       }
+
+       if (operations & VDISK_OP_MODIFY) {
+               err = vdisk.vdfd->modify_vdisk(&vdisk, optp);
+               if (err) {
+                       VIDDBG(0, "Can't modify headers\n");
+                       return (err);
+               }
+       }
+
+       if (operations & VDISK_OP_DUMP) {
+               uint8_t *buf, *p, *out;
+               int bytes;
+               int chunk_log = 21; // 2MB
+               int nblocks;
+               int fd;
+               size_t todo;
+               ssize_t written;
+
+               // Open output file (use stdout if not specified)
+               if (outfile != NULL) {
+                       fd = open(outfile, O_RDWR|O_CREAT,
+                                 S_IRUSR|S_IWUSR);
+                       if (fd == -1) {
+                               VIDDBG(0, "Can't open %s: %s\n",
+                                      outfile, strerror(errno));
+                               exit(1);
+                       }
+               } else
+                       fd = 1; // stdout
+
+               // Allocate 512b-aligned read buffer
+               p = malloc((1<<chunk_log) + 512);
+               while (p == NULL) { // Try smaller chunks if we fail
+                       // Stop at one 512-byte block: anything smaller
+                       // gives nblocks == 0 and the copy loop below
+                       // would never advance
+                       if (chunk_log == 9) {
+                               VIDDBG(0, "Can't allocate buffer\n");
+                               exit(1);
+                       }
+                       chunk_log--;
+                       p = malloc((1<<chunk_log) + 512);
+               }
+               buf = p;
+               while ((addr_t)buf & 511) buf++;
+
+               // nblocks per transfer
+               nblocks = (1<<chunk_log) >> 9;
+
+               // If number of blocks to read is not specified,
+               // read whole vdisk
+               if (num_blocks < 0)
+                       num_blocks = vdisk.sz >> 9;
+
+               for (i=0; i<num_blocks; i+=nblocks) {
+
+                       // This could happen on last iteration
+                       if ((i+nblocks) > num_blocks)
+                               nblocks = num_blocks - i;
+
+                       bytes = vdisk_rw(&vdisk, first_block+i, buf, nblocks,
+                                        VDISK_READ, NULL);
+                       if (bytes != (nblocks << 9)) {
+                               VIDDBG(0, "vdisk_rw() returned %d\n", bytes);
+                               exit(1);
+                       }
+
+                       // write() may transfer fewer bytes than asked
+                       // for; keep going until the chunk is out
+                       out = buf;
+                       todo = (size_t)nblocks << 9;
+                       while (todo > 0) {
+                               written = write(fd, out, todo);
+                               if (written == -1) {
+                                       VIDDBG(0, "write: %s\n",
+                                              strerror(errno));
+                                       exit(1);
+                               }
+                               out += written;
+                               todo -= (size_t)written;
+                       }
+               }
+
+               free(p);
+
+               // Only close what we opened; stdout is left alone
+               if (outfile != NULL && close(fd) == -1) {
+                       VIDDBG(0, "close: %s\n", strerror(errno));
+                       exit(1);
+               }
+       }
+       return 0;
+}
diff -r 75c61490cc06 tools/vdisk/vdisk_utils.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_utils.c Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,435 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for O_DIRECT
+#include <stdio.h>
+#include <stdlib.h> 
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdarg.h>
+#include <libaio.h>
+#include <time.h>
+#include <limits.h>
+
+#include "list.h"
+#include "vdisk.h"
+#include "vdisk_utils.h"
+
+// Maximum number of level-0 messages vdisk_log_error() prints before
+// it starts suppressing further ones
+#define VDISK_MAX_ERRORS       (100)
+// Size of the buffer a formatted message is rendered into
+#define VDISK_ERR_STRING_LEN   (512)
+
+// Allocator hooks; default to libc, replaceable via vdisk_alloc_init()
+static void *(*vdisk_alloc_func)(size_t sz) = malloc;
+static void (*vdisk_free_func)(void *ptr) = free;
+
+// Messages whose level is above this value are dropped by
+// vdisk_log_error()
+int vdisk_dbg_level = 1;
+int vdisk_out_target = VDISK_OUT_STDERR; // where to print messages
+
+// Don't want to keep this on stack
+// (scratch buffer vdisk_log_error() formats each message into)
+static char argstring[VDISK_ERR_STRING_LEN];
+
+// Data structure to help with message throttling
+// (all fields are maintained by vdisk_log_error())
+struct vdisk_log_mgt {
+       // level-0 messages counted since the counter was last cleared
+       int vdisk_err_cnt;
+       // throttling interval in seconds; 0 until throttling starts
+       int interval;
+       // set when a level-0 message arrives before next_check
+       int restart;
+       // time of the most recent level-0 message
+       time_t last_error;
+       // time from which the throttling state is re-evaluated
+       time_t next_check;
+};
+// Initial state: nothing counted, no interval, next_check at LONG_MAX
+static struct vdisk_log_mgt vdisk_log = {
+       .vdisk_err_cnt = 0,
+       .restart       = 0,
+       .interval      = 0,
+       .last_error    = (time_t)0,
+       .next_check    = (time_t)LONG_MAX,
+};
+
+// Emit 'msg' on the configured output: syslog (LOG_DEBUG) when
+// vdisk_out_target is VDISK_OUT_SYSLOG, stderr otherwise.  When 'file'
+// is non-NULL the message is prefixed with "file:line: ".
+static void
+vdisk_print_msg(char *file, int line, char *msg)
+{
+       int to_syslog = (vdisk_out_target == VDISK_OUT_SYSLOG);
+
+       // No source location supplied: print the bare message
+       if (file == NULL) {
+               if (to_syslog)
+                       syslog(LOG_DEBUG, "%s", msg);
+               else
+                       fprintf(stderr, "%s", msg);
+               return;
+       }
+
+       if (to_syslog)
+               syslog(LOG_DEBUG, "%s:%d: %s", file, line, msg);
+       else
+               fprintf(stderr, "%s:%d: %s", file, line, msg);
+}
+
+// Format and print a diagnostic message, rate-limiting level-0 messages.
+//
+//   level  messages with level > vdisk_dbg_level are dropped
+//   file   source file name for the "file:line: " prefix, or NULL
+//   line   source line number for the prefix
+//   fmt    printf-style format, followed by its arguments
+//
+// Only level 0 is throttled: once VDISK_MAX_ERRORS level-0 messages
+// have been counted, further ones are suppressed until next_check is
+// reached.  Messages at other levels are always printed.  The message
+// is formatted into the file-scope 'argstring' buffer and truncated to
+// VDISK_ERR_STRING_LEN bytes.
+void
+vdisk_log_error(int level, char *file, int line, char *fmt, ...)
+{
+       int print_msg;
+
+       if (level > vdisk_dbg_level)
+               return;
+
+       print_msg = 0;
+
+       // Decide whether to print the message.
+       // Only manage message reporting for level 0, which is
+       // usually reserved for errors. Other messages will be
+       // printed unconditionally.
+       if (level == 0) {
+               time_t now;
+
+               if (time(&now) == (time_t)-1) {
+                       // This should never happen ;-()
+                       vdisk_print_msg(NULL, 0, "vdisk: Can't get time, "
+                                       "error reporting stopped\n");
+                       return; // XXX: Or continue?
+               }
+
+               if (now >= vdisk_log.next_check) {
+
+                       // Quiet for longer than the current interval:
+                       // drop back to unthrottled reporting
+                       if (now - vdisk_log.last_error >
+                           (time_t)vdisk_log.interval) {
+                               // reset message throttling
+                               vdisk_log.restart = 0;
+                               vdisk_log.interval = 0;
+                               vdisk_log.vdisk_err_cnt = 0;
+                               vdisk_log.next_check = LONG_MAX;
+                               vdisk_print_msg(NULL, 0, "vdisk: Restoring "
+                                               "error reporting\n");
+                       }
+
+                       if (vdisk_log.restart) {
+                               // Double the interval, max at 128 seconds
+                               vdisk_log.interval =
+                                       (vdisk_log.interval > 64) ?
+                                       vdisk_log.interval :
+                                       (vdisk_log.interval * 2);
+                               vdisk_log.next_check +=
+                                       (time_t)vdisk_log.interval;
+                               vdisk_log.restart = 0;
+                       }
+
+                       // A new window starts: allow messages again
+                       vdisk_log.vdisk_err_cnt = 0;
+
+               } else {
+                       // Message received during throttling interval.
+                       // We will need to double the interval later
+                       vdisk_log.restart = 1;
+               }
+
+               if (vdisk_log.vdisk_err_cnt < VDISK_MAX_ERRORS) {
+                       vdisk_log.vdisk_err_cnt++;
+                       print_msg = 1;
+               }
+
+               // The limit was just reached; bump the counter past it
+               // so this branch runs only once per window
+               if (vdisk_log.vdisk_err_cnt == VDISK_MAX_ERRORS) {
+                       vdisk_log.vdisk_err_cnt++;
+                       if (vdisk_log.interval == 0) {
+                               // Start interval management
+                               vdisk_print_msg(NULL, 0, "vdisk: Too many "
+                                               "errors, slowing down rate "
+                                               "of reporting\n");
+                               vdisk_log.interval = 1;
+                               vdisk_log.next_check = now +
+                                       (time_t)vdisk_log.interval;
+                       }
+               }
+
+               vdisk_log.last_error = now;
+
+       } else
+               print_msg = 1;
+
+
+       if (print_msg) {
+               va_list args;
+
+               // Roll arguments into a string
+               va_start(args, fmt);
+               (void)vsnprintf(argstring, VDISK_ERR_STRING_LEN,
+                               fmt, args);
+               va_end(args);
+
+               vdisk_print_msg(file, line, argstring);
+       }
+}
+
+// Install caller-supplied allocation hooks for vdisk_malloc() and
+// vdisk_free().  A NULL argument leaves the corresponding hook as is.
+void
+vdisk_alloc_init(void *alloc_func, void *free_func)
+{
+       if (free_func)
+               vdisk_free_func = free_func;
+
+       if (alloc_func)
+               vdisk_alloc_func = alloc_func;
+}
+
+// Allocate 'sz' bytes through the current allocation hook and zero
+// them.  Returns NULL when the hook returns NULL.
+void *
+vdisk_malloc(size_t sz)
+{
+       void *mem = vdisk_alloc_func(sz);
+
+       if (mem == NULL)
+               return (NULL);
+
+       // memset() hands back the pointer it was given
+       return (memset(mem, 0, sz));
+}
+
+// Release 'ptr' through the current free hook.
+//
+// 'ptr' is passed by value, so the caller's own pointer variable is not
+// changed by this call; callers that keep the pointer around must clear
+// it themselves.
+void
+vdisk_free(void *ptr)
+{
+       vdisk_free_func(ptr);
+}
+
+// Flush 'fp' to disk, ask the kernel to drop its cached pages, then
+// close it.  fsync/posix_fadvise failures are only logged; the return
+// value is that of close().
+int
+vdisk_close(int fp)
+{
+       int err;
+
+       err = fsync(fp);
+       if (err)
+               VIDDBG(0, "fsync(): %s\n", strerror(errno));
+
+       // Invalidate all pages from page cache.
+       // posix_fadvise() returns the error number directly and does
+       // not set errno.
+       err = posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED);
+       if (err)
+               VIDDBG(0, "posix_fadvise(): %s\n", strerror(err));
+
+       err = close(fp);
+       return (err);
+}
+
+// Store the size in bytes of the file open on descriptor 'f' in '*sz'.
+//
+// The size is found by seeking to the end of the file; the file offset
+// in effect on entry is restored before returning.
+//
+// Returns 0 on success or the errno value of the failing lseek().
+// '*sz' is written only when the size was obtained.
+size_t
+vdisk_size(int f, size_t *sz)
+{
+       off_t cur;
+       off_t end;
+       int err;
+
+       /*
+        * XXX: Obviously, we should use fstat(). Unfortunately, I couldn't
+        * figure out how to make a dynamic library that calls fstat.
+        * See glibc FAQ for description of *problem* (why couldn't they
+        * provide a solution as well?)
+        */
+
+       // Remember current position
+       cur = lseek(f, 0, SEEK_CUR);
+       if (cur == (off_t)-1) {
+               err = errno;
+               VIDDBG(0, "lseek: Can't seek to current: %s\n",
+                      strerror(err));
+               return (err);
+       }
+
+       end = lseek(f, 0, SEEK_END);
+       if (end == (off_t)-1) {
+               err = errno;
+               VIDDBG(0, "lseek: Can't seek to end: %s\n", strerror(err));
+               return (err);
+       }
+       *sz = (size_t)end;
+
+       // Restore current position (the offset saved above, not 0)
+       if (lseek(f, cur, SEEK_SET) == (off_t)-1) {
+               err = errno;
+               VIDDBG(0, "lseek: Can't seek to current: %s\n",
+                      strerror(err));
+               return (err);
+       }
+
+       return (0);
+}
+
+// Prepare an asynchronous read or write for 'block' in the vdisk's hash
+// slot for that block and append it to the vdisk's aio_submit[] array.
+// No I/O is submitted by this function.
+//
+//   buf/size/off  buffer, byte count and file offset of the transfer
+//   arg, aiocb    stored in the pending-request record
+//   op            VDISK_WRITE selects a write, anything else a read
+//
+// Returns 'size'.
+size_t
+vdisk_asyncio(vdisk_dev_t *vdisk, uint64_t block,
+             int fp, char *buf,
+             size_t size, off_t off,
+             void *arg, void *aiocb,
+             int op)
+{
+       int slot = VDISK_HASH_IDX(block);
+       struct pending_aio *pend;
+       struct iocb *cb;
+
+       ASSERT(vdisk->aio_cnt < VDISK_HASH_SZ);
+       ASSERT(vdisk->hash[slot].key == block);
+
+       pend = &vdisk->hash[slot].pio;
+       cb = &vdisk->hash[slot].io;
+
+       // Record what this request is about
+       pend->op = op;
+       pend->fd = fp;
+       pend->off = off;
+       pend->num_blocks = size>>9;
+       pend->aiocb = aiocb;
+       pend->block = block;
+       pend->arg = arg;
+
+       if (op != VDISK_WRITE)
+               io_prep_pread(cb, fp, buf, size, off);
+       else
+               io_prep_pwrite(cb, fp, buf, size, off);
+
+       // Link the control block back to its pending-request record
+       cb->data = pend;
+
+       VIDDBG(50, "Using hash entry %d (block %d)\n",
+              VDISK_HASH_IDX(pend->block), pend->block);
+
+       vdisk->aio_submit[vdisk->aio_cnt++] = cb;
+
+       return (size);
+}
+
+// fsync 'fp', then ask the kernel to drop cached pages for the byte
+// range starting at 'start' and 'len' bytes long.  Failures are logged
+// and otherwise ignored.
+static void
+vdisk_flush_range(int fp, off_t start, off_t len)
+{
+       int res;
+
+       res = fsync(fp);
+       if (res)
+               VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+       // posix_fadvise() returns the error number directly and does
+       // not set errno.
+       res = posix_fadvise(fp, start, len, POSIX_FADV_DONTNEED);
+       if (res)
+               VIDDBG(0, "posix_fadvise: %s\n", strerror(res));
+}
+
+// Track runs of writes to 'fp' so that flushing and page-cache
+// invalidation happen per run instead of per write.
+//
+// 'syncio' holds the pending run (is_set, io_start, io_len); 'start'
+// and 'len' describe the write that was just done.
+//   - A write starting inside or right at the end of the pending run
+//     extends it; the run is flushed once it exceeds WRITE_RUN bytes.
+//   - A write elsewhere flushes the pending run and starts a new one.
+//   - A single write larger than WRITE_RUN is flushed immediately.
+static void
+vdisk_manage_pcache(int fp, vdisk_syncio_t *syncio, off_t start, off_t len)
+{
+#define        WRITE_RUN       (1<<22) //4MB
+       DO_STATS(time_t now);
+
+       DO_STATS(++(syncio->total_writes));
+
+       if (syncio->is_set) {
+               if (start >= syncio->io_start &&
+                   start <= syncio->io_start + syncio->io_len) {
+                       // Trim the run back to 'start', then add the
+                       // new write on top of it
+                       syncio->io_len -= (syncio->io_start +
+                                          syncio->io_len) - start;
+                       syncio->io_len += len;
+                       DO_STATS(++(syncio->contig_writes));
+                       if (syncio->io_len > WRITE_RUN) {
+                               DO_STATS(++(syncio->flush_size_force));
+
+                               syncio->is_set = 0;
+                               vdisk_flush_range(fp, syncio->io_start,
+                                                 syncio->io_len);
+                       }
+                       len = 0; // NOTE:len is consumed into previous
+               } else {
+#if VDISK_SYNCIO_STATS
+                       if (syncio->io_len < (1<<20))
+                               ++(syncio->flush_size_sub1MB);
+                       else if (syncio->io_len < (1<<21))
+                               ++(syncio->flush_size_sub2MB);
+                       else if (syncio->io_len < (1<<22))
+                               ++(syncio->flush_size_sub4MB);
+                       else if (syncio->io_len < (1<<23))
+                               ++(syncio->flush_size_sub8MB);
+                       else
+                               ++(syncio->flush_size_ovr8MB);
+#endif /* VDISK_SYNCIO_STATS */
+                       syncio->is_set = 0;
+                       vdisk_flush_range(fp, syncio->io_start,
+                                         syncio->io_len);
+               }
+       }
+       if (len > 0) {
+               if (len <= WRITE_RUN) {
+                       // Start a new pending run with this write
+                       syncio->is_set = 1;
+                       syncio->io_start = start;
+                       syncio->io_len = len;
+               } else {
+                       DO_STATS(++(syncio->flush_size_force));
+                       vdisk_flush_range(fp, start, len);
+               }
+       }
+#if VDISK_SYNCIO_STATS
+       now = time(NULL);
+       if (now >= syncio->last_dbg_print + 60) {
+               VIDDBG(0, ":WRITE_PERF: [%lu] tWrts %lu | conWrts %lu | s1M %lu"
+                      " | s2M %lu | s4M %lu | s8M %lu | o8M %lu | f %lu\n",
+                      (unsigned long)(now - syncio->last_dbg_print),
+                      syncio->total_writes, syncio->contig_writes,
+                      syncio->flush_size_sub1MB, syncio->flush_size_sub2MB,
+                      syncio->flush_size_sub4MB, syncio->flush_size_sub8MB,
+                      syncio->flush_size_ovr8MB, syncio->flush_size_force);
+               syncio->last_dbg_print = now;
+       }
+#endif /* VDISK_SYNCIO_STATS */
+}
+
+// Synchronously read or write 'size' bytes at file offset 'off'.
+//
+//   fp      file descriptor
+//   buf     data buffer; must be 512-byte aligned
+//   size    byte count; must be a multiple of 512
+//   off     file offset; must be a multiple of 512
+//   op      VDISK_WRITE selects write(), anything else read()
+//   syncio  optional write-run tracker; when NULL every write is
+//           followed by fsync() and page-cache invalidation
+//
+// After the transfer the touched range is dropped from the page cache:
+// directly for reads, through vdisk_manage_pcache() for tracked writes.
+//
+// Returns the result of read()/write() converted to size_t, or
+// (size_t)-1 when the initial seek fails.  A transfer of fewer than
+// 'size' bytes is logged but not otherwise treated as an error.
+size_t
+vdisk_syncio(int fp, char *buf, size_t size, off_t off, int op,
+            vdisk_syncio_t *syncio)
+{
+       size_t bytes;
+       off_t res;
+       off_t io_start;
+       off_t io_len;
+       int err;
+
+       ASSERT(!(size & 511));
+       ASSERT(!(off & 511));
+       ASSERT(!((addr_t)buf & 511));
+
+       res = vdisk_seek(fp, off, SEEK_SET);
+       if (res != off) {
+               VIDDBG(0, "lseek couldn't set offset to 0x%" PRIx64 ": %s\n",
+                      off, strerror(errno));
+               return (-1);
+       }
+
+       if (op == VDISK_WRITE) {
+               bytes = write(fp, buf, size);
+       } else
+               bytes = read(fp, buf, size);
+
+       if (bytes != size) {
+               VIDDBG(0, "%s %zd bytes instead of %zd: %s\n",
+                      (op == VDISK_WRITE)?"Wrote":"Read",
+                      bytes, size, strerror(errno));
+       }
+
+       // Page-align the start and pad the length by one page so the
+       // advised range covers every page the transfer touched
+       io_start = (off & (~((off_t)vdisk_pagesz-1)));
+       io_len = (size + vdisk_pagesz);
+
+       // posix_fadvise() returns the error number directly and does
+       // not set errno, hence strerror(err) below.
+       if (op == VDISK_READ) {
+               err = posix_fadvise(fp, io_start, io_len, POSIX_FADV_DONTNEED);
+               if (err)
+                       VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+       } else if (syncio) {
+               vdisk_manage_pcache(fp, syncio, io_start, io_len);
+       } else {
+               err = fsync(fp);
+               if (err)
+                       VIDDBG(0, "fsync: %s\n", strerror(errno));
+               err = posix_fadvise(fp, io_start, io_len, POSIX_FADV_DONTNEED);
+               if (err)
+                       VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+       }
+
+       return (bytes);
+}
diff -r 75c61490cc06 tools/vdisk/vdisk_utils.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_utils.h Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,36 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VDISK_UTILS
+#define __VDISK_UTILS
+
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "vdisk.h"
+
+
+
+// Thin wrappers around open() and lseek64()
+#define vdisk_open(cp, fl, mode) open((cp), (fl), (mode))
+#define vdisk_seek(fp, off, whence) lseek64((fp), (off), (whence))
+
+// Helpers implemented in vdisk_utils.c
+// Zero-filled allocation / release through the configurable hooks
+extern void *vdisk_malloc(size_t sz);
+extern void vdisk_free(void *ptr);
+// fsync, drop cached pages, then close the descriptor
+extern int vdisk_close(int fp);
+// Store the file size in *sz; returns 0 or an errno value
+extern size_t vdisk_size(int f, size_t *sz);
+// Synchronous 512-byte-aligned read/write at a given offset
+extern size_t vdisk_syncio(int fp, char *buf, size_t sz, loff_t off, 
+                          int op, vdisk_syncio_t *syncio);
+// Prepare (but do not submit) an asynchronous read/write
+extern size_t vdisk_asyncio(vdisk_dev_t *, uint64_t, int, char *, size_t, 
+                           loff_t, void *, void *, int);
+
+
+#endif /* __VDISK_UTILS */
diff -r 75c61490cc06 tools/vdisk/vhd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd.c Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,925 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for O_DIRECT
+#include <stdio.h>
+#include <stdlib.h> 
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "list.h"
+#include "vdisk.h"
+#include "vdisk_utils.h"
+#include "vhd.h"
+#include "vhd_footer.h"
+
+char __vhd_zeroes[VHD_FTR_SZ+512]; /* backing store for vhd_zeroes; the 512 spare bytes leave room for alignment */
+char *vhd_zeroes; /* set by vhd_init() to a 512-byte-aligned, zero-filled VHD_FTR_SZ region inside __vhd_zeroes */
+
+#define BLOCK_MASK (~(((addr_t)1<<9)-1)) /* clears the low 9 bits, i.e. rounds down to a 512-byte boundary */
+
+
+/*
+ * Placeholder consistency check for the metadata of a VHD file.
+ * Nothing is validated yet: the function always reports success (0).
+ */
+int
+vhd_verify_metadata(vd_file_t *vf)
+{
+       int status = 0; // XXX: Something more robust, maybe?
+
+       return status;
+}
+
+/*
+ * Read the VHD footer, stored in the last 512 bytes of vf->fd, into a
+ * 512-byte-aligned buffer (vhd->ftr, carved out of vhd->ftr_mem).
+ *
+ * The buffer is allocated on first use.  On a seek/read failure the buffer
+ * is released and vhd->ftr_mem / vhd->ftr are reset to NULL, so a caller
+ * that also releases vhd->ftr_mem on its own error path cannot free it a
+ * second time.
+ *
+ * Returns 0 on success, ENOMEM if the buffer cannot be allocated and -1
+ * on any seek/read failure (including a file too short to hold a footer).
+ */
+int
+vhd_read_footer(vd_file_t *vf)
+{
+       off_t ftr_off, res;
+       vhd_file_t *vhd = vf->vdf;
+       size_t bytes;
+
+       if (vhd->ftr_mem == NULL) {
+               // Over-allocate so that an aligned 512-byte window always fits
+               vhd->ftr_mem = vdisk_malloc(VHD_FTR_SZ+512);
+               if (vhd->ftr_mem == NULL) {
+                       VIDDBG(0, "Couldn't allocate footer\n");
+                       return (ENOMEM);
+               }
+               vhd->ftr = vhd->ftr_mem;
+               while ((addr_t)vhd->ftr & 511) vhd->ftr++;
+       }
+
+       /* Find file size (seek to the end) */
+       res = vdisk_seek(vf->fd, 0, SEEK_END);
+       if (res == -1) {
+               VIDDBG(0, "lseek couldn't set offset to end of file\n");
+               goto fail;
+       }
+
+       // Without this check a 511-byte file yields ftr_off == -1, which is
+       // indistinguishable from lseek's error value in the test below.
+       if (res < 512) {
+               VIDDBG(0, "File is too small to contain a VHD footer\n");
+               goto fail;
+       }
+
+       ftr_off = res - 512;
+
+       res = vdisk_seek(vf->fd, ftr_off, SEEK_SET);
+       if (res != ftr_off) {
+               VIDDBG(0, "lseek couldn't set offset to 0x%" PRIx64 "\n",
+                      ftr_off);
+               goto fail;
+       }
+
+       if ((bytes = vdisk_syncio(vf->fd, vhd->ftr, 512, ftr_off,
+                                 VDISK_READ, NULL)) != 512) {
+               VIDDBG(0, "vdisk read from offset 0x%" PRIx64 " failed "
+                      "(read %zu instead of 512) %d\n",
+                      ftr_off, bytes, errno);
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       vdisk_free(vhd->ftr_mem);
+       // Do not leave dangling pointers behind for the caller's cleanup
+       vhd->ftr_mem = NULL;
+       vhd->ftr = NULL;
+       return (-1);
+}
+
+/*
+ * Load the dynamic-disk header and the Block Allocation Table (BAT) of a
+ * dynamic or differencing VHD into 512-byte-aligned buffers.
+ *
+ * The dynamic header is read from byte offset VHD_FTR_SZ (it follows the
+ * copy of the footer at the start of the file); the BAT is read from
+ * VHD_DHDR_SZ+VHD_FTR_SZ, its size rounded up to a multiple of 512 bytes.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure, -1 on I/O failure.
+ * Buffers allocated so far (vhd->dhdr_mem, vhd->bat_mem) are NOT released
+ * here on failure; they stay recorded in vhd for the caller to free.
+ */
+int
+vhd_read_dynhdr(vd_file_t *vf)
+{
+       off_t res;
+       vhd_file_t *vhd = vf->vdf;
+       size_t bat_sz;
+       int err = 0;
+
+
+       vhd->dhdr_mem = vdisk_malloc(VHD_DHDR_SZ+512);
+       if (vhd->dhdr_mem == NULL) {
+               VIDDBG(0, "Couldn't allocate dynamic header\n");
+               err = ENOMEM;
+               goto fail;
+       }
+
+       // Advance to the first 512-byte boundary inside the allocation
+       vhd->dhdr = vhd->dhdr_mem;
+       while ((addr_t)vhd->dhdr & 511) vhd->dhdr++;
+
+       // Skip  copy of the footer
+       res = vdisk_seek(vf->fd, VHD_FTR_SZ, SEEK_SET);
+       if (res != VHD_FTR_SZ) {
+               VIDDBG(0, "Couldn't skip copy of the footer\n");
+               err = -1;
+               goto fail;
+       }
+
+       if (vdisk_syncio(vf->fd, vhd->dhdr, VHD_DHDR_SZ, VHD_FTR_SZ,
+                        VDISK_READ, NULL) != VHD_DHDR_SZ) {
+               VIDDBG(0, "Failed to read dynamic header\n");
+               err = -1;
+               goto fail;
+       }
+
+       // Read BAT (in 512B units)
+       // XXX: May need to only keep a part of BAT due to memory size concerns
+       // Each BAT entry is 4 bytes wide, hence the shift by 2
+       bat_sz = vhd_get_dhdr_tbl_entries(vhd->dhdr) << 2;
+       if (bat_sz & 511)
+               bat_sz += (512-(bat_sz & 511));
+
+       vhd->bat_mem = vdisk_malloc(bat_sz+512);
+       if (vhd->bat_mem == NULL) {
+               VIDDBG(0, "Couldn't allocate BAT\n");
+               err = ENOMEM;
+               goto fail;
+       }
+       vhd->bat = vhd->bat_mem;
+       while ((addr_t)vhd->bat & 511) vhd->bat++;
+
+       if (vdisk_syncio(vf->fd, (char *)vhd->bat, bat_sz,
+                        VHD_DHDR_SZ+VHD_FTR_SZ, VDISK_READ, NULL) != bat_sz) {
+               VIDDBG(0, "Failed to read BAT\n");
+               err = -1;
+               goto fail;
+       }
+
+       return (0);
+
+fail:
+       return (err);
+}
+
+/*
+ * Allocate the VHD-specific state of one backing file (vf->vdf) and fill
+ * it in from the file:
+ *   - the footer (all disk types), from which vdisk->sz, the disk type
+ *     and the CHS geometry are taken;
+ *   - for dynamic and differencing disks, the dynamic header and BAT,
+ *     log2 of the sectors-per-block count, the 512-byte-rounded size of
+ *     a block's sector bitmap, the sector offset at which the next block
+ *     will be allocated (where the footer currently is), a scratch
+ *     sector-bitmap buffer and the VHD_CACHE_SZ-entry bitmap cache,
+ *     linked into a doubly-linked list in array order.
+ *
+ * vf->batch_sz is set to the number of sequentially mapped sectors
+ * (block size for dynamic disks, 1 for differencing disks, 1<<30 for
+ * fixed disks).
+ *
+ * Returns 0 on success or a non-zero error (ENOMEM, EIO, or whatever the
+ * helper that failed returned).  On failure everything allocated here is
+ * released and vf->vdf is reset to NULL.
+ */
+int
+vhd_read_metadata(vdisk_dev_t *vdisk, vd_file_t *vf)
+{
+       int err;
+       int type;
+       vhd_file_t *vhd = NULL;
+       int secs_per_block;
+       uint32_t geom;
+       int i;
+
+       vf->vdf = (vhd_file_t *)vdisk_malloc(sizeof(vhd_file_t));
+       if (vf->vdf == NULL) {
+               VIDDBG(0, "Couldn't allocate format-specific data\n");
+               err = ENOMEM;
+               goto fail;
+       }
+
+       // Zeroing also guarantees that every buffer pointer tested in the
+       // cleanup path below starts out as NULL
+       vhd = vf->vdf;
+       memset(vhd, 0, sizeof(vhd_file_t));
+
+       err = vhd_read_footer(vf);
+       if (err) {
+               VIDDBG(0, "Couldn't read footer\n");
+               goto fail;
+       }
+
+       vdisk->sz = vhd_get_ftr_orig_sz(vhd->ftr);
+
+       type = vhd_get_ftr_type(vhd->ftr);
+       if ( (type != VHD_TYPE_FIXED) &&
+            (type != VHD_TYPE_DYNAMIC) &&
+            (type != VHD_TYPE_DIFF)){
+               // Return error for VHD_TYPE_NONE as well.
+               VIDDBG(0, "Unsupported VHD file type (%d)\n", type);
+               err = EIO; // XXX: Something else?
+               goto fail;
+       }
+
+       if (type != VHD_TYPE_FIXED) {
+               size_t sz;
+
+               // We should have a dynamic header
+               err = vhd_read_dynhdr(vf);
+               if (err) {
+                       VIDDBG(0, "Couldn't read dynamic header\n");
+                       goto fail;
+               }
+
+               // No fls() in userland, so we do log2 ourselves
+               vhd->sec_per_block_log = 0;
+               secs_per_block = vhd_get_dhdr_blksz(vhd->dhdr) >> 9;
+               while (secs_per_block >>= 1)
+                       vhd->sec_per_block_log++;
+
+               if (type == VHD_TYPE_DYNAMIC) {
+                       // How many sectors are mapped sequentially
+                       vf->batch_sz = (1<<vhd->sec_per_block_log);
+               } else {
+                       // XXX: Need to think about this.
+                       vf->batch_sz = 1;
+               }
+
+               // bytes for sectormap is ((sectors per block) / 8)
+               vhd->sectormap_sz = (vhd_get_dhdr_blksz(vhd->dhdr) >> 9) >> 3;
+
+               // Align on 512-byte boundary
+               if ((vhd->sectormap_sz == 0) || (vhd->sectormap_sz & 511))
+                       vhd->sectormap_sz += 512 - (vhd->sectormap_sz & 511);
+
+               // First new block will be allocated where the footer
+               // currently is, which is at the end of the file
+               err = vdisk_size(vf->fd, &sz);
+               if (err) {
+                       VIDDBG(0, "Couldn't get file size\n");
+                       goto fail;
+               }
+               vhd->next_block_off = (sz-VHD_FTR_SZ) >> 9;
+
+               // Allocate sectormap buffer
+               vhd->sec_mem = vdisk_malloc(512*2);
+               if (vhd->sec_mem == NULL) {
+                       VIDDBG(0, "Can't allocate sectormap\n");
+                       err = ENOMEM;
+                       goto fail;
+               }
+               vhd->secmap_chunk = vhd->sec_mem;
+               while ((addr_t)vhd->secmap_chunk & 511) vhd->secmap_chunk++;
+
+               // Allocate sectormap cache
+               for (i=0;i<VHD_CACHE_SZ;i++) {
+                       vhd->cache[i].sec_mem = vdisk_malloc(512*2);
+                       if (vhd->cache[i].sec_mem == NULL) {
+                               VIDDBG(0, "Can't allocate sectormap\n");
+                               err = ENOMEM;
+                               goto fail;
+                       }
+                       vhd->cache[i].secmap_chunk = vhd->cache[i].sec_mem;
+                       while ((addr_t)vhd->cache[i].secmap_chunk & 511)
+                               vhd->cache[i].secmap_chunk++;
+
+                       // Point to sector 0 (or any other sector),
+                       // but make the map empty
+                       vhd->cache[i].first_sector = 0; //VHD_INVALID_SECTOR;
+                       memset(vhd->cache[i].secmap_chunk, 0, 512);
+               }
+
+               // Chain the cache lines into a doubly-linked list,
+               // head = cache[0], tail = cache[VHD_CACHE_SZ-1]
+               if (VHD_CACHE_SZ > 0) {
+                       vhd->cache_head = &vhd->cache[0];
+                       vhd->cache[0].prev = NULL;
+                       for (i=1;i<VHD_CACHE_SZ;i++) {
+                               vhd->cache[i-1].next = &vhd->cache[i];
+                               vhd->cache[i].prev = &vhd->cache[i-1];
+                       }
+                       vhd->cache_tail = &vhd->cache[VHD_CACHE_SZ-1];
+                       vhd->cache[VHD_CACHE_SZ-1].next = NULL;
+               } //else
+               //vhd->cache_head == NULL;
+       } else
+               vf->batch_sz = (1<<30); // (signed) infinity
+
+       vf->flags = 0;
+
+       err = vhd_verify_metadata(vf);
+       if (err) {
+               VIDDBG(0, "File appears to be corrupted\n");
+
+               // XXX: It may be salvageable
+               // All buffers are released exactly once by the common
+               // cleanup below; freeing them here as well would be a
+               // double free.
+               err = EIO;
+               goto fail;
+       }
+
+       // We are assuming here that all files of the
+       // vdisk have the same geometry.
+       geom = vhd_get_ftr_geom(vhd->ftr);
+       vdisk->geom.cyls = (geom >> 16) & 0xffff;
+       vdisk->geom.heads = (geom >> 8) & 0xff;
+       vdisk->geom.secs = geom & 0xff;
+
+       return (0);
+
+fail:
+       if (vhd) {
+               if (vhd->ftr_mem)
+                       vdisk_free(vhd->ftr_mem);
+               if (vhd->dhdr_mem)
+                       vdisk_free(vhd->dhdr_mem);
+               if (vhd->bat_mem)
+                       vdisk_free(vhd->bat_mem);
+               if (vhd->sec_mem)
+                       vdisk_free(vhd->sec_mem);
+               // Cache lines that were never allocated are still NULL
+               // thanks to the memset above
+               for (i=0;i<VHD_CACHE_SZ;i++) {
+                       if (vhd->cache[i].sec_mem)
+                               vdisk_free(vhd->cache[i].sec_mem);
+               }
+               vdisk_free(vhd);
+               // vf outlives this call; do not leave it pointing at
+               // freed memory
+               vf->vdf = NULL;
+       }
+       return (err);
+}
+
+/*
+ * Allocate backing-file space for block 'blockno' of a dynamic or
+ * differencing disk.
+ *
+ * The new block (sector bitmap + data) is placed where the footer
+ * currently is.  Steps, in order:
+ *   1. write the footer at its new position, past the new block;
+ *   2. overwrite the old footer location with zeroes;
+ *   3. store the block's sector offset in the in-memory BAT and write
+ *      the 512-byte BAT chunk containing that entry back to the file;
+ *   4. advance vhd->next_block_off past the new block.
+ *
+ * Returns 0 on success and EIO if any write fails.  If the BAT write
+ * fails the in-memory BAT entry is reset to "unallocated": leaving it set
+ * while next_block_off is not advanced would let the next allocation hand
+ * out the same file offset to a second block.
+ */
+int
+vhd_alloc_block(vd_file_t *vf, uint32_t blockno)
+{
+       size_t bytes;
+       off_t bat_off;
+       char *ptr;
+       vhd_file_t *vhd = vf->vdf;
+       size_t blocksz;
+       uint64_t new_block_off; // byte offset of the block being allocated
+
+
+       ASSERT(__arch__swab32(vhd->bat[blockno]) == VHD_BAT_INVALID_ENTRY);
+       ASSERT((vhd_get_dhdr_blksz(vhd->dhdr) & 511) == 0);
+       ASSERT((vhd->sectormap_sz & 511) == 0);
+
+       blocksz = vhd_get_dhdr_blksz(vhd->dhdr) + vhd->sectormap_sz;
+
+       // Widen before shifting, as vhd_map_block() does for BAT entries,
+       // so a sector offset cannot be truncated when converted to bytes
+       new_block_off = (uint64_t)vhd->next_block_off << 9;
+
+       /*
+        * First try to write footer at new position.
+        * The hole should be filled with zeroes
+        * XXX: Are we sure?
+        */
+       bytes = vdisk_syncio(vf->fd, vhd->ftr, VHD_FTR_SZ,
+                            new_block_off + blocksz,
+                            VDISK_WRITE, NULL);
+       if (bytes != VHD_FTR_SZ) {
+               VIDDBG(0, "Can't append footer\n");
+               return (EIO);
+       }
+
+
+       // Overwrite footer with zeroes
+       bytes = vdisk_syncio(vf->fd, vhd_zeroes, VHD_FTR_SZ,
+                            new_block_off, VDISK_WRITE, NULL);
+       if (bytes != VHD_FTR_SZ) {
+               VIDDBG(0, "Can't overwrite footer\n");
+               return (EIO);
+       }
+
+       // Now update BAT in a 512-b chunk
+       vhd->bat[blockno] = __arch__swab32(vhd->next_block_off);
+       bat_off = (VHD_FTR_SZ + VHD_DHDR_SZ + (blockno<<2)) & BLOCK_MASK;
+       // vhd->bat is 512-byte aligned, so masking the entry's address
+       // gives the start of the in-memory chunk that mirrors bat_off
+       ptr = (char *)(((addr_t)&vhd->bat[blockno]) & BLOCK_MASK);
+       bytes = vdisk_syncio(vf->fd, ptr, 512, bat_off, VDISK_WRITE, NULL);
+       if (bytes != 512) {
+               VIDDBG(0, "Can't update BAT\n");
+               // Roll the in-memory entry back (see header comment)
+               vhd->bat[blockno] = __arch__swab32(VHD_BAT_INVALID_ENTRY);
+               return (EIO);
+       }
+
+       vhd->next_block_off += (blocksz >> 9);
+
+       return(0);
+}
+
+/*
+ * The sector bitmap numbers bits starting from the most significant bit
+ * of each byte, whereas the x86 bit test/set instructions behind
+ * test_bit()/set_bit() count from the least significant bit.  Instead of
+ * remapping positions (pos=(pos&(~7))+7-(pos&7)) and reusing those
+ * routines, the helpers below do the MSb-first arithmetic explicitly,
+ * which seems safer.
+ */
+
+// Return non-zero when bit 'pos' (MSb-first within its byte) is set in buf.
+inline int
+vhd_test_bit(int pos, char *buf)
+{
+       uint8_t mask = 1 << (7 - (pos & 7));
+       uint8_t *cell = (uint8_t *)((addr_t)buf + (pos >> 3));
+
+       return (*cell & mask);
+}
+
+// Return 1 when all 'bits' bits starting at 'start' are set in buf
+// (also when bits <= 0), 0 as soon as a clear bit is found.
+inline int
+vhd_test_bitset(int start, int bits, char *buf)
+{
+       int seen = 0;
+
+       while (seen < bits && vhd_test_bit(start + seen, buf))
+               seen++;
+
+       return (seen >= bits);
+}
+
+// Set bit 'pos' (MSb-first within its byte) in buf.
+inline void
+vhd_set_bit(int pos, char *buf)
+{
+       char *cell = (char *)((addr_t)buf + (pos >> 3));
+       uint8_t shift = 7 - (pos & 7);
+
+       *cell = (uint8_t)*cell | (1 << shift);
+}
+
+// Set 'bits' consecutive bits in buf, beginning with bit 'start'.
+inline void
+vhd_set_bitset(int start, int bits, char *buf)
+{
+       int done = 0;
+
+       while (done < bits) {
+               vhd_set_bit(start + done, buf);
+               done++;
+       }
+}
+
+
+/*
+ * Completion step for a transfer that vhd_map_block() flagged as needing
+ * a sector-bitmap update ('arg' is the vhd_xfer_t it handed out, or NULL
+ * when there is nothing to commit).
+ *
+ * When the transfer succeeded (err == 0) the 512-byte bitmap chunk is
+ * re-read from the file, the bits of the transferred sectors are set and
+ * the chunk is written back; if a cache line was reserved for this
+ * transfer it is then filled with the updated chunk and made valid again.
+ *
+ * The commit context is released on every path (vhdx itself lives inside
+ * the vhdx->mem allocation, so it must not be touched after the free).
+ *
+ * Returns 0 on success (or when the transfer itself failed, in which case
+ * nothing is committed) and EIO if the bitmap cannot be read or written.
+ */
+int
+vhd_xfer_commit(void *arg, int err)
+{
+       vhd_xfer_t *vhdx = arg;
+       size_t bytes;
+       int ret = 0;
+
+       if (arg == NULL)
+               return (0);
+
+       if (err != 0)
+               goto out;
+
+       // Read the 512b chunk of sector map
+       bytes = vdisk_syncio(vhdx->fd, vhdx->secmap_chunk, 512,
+                            vhdx->secmap_addr, VDISK_READ, NULL);
+       if (bytes != 512) {
+               VIDDBG(0, "Failed to read sector bitmap\n");
+               ret = EIO;
+               goto out;
+       }
+
+       // Set sector bit
+       vhd_set_bitset(vhdx->sector_bit, vhdx->num_secs,
+                      vhdx->secmap_chunk);
+
+       // and write it back
+       bytes = vdisk_syncio(vhdx->fd, vhdx->secmap_chunk, 512,
+                            vhdx->secmap_addr, VDISK_WRITE, NULL);
+       if (bytes != 512) {
+               VIDDBG(0, "Can't commit access\n");
+               ret = EIO;
+               goto out;
+       }
+
+       // Publish the updated chunk in the cache line reserved for it
+       if (vhdx->cache && vhdx->first_sector != VHD_INVALID_SECTOR) {
+               ASSERT(vhdx->cache->first_sector == VHD_INVALID_SECTOR);
+               memcpy(vhdx->cache->secmap_chunk, vhdx->secmap_chunk, 512);
+               vhdx->cache->first_sector = vhdx->first_sector;
+       }
+
+out:
+       vdisk_free(vhdx->mem);
+       return (ret);
+}
+
+// Map a run of virtual sectors onto this backing file. Microsoft uses "sector" for the 512-byte unit that we 
+// refer to as "block" elsewhere; a VHD block is a group of 1<<sec_per_block_log sectors.
+// This routine is *NOT* SMP-safe!
+int // VID_BLOCK_MAPPED, VID_BLOCK_NOTMAPPED, VID_BLOCK_TOOBIG, or an error (EIO or vhd_alloc_block()'s result)
+vhd_map_block(vd_file_t *vf, // backing file; vf->vdf holds the VHD state
+             uint32_t *sectorno,      /* IN: virtual sector; OUT: sector in the file (rewritten only for non-fixed disks when mapped) */
+             int num_secs, // length of the run; must stay within one block (asserted below)
+             int op, // VDISK_READ never allocates; any other op may
+             void **arg) // OUT: context for vhd_xfer_commit(); written only when the sector bitmap must be updated
+{
+       vhd_file_t *vhd = vf->vdf;
+       int type = vhd_get_ftr_type(vhd->ftr);
+       uint32_t blockno; // block of sectors in the file
+       int err;
+       size_t bytes;
+       int sector_bit; // bit offset into 512b chunk of sectormap
+       int sector_in_block;
+       off_t sectormap_addr;
+       uint32_t first_sector;
+       vhd_cache_t *cache = vhd->cache_head; // cache walk starts at the head of the list
+
+
+       if (type == VHD_TYPE_FIXED)
+               return (VID_BLOCK_MAPPED); // fixed disks: *sectorno is left untouched
+
+       vhd->stats.access++;
+       
+       blockno = *sectorno >> vhd->sec_per_block_log;
+       
+       // We can only map sequence on sectors in the same block
+       ASSERT(((*sectorno+num_secs-1) >> vhd->sec_per_block_log)
+              == blockno);
+
+       // First sector in the block (really, blockno<<vhd->sec_per_block_log)
+       first_sector = *sectorno & (~(((uint32_t)1<<vhd->sec_per_block_log)-1));
+
+       // This sector's offset in the block
+       sector_in_block = *sectorno & (((uint32_t)1<<vhd->sec_per_block_log)-1);
+
+       sector_bit = sector_in_block & ((512*8)-1); // bit index inside a 512-byte chunk (512*8 bits per chunk)
+       while (cache != NULL) { // look for a cache line keyed by this block's first sector
+               if (cache->first_sector == first_sector) {
+                       // Sectormap is cached
+                       if (vhd_test_bitset(sector_bit, num_secs, 
+                                           cache->secmap_chunk)) {
+                               
+                               // sector is mapped
+                               *sectorno = cache->phys_first_sector +
+                                       + sector_in_block;
+                               
+                               vhd->stats.cache_hit++;
+                               
+                               // Move the line to the head of the list (most recently used)
+                               if (cache->prev) { // already at the head when prev == NULL
+                                       cache->prev->next = cache->next;
+                                       if (cache->next)
+                                               cache->next->prev =
+                                                       cache->prev;
+                                       else
+                                               vhd->cache_tail = cache->prev;
+                                       
+                                       cache->next = vhd->cache_head;
+                                       cache->next->prev = cache;
+                                       cache->prev = NULL;
+                                       vhd->cache_head = cache;
+                               }
+                               
+                               return (VID_BLOCK_MAPPED);
+                       } else {
+                               break; // line found but not all bits set: continue below with 'cache' pointing at it
+                       }
+               }
+               cache = cache->next;
+       }
+       
+       if (__arch__swab32(vhd->bat[blockno]) == VHD_BAT_INVALID_ENTRY) { // BAT entries are byte-swapped before use
+               
+               // For reads, the caller will assume that
+               // read returned zeroes
+               if (op == VDISK_READ)
+                       return (VID_BLOCK_NOTMAPPED);
+               
+               err = vhd_alloc_block(vf, blockno);
+               vhd->stats.block_alloc++; // counted even when the allocation failed
+               VIDDBG(100, "Allocated block %d\n", blockno);
+               if (err) {
+                       VIDDBG(0, "Failed to allocate block\n");
+                       return (err);
+               }
+       }
+       
+       if (VHD_CACHE_SZ > 0) {
+               
+               if (vhd->cache_tail->first_sector != VHD_INVALID_SECTOR) {
+                       if ((cache == NULL) && (vhd->cache_head != NULL)) { // no line for this block: recycle the tail as the new head
+                               vhd_cache_t *oldh = vhd->cache_head;
+                               vhd_cache_t *oldt = vhd->cache_tail;
+                               
+                               vhd->cache_head = oldt;
+                               vhd->cache_tail = oldt->prev;
+                               
+                               vhd->cache_head->next = oldh;
+                               oldh->prev = oldt;
+                               
+                               vhd->cache_head->prev = NULL;
+                               
+                               vhd->cache_tail->next = NULL;
+                               
+                               cache = vhd->cache_head; // the recycled line still holds its old contents at this point
+                       }
+                       
+               } else {
+                       // tail cache fill is in-flight. We assume that
+                       // all others are in-flight as well.
+                       // We will not be caching
+                       // XXX: we should probably walk the list
+                       //first_sector = VHD_INVALID_SECTOR;
+               }
+       }
+       
+       // Read the 512-byte chunk of the sector bitmap that covers this sector
+       sectormap_addr = 
+               ((uint64_t)__arch__swab32(vhd->bat[blockno])<<9) + // byte offset of the block (its bitmap comes first)
+               ((sector_in_block>>3) & BLOCK_MASK); // byte offset of the chunk inside the bitmap
+       bytes = vdisk_syncio(vf->fd, vhd->secmap_chunk, 512, 
+                            sectormap_addr, VDISK_READ, NULL);
+       if (bytes != 512) {
+               VIDDBG(0, "Failed to read sector bitmap\n");
+               return (EIO);
+       }
+       
+       // See whether the sector is present
+       if (!vhd_test_bitset(sector_bit, num_secs, vhd->secmap_chunk)) {
+               vhd_xfer_t *vhdx;
+               int byteaddr, bitno;
+               char *ptr;
+               
+               // For reads, the caller will assume that
+               // read returned zeroes
+               if (op == VDISK_READ) {
+                       int i;
+                       int mapped = 0;
+                       
+                       for (i=0; i<num_secs; i++) {
+                               if (vhd_test_bit(sector_bit+i, 
+                                                vhd->secmap_chunk)) {
+                                       mapped = 1;
+                                       break;
+                               }
+                       }
+
+                       if (!mapped) {
+                               // None of blocks is mapped
+                               return (VID_BLOCK_NOTMAPPED);
+                       } else {
+                               // Some blocks are mapped and some are not
+                               return (VID_BLOCK_TOOBIG);
+                       }
+               }
+
+               byteaddr = sector_bit >> 3; // Find byte in the map
+               bitno = sector_bit & 7;     // Bit in the byte (computed but not used below)
+               ASSERT(byteaddr<512);
+               
+               // sectormap is the first member and will be aligned
+               vhdx = vdisk_malloc(sizeof(vhd_xfer_t)+512); // 512 spare bytes so an aligned vhd_xfer_t fits
+               if (vhdx == NULL) {
+                       VIDDBG(0, "Failed to allocate commit data\n");
+                       return (EIO);
+               }
+               
+               ptr = (char *)vhdx;
+               while ((addr_t)ptr & 511) ptr++; // first 512-byte boundary inside the allocation
+               
+               if (((addr_t)ptr - (addr_t)vhdx) >= 512)
+                       VIDDBG(0, "vhdx=%p, ptr=%p\n", vhdx, ptr);
+               
+               ASSERT(((addr_t)ptr - (addr_t)vhdx) < 512);
+               
+               ((vhd_xfer_t *)ptr)->mem = (void *)vhdx; // remember the raw allocation; vhd_xfer_commit() frees it
+               vhdx = (vhd_xfer_t *)ptr;
+               vhdx->fd = vf->fd;
+               vhdx->secmap_addr = sectormap_addr;
+               vhdx->sector_bit = sector_bit;
+               vhdx->num_secs = num_secs;
+               
+               if (VHD_CACHE_SZ > 0) {
+                       //vhdx->cache = &vhd->cache[cache_index];
+                       vhdx->cache = cache; // may be NULL when no line was available
+                       vhdx->first_sector = first_sector;
+                       if (cache) // Mark the line in-flight until vhd_xfer_commit() refills it
+                               cache->first_sector = VHD_INVALID_SECTOR;
+               } else
+                       vhdx->first_sector = VHD_INVALID_SECTOR;
+               
+               *arg = vhdx;
+               
+               vhd->stats.sec_alloc++;
+               
+       } else {
+               // cache the map
+               if (VHD_CACHE_SZ > 0) {
+                       if (cache && 
+                           (cache->first_sector != VHD_INVALID_SECTOR)) { // skip lines whose fill is in flight
+                               memcpy(cache->secmap_chunk, 
+                                      vhd->secmap_chunk, 512);
+                               cache->first_sector = first_sector;
+                       }
+               }
+       }       
+
+       if (cache) // record where the block's data starts: BAT sector plus the bitmap's sectors
+               cache->phys_first_sector = __arch__swab32(vhd->bat[blockno]) +
+                       (vhd->sectormap_sz >> 9);
+       
+       // Sector in the backing file
+       *sectorno = (__arch__swab32(vhd->bat[blockno])) + sector_in_block 
+               + (vhd->sectormap_sz >> 9);
+       
+
+       return (VID_BLOCK_MAPPED);
+}
+
+/*
+ * Detach and destroy every backing file on vdisk->vdf_head.
+ *
+ * For each file the statistics are logged (debug level 10), all VHD
+ * buffers are released - footer, dynamic header, BAT, the scratch sector
+ * bitmap and the per-line buffers of the sector-bitmap cache - then the
+ * file is unlinked from the list, its descriptor is closed and the
+ * vd_file_t itself is freed.  The vdisk structure is not freed here.
+ */
+void
+vhd_close(struct vdisk_dev *vdisk)
+{
+       struct list_head *ptr, *tmp;
+       vd_file_t *vf;
+       vhd_file_t *vhd;
+       int err;
+       int i;
+
+       if (vdisk == NULL) {
+               VIDDBG(0, "Invalid vdisk pointer\n");
+               return;
+       }
+
+       list_for_each_safe(ptr, tmp, &vdisk->vdf_head) {
+
+               vf = list_entry(ptr, vd_file_t, vdf_list);
+               if (vf == NULL) {
+                       VIDDBG(0, "Invalid vdisk file pointer\n");
+                       return;
+               }
+
+               vhd = vf->vdf;
+               if (vhd) {
+                       VIDDBG(10, "VHD Stats for %s: \n"
+                              "\t accesses:\t%" PRId64 "\n"
+                              "\t cache_hit:\t%" PRId64 "\n"
+                              "\t block_alloc:\t%" PRId64 "\n"
+                              "\t sec_alloc:\t%" PRId64 "\n"
+                              "\t total IOs:\t%" PRId64 "\n"
+                              "\t busy:\t%" PRId64 "\n"
+                              "\t sync:\t%" PRId64 "\n"
+                              "\t async:\t%" PRId64 "\n",
+                              vf->name,
+                              vhd->stats.access,
+                              vhd->stats.cache_hit,
+                              vhd->stats.block_alloc,
+                              vhd->stats.sec_alloc,
+                              vdisk->tot_io,
+                              vdisk->busyio,
+                              vdisk->syncio,
+                              vdisk->asyncio);
+
+                       if (vhd->ftr_mem)
+                               vdisk_free(vhd->ftr_mem);
+                       if (vhd->dhdr_mem)
+                               vdisk_free(vhd->dhdr_mem);
+                       if (vhd->bat_mem)
+                               vdisk_free(vhd->bat_mem);
+                       if (vhd->sec_mem)
+                               vdisk_free(vhd->sec_mem);
+                       // The cache-line buffers are allocated by
+                       // vhd_read_metadata() for non-fixed disks and stay
+                       // NULL otherwise
+                       for (i=0;i<VHD_CACHE_SZ;i++) {
+                               if (vhd->cache[i].sec_mem)
+                                       vdisk_free(vhd->cache[i].sec_mem);
+                       }
+                       vdisk_free(vhd);
+               }
+
+               list_del(&vf->vdf_list);
+
+               err = vdisk_close(vf->fd);
+               if (err)
+                       VIDDBG(0, "close(%s): %s\n", vf->name, strerror(errno));
+
+               vdisk_free(vf);
+
+               if (list_empty(&vdisk->vdf_head))
+                       break;
+       }
+}
+
+
+/*
+ * Open a VHD image and, when it is a differencing disk, its chain of
+ * parents, appending one vd_file_t per file to vdisk->vdf_head (leaf
+ * first).
+ *
+ * The leaf is opened read-write unless VDISK_RO is set in vdisk->flags;
+ * every parent is opened read-only.  For a differencing disk the eight
+ * parent locator entries of the dynamic header are tried in order and the
+ * first absolute/relative locator whose target can be opened names the
+ * next file of the chain.
+ *
+ * Returns 0 on success or an errno-style value.  When opening a file or
+ * reading its metadata fails, everything opened so far is torn down with
+ * vhd_close().  The failures inside the parent search return through
+ * 'out' without that teardown, as before.
+ */
+int vhd_open(struct vdisk_dev *vdisk, char *filename)
+{
+       int ret = 0;
+       int err;
+       vd_file_t *vf, *child_vf = NULL;
+       char *f, *child = NULL;
+       vhd_file_t *vhd;
+       int rw;
+
+       if (vdisk->flags & VDISK_RO)
+               rw = O_RDONLY;
+       else
+               rw = O_RDWR;
+
+       f = (char *)filename;
+
+       while (f != NULL) { // Read all file associated with this VD file
+
+               vf = (vd_file_t *)vdisk_malloc(sizeof(vd_file_t));
+               if (vf == NULL) {
+                       VIDDBG(0, "Couldn't allocate vd_file structure\n");
+                       // NOTE(review): this is the only failure path that
+                       // frees the caller's vdisk (and it does not call
+                       // vhd_close()); confirm against the caller that this
+                       // cannot end in a double free.
+                       vdisk_free(vdisk);
+                       return (ENOMEM);
+               }
+               memset(vf, 0, sizeof(vd_file_t));
+
+               // Bounded, always NUL-terminated copy; over-long names are
+               // truncated instead of overflowing vf->name
+               snprintf(vf->name, sizeof(vf->name), "%s", f);
+
+               vf->fd = vdisk_open(f, rw, 0);
+               if (vf->fd < 0) {
+                       // Capture errno before logging/cleanup can change it
+                       err = errno;
+                       VIDDBG(0, "Failed to open %s\n", f);
+                       vdisk_free(vf);
+                       vhd_close((void *)vdisk);
+                       return (err);
+               }
+               err = vhd_read_metadata(vdisk, vf);
+               if (err) {
+                       VIDDBG(0, "Couldn't read metadata for %s\n", f);
+                       // vf is not on the list yet, so vhd_close() will not
+                       // close this descriptor for us
+                       (void)vdisk_close(vf->fd);
+                       vdisk_free(vf);
+                       vhd_close((void *)vdisk);
+                       return (err);
+               }
+
+
+               if (child_vf == NULL) {
+                       vf->flags |= VDF_LEAF;
+                       rw = O_RDONLY; // for next iteration
+               }
+
+#if 0
+               // If this is a parent, verify paternity
+               if (!vhd_isfamily(vf, child_vf)) {
+                       VIDDBG(0, "%s is not parent of %s\n",
+                              f, child_vf);
+               }
+#endif
+
+               list_add_tail(&vf->vdf_list, &vdisk->vdf_head);
+
+               vhd = (vhd_file_t *)(vf->vdf);
+               if (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DIFF ) {
+                       int i;
+
+                       child = f;
+                       child_vf = vf;
+
+                       for (i=0;i<8;i++) {
+                               ple_t ple;
+                               int fd;
+
+                               vhd_get_dhdr_ple(vhd->dhdr, &ple, i);
+                               if (ple.code != VHD_DYN_PLE_ABS &&
+                                   ple.code != VHD_DYN_PLE_REL)
+                                       continue;
+
+                               f = vhd_get_parent_name(vf, &ple);
+                               if (f == NULL) {
+                                       // f is NULL here, so report the
+                                       // child whose parent is missing
+                                       VIDDBG(0, "Can't locate parent "
+                                              "info for %s\n", child);
+                                       ret = EINVAL;
+                                       goto out;
+                               }
+
+                               // stat would be better
+                               fd = open(f, O_RDONLY);
+                               if (fd >= 0) {
+                                       (void)close(fd);
+                                       break;
+                               }
+
+                               // This locator does not lead anywhere:
+                               // try the next one
+                               if (errno == ENOENT ||
+                                   errno == ELOOP ||
+                                   errno == ENOTDIR ||
+                                   errno == ENODEV ||
+                                   errno == EFAULT)
+                                       continue;
+
+                               ret = errno;
+                               VIDDBG(0, "stat(%s): %s\n",
+                                      f, strerror(ret));
+                               goto out;
+                       }
+
+                       if (f == child) {
+                               // No absolute/relative locator at all: f
+                               // still names the file just opened, and
+                               // looping would reopen it forever
+                               VIDDBG(0, "Can't locate parent "
+                                      "info for %s\n", child);
+                               ret = EINVAL;
+                               goto out;
+                       }
+               } else
+                       break;
+       }
+out:
+       return ret;
+}
+
+// Report the size recorded for the vdisk behind the opaque handle 'hdl'
+// (the sz field that vhd_read_metadata() fills in from the footer).
+uint64_t
+vhd_size(void *hdl)
+{
+       return (((struct vdisk_dev *)hdl)->sz);
+}
+
+/*
+ * Report the CHS geometry stored in the footer of the last file on the
+ * vdisk's file list (the base image of the chain).
+ *
+ * The 32-bit geometry word is unpacked as: cylinders in bits 31..16,
+ * heads in bits 15..8, sectors per track in bits 7..0.
+ *
+ * Returns 0 on success, -1 when the list is empty or the file has no VHD
+ * state attached.
+ */
+int
+vhd_get_geometry(void *hdl, int *cyls, int *heads, int *secs)
+{
+       struct vdisk_dev *vdisk = (struct vdisk_dev *)hdl;
+       vd_file_t *vf = NULL;
+       struct list_head *ptr;
+       vhd_file_t *vhd;
+       uint32_t geom;
+
+
+       // Assume that the last file (base) has all the info
+       // (the loop simply leaves vf pointing at the final list entry)
+       list_for_each(ptr, &vdisk->vdf_head)
+               vf = list_entry(ptr, vd_file_t, vdf_list);
+
+       if (!vf) {
+               VIDDBG(0, "Can't find base file\n");
+               return (-1);
+       }
+
+       vhd = (vhd_file_t *)vf->vdf;
+       if (vhd == NULL) {
+               VIDDBG(0, "Can't find VHD data\n");
+               return (-1);
+       }
+       geom = vhd_get_ftr_geom(vhd->ftr);
+
+       *cyls = (geom >> 16) & 0xffff;
+       *heads = (geom >> 8) & 0xff;
+       *secs = geom & 0xff;
+
+       VIDDBG(10, "geom = 0x%x (0x%x 0x%x 0x%x)\n", geom, *cyls, *heads,
+              *secs);
+
+       return (0);
+}
+
+vdf_data_t vdfd_vhd = { // format descriptor passed to vdisk_register()/vdisk_unregister(); initializers are positional
+       VHD_EXTENSION, // "vhd" (see vhd.h)
+       vhd_open, // defined in vhd.c
+       vhd_close, // defined in vhd.c
+       vhd_map_block, // defined in vhd.c
+       vhd_xfer_commit, // defined in vhd.c
+       vhd_print_header, // defined outside vhd.c
+       vhd_parse_args, // defined outside vhd.c
+       vhd_create_vdisk, // defined outside vhd.c
+       vhd_modify_vdisk, // defined outside vhd.c
+       {NULL,NULL}, // NOTE(review): presumably list linkage filled in by vdisk_register() - confirm against vdf_data_t
+};
+
+/*
+ * Module set-up: point vhd_zeroes at the first 512-byte-aligned address
+ * inside __vhd_zeroes, register the VHD format descriptor and clear the
+ * VHD_FTR_SZ bytes that vhd_zeroes designates.
+ */
+void
+vhd_init()
+{
+       char *aligned = __vhd_zeroes;
+
+       for (; ((addr_t)aligned & 511) != 0; aligned++)
+               ;
+       vhd_zeroes = aligned;
+
+       vdisk_register(&vdfd_vhd);
+       memset(vhd_zeroes, 0, VHD_FTR_SZ);
+}
+
+// Module tear-down: withdraw the VHD format descriptor registered by
+// vhd_init().
+void
+vhd_exit()
+{
+       vdf_data_t *descriptor = &vdfd_vhd;
+
+       vdisk_unregister(descriptor);
+}
diff -r 75c61490cc06 tools/vdisk/vhd.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd.h Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,107 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VHD_H
+#define __VHD_H
+
+#define VHD_EXTENSION "vhd"
+
+#define VHD_FTR_SZ   (512)
+#define VHD_DHDR_SZ  (1024)
+
+#define VHD_BAT_INVALID_ENTRY (0xffffffff)
+
+#define VHD_CACHE_SZ       (16)
+#define VHD_INVALID_SECTOR (0xffffffff)
+
+// Parent locator entry, held in host byte order.
+// vhd_get_dhdr_ple()/vhd_set_dhdr_ple() convert between this structure and
+// the 24-byte entries that start at offset 576 of the dynamic header; the
+// code in vhd_utils.c walks eight such entries.
+typedef struct ple {
+       uint32_t code;       // platform code; 0 is treated as "entry unused" when printing
+       uint32_t data_space; // space reserved for the locator (vhd_store_parent() stores bytes rounded to 512)
+       uint32_t data_len;   // locator length in bytes (vhd_store_parent() counts the trailing NUL)
+       uint32_t reserved; // XXX: do we care? (not read or written by the get/set helpers)
+       uint64_t data_off;   // absolute file offset of the locator data (used with lseek SEEK_SET)
+} ple_t;
+
+// One cached 512-byte chunk of a block's sectormap.
+// NOTE(review): next/prev together with vhd_file_t's cache_head/cache_tail
+// suggest a doubly linked list of cache entries -- confirm against vhd.c.
+typedef struct vhd_cache {
+       char *secmap_chunk;         // 512b chunk of block's sectormap 
+       char *sec_mem;              // NOTE(review): presumably the allocation backing secmap_chunk -- confirm
+       uint32_t first_sector;      // First sector of the sectormap
+       uint32_t phys_first_sector; // Sector in the file that first_sector
+                                   // maps to
+       struct vhd_cache *next, *prev;
+} vhd_cache_t;
+
+// Per-file counters kept in vhd_file_t.stats.
+// NOTE(review): the code that updates these is outside this header; the
+// per-field meanings below are inferred from the names -- confirm.
+typedef struct vhd_stat {
+       uint64_t access;      // presumably number of lookups
+       uint64_t cache_hit;   // presumably lookups satisfied from the sectormap cache
+       uint64_t block_alloc; // presumably blocks allocated
+       uint64_t sec_alloc;   // presumably sectors allocated
+} vhd_stat_t;
+
+// State carried for one transfer that has to update a sectormap chunk.
+// NOTE(review): the producer and consumer of this structure live in vhd.c
+// (vhd_map_block/vhd_xfer_commit by name) -- confirm the exact protocol there.
+typedef struct vhd_xfer {
+       // sectormap *must* be first member!
+       char secmap_chunk[512];   // 512b chunk of sectormap. 
+       off_t secmap_addr;        // Address of the chunk
+       int sector_bit;           // bit to be set in sectormap chunk
+       int num_secs;             // NOTE(review): presumably the number of sectors covered -- confirm
+       vhd_cache_t *cache;       // NOTE(review): presumably the cache entry for the same chunk -- confirm
+       int first_sector;
+       file_t fd;
+       void *mem;             // memory for vhd_xfer
+} vhd_xfer_t;
+
+// In-memory state for one open VHD file.
+// Each on-disk structure is kept as a pair of pointers: the *_mem member is
+// the allocation and the other member is the 512B-aligned address used for
+// I/O (per the member comments below).
+typedef struct vhd_file {
+       char *secmap_chunk;    // 512B-aligned block of sectormap. 
+       char *sec_mem;         // memory for sectormap section
+       char *ftr;             // 512B-aligned footer (raw; read through vhd_get_ftr_*())
+       char *ftr_mem;         // memory for footer
+       char *dhdr;            // 512B-aligned dynamic header (raw; read through vhd_get_dhdr_*())
+       char *dhdr_mem;        // memory for dynamic header
+       uint32_t *bat;         // 512B-aligned Block Allocation Table (entries are byte-swapped
+                              // before being compared with VHD_BAT_INVALID_ENTRY)
+       uint32_t *bat_mem;     // memory for BAT
+       vhd_cache_t cache[VHD_CACHE_SZ];
+       vhd_cache_t *cache_head;
+       vhd_cache_t *cache_tail;
+       vhd_stat_t stats;
+       int sec_per_block_log; // NOTE(review): presumably log2 of sectors per block -- confirm
+       int sectormap_sz;      // added to the block size when vhd_print_header() estimates the maximum file size
+       off_t next_block_off;  // NOTE(review): presumably the file offset for the next allocated block -- confirm
+} vhd_file_t;
+
+
+#define VHD_ARG_SZ        (1<<0)
+#define VHD_ARG_TYPE      (1<<1)
+#define VHD_ARG_BLOCKSZ   (1<<2)
+#define VHD_ARG_UUID      (1<<3)
+#define VHD_ARG_TIME      (1<<4)
+#define VHD_ARG_PARENT    (1<<5)
+
+
+// VHD-specific command line arguments, filled in by vhd_parse_args().
+typedef struct vhd_args {
+       size_t vhd_sz;      // disk size in bytes (-S value in MB multiplied by 1024*1024)
+       uint8_t type;       // VHD_TYPE_* selected by -f/-d, or VHD_TYPE_DIFF when a parent is given
+       size_t blocksz;     // block size in bytes (-B); vhd_parse_args() defaults it to 0x200000
+       uint8_t uuid[16];   // UUID decoded from the 32 hex characters given with -u
+       char *parent;       // heap copy of the -p argument, or NULL
+       uint64_t args_mask; // OR of the VHD_ARG_* bits for the options that were seen
+} vhd_args_t;
+
+
+// VHD driver descriptor handed to vdisk_register()/vdisk_unregister().
+extern vdf_data_t vdfd_vhd;
+extern char *vhd_zeroes; // Just a bunch of zeroes
+
+// Dump the footer (and dynamic header, if any) of vf to stdout.
+extern int vhd_print_header(vd_file_t *vf);
+// Parse VHD-specific options; on success *args receives a vhd_args_t.
+extern int vhd_parse_args(int argc, int operations, char *argv[], void **args);
+extern int vhd_create_vdisk(char *filename, void *args);
+extern int vhd_modify_vdisk(struct vdisk_dev *vdisk, void *args);
+// Read the parent locator described by ple from vf; returns a newly
+// allocated string, or NULL on failure.
+extern char *vhd_get_parent_name(vd_file_t *vf, ple_t *ple);
+// Register / unregister the VHD driver descriptor.
+extern void vhd_init(void);
+extern void vhd_exit(void);
+
+#endif /* __VHD_H */
diff -r 75c61490cc06 tools/vdisk/vhd_footer.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd_footer.h  Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,316 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VHD_FOOTER_H
+#define __VHD_FOOTER_H
+
+#include <string.h>
+#include <linux/types.h>
+#include <linux/byteorder/swab.h>
+
+#define VHD_COOKIE (uint64_t) (   (uint64_t)'c'                \
+                              | ((uint64_t)'o'<<(8*1)) \
+                              | ((uint64_t)'n'<<(8*2)) \
+                              | ((uint64_t)'e'<<(8*3)) \
+                              | ((uint64_t)'c'<<(8*4)) \
+                              | ((uint64_t)'t'<<(8*5)) \
+                              | ((uint64_t)'i'<<(8*6)) \
+                              | ((uint64_t)'x'<<(8*7)))
+
+#define VHD_FEATURES_NONE (0)
+#define VHD_FEATURES_TEMP (1)
+#define VHD_FEATURES_RSVD (2) 
+
+#define VHD_FORMAT_VER_1 (0x00010000)
+
+/* data offset for fixed disks */
+#define VHD_FIXED_OFFSET ((uint64_t)-1)
+
+#define VHD_CREATOR_APP ((uint32_t)'v' \
+                        | ((uint32_t)'i'<<8) \
+                        | ((uint32_t)'t'<<16) \
+                        | ((uint32_t)'l'<<24))
+#define VHD_CREATOR_VER_1 (0x00010000)
+
+#define VHD_CREATOR_HOST_OS ((uint32_t)'L' \
+                            | ((uint32_t)'i'<<8) \
+                            | ((uint32_t)'n'<<16) \
+                            | ((uint32_t)'x'<<24))
+
+#define VHD_TYPE_NONE       (0)
+#define VHD_TYPE_FIXED      (2)
+#define VHD_TYPE_DYNAMIC    (3)
+#define VHD_TYPE_DIFF       (4)
+
+
+
+// Pack cylinders/heads/sectors-per-track into the 32-bit footer geometry
+// word: cylinders in bits 31..16, heads in bits 15..8, sectors in bits 7..0.
+//
+// Expands to a GCC statement expression, so it yields a value whether or not
+// the caller wraps it in an extra pair of parentheses. Every argument is
+// parenthesized and widened to uint32_t before shifting, so a cylinder count
+// of 0x8000 or more no longer overflows a signed int.
+// Each argument is evaluated more than once; do not pass expressions with
+// side effects.
+#define VHD_GEOM(c,h,s) ({ \
+               ASSERT(((c)<=0xffff) && ((h)<=0xff) && ((s)<=0xff)) ; \
+               ((uint32_t)(s) | ((uint32_t)(h)<<8) | ((uint32_t)(c)<<16)); })
+
+
+// Footer field accessors (first part: cookie through disk type).
+// Every helper reads or writes one field at a fixed byte offset of the raw
+// footer buffer. Numeric fields are byte-swapped with __arch__swab32/64 on
+// each access; the cookie and the creator application / host OS tags are
+// moved without swapping.
+
+// Cookie: 8 bytes at offset 0, not swapped.
+static inline uint64_t vhd_get_ftr_cookie(char *ftr) {
+       return *(uint64_t *)ftr;
+}
+static inline void vhd_set_ftr_cookie(char *ftr, uint64_t val) {
+       *(uint64_t *)ftr = val;
+}
+
+// Features: 4 bytes at offset 8.
+static inline uint32_t vhd_get_ftr_features(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 8));
+}
+static inline void vhd_set_ftr_features(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 8) = __arch__swab32(val);
+}
+
+// File format version: 4 bytes at offset 12.
+static inline uint32_t vhd_get_ftr_fformat(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 12));
+}
+static inline void vhd_set_ftr_fformat(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 12) = __arch__swab32(val);
+}
+
+// Data offset: 8 bytes at offset 16.
+static inline uint64_t vhd_get_ftr_dataoff(char *ftr) {
+       return __arch__swab64(*(uint64_t *)(ftr + 16));
+}
+static inline void vhd_set_ftr_dataoff(char *ftr, uint64_t val) {
+       *(uint64_t *)(ftr + 16) = __arch__swab64(val);
+}
+
+// Timestamp: 4 bytes at offset 24.
+static inline uint32_t vhd_get_ftr_timestamp(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 24));
+}
+static inline void vhd_set_ftr_timestamp(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 24) = __arch__swab32(val);
+}
+
+// Creator application tag: 4 bytes at offset 28, not swapped.
+static inline uint32_t vhd_get_ftr_cr_app(char *ftr) {
+       return *(uint32_t *)(ftr + 28);
+}
+static inline void vhd_set_ftr_cr_app(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 28) = val;
+}
+
+// Creator version: 4 bytes at offset 32.
+static inline uint32_t vhd_get_ftr_cr_ver(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 32));
+}
+static inline void vhd_set_ftr_cr_ver(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 32) = __arch__swab32(val);
+}
+
+// Creator host OS tag: 4 bytes at offset 36, not swapped.
+static inline uint32_t vhd_get_ftr_cr_hostos(char *ftr) {
+       return *(uint32_t *)(ftr + 36);
+}
+static inline void vhd_set_ftr_cr_hostos(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 36) = val;
+}
+
+// Original size: 8 bytes at offset 40.
+static inline uint64_t vhd_get_ftr_orig_sz(char *ftr) {
+       return __arch__swab64(*(uint64_t *)(ftr + 40));
+}
+static inline void vhd_set_ftr_orig_sz(char *ftr, uint64_t val) {
+       *(uint64_t *)(ftr + 40) = __arch__swab64(val);
+}
+
+// Current size: 8 bytes at offset 48.
+static inline uint64_t vhd_get_ftr_cur_sz(char *ftr) {
+       return __arch__swab64(*(uint64_t *)(ftr + 48));
+}
+static inline void vhd_set_ftr_cur_sz(char *ftr, uint64_t val) {
+       *(uint64_t *)(ftr + 48) = __arch__swab64(val);
+}
+
+// Geometry word: 4 bytes at offset 56.
+static inline uint32_t vhd_get_ftr_geom(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 56));
+}
+static inline void vhd_set_ftr_geom(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 56) = __arch__swab32(val);
+}
+
+// Disk type (VHD_TYPE_*): 4 bytes at offset 60.
+static inline uint32_t vhd_get_ftr_type(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 60));
+}
+static inline void vhd_set_ftr_type(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 60) = __arch__swab32(val);
+}
+
+#define VHD_FTR_CHKSUM_OFF (64)
+// Footer checksum: 4 bytes at offset 64, byte-swapped on access.
+static inline uint32_t vhd_get_ftr_chksum(char *ftr) {
+       return __arch__swab32(*(uint32_t *)(ftr + 64));
+}
+static inline void vhd_set_ftr_chksum(char *ftr, uint32_t val) {
+       *(uint32_t *)(ftr + 64) = __arch__swab32(val);
+}
+
+// Unique ID: 16 raw bytes at offset 68. The getter returns a pointer into
+// the footer buffer; the setter copies 16 bytes from val.
+static inline uint8_t *vhd_get_ftr_uid(char *ftr) {
+       return (uint8_t *)(ftr + 68);
+}
+static inline void vhd_set_ftr_uid(char *ftr, uint8_t *val) {
+       memcpy(ftr + 68, val, 16);
+}
+
+// Saved state: single byte at offset 84.
+static inline uint8_t vhd_get_ftr_saved_state(char *ftr) {
+       return *(uint8_t *)(ftr + 84);
+}
+static inline void vhd_set_ftr_saved_state(char *ftr, uint8_t val) {
+       *(uint8_t *)(ftr + 84) = val;
+}
+
+
+
+#define VHD_DYN_COOKIE (uint64_t) (   (uint64_t)'c'            \
+                                     | ((uint64_t)'x'<<(8*1))  \
+                                     | ((uint64_t)'s'<<(8*2))  \
+                                     | ((uint64_t)'p'<<(8*3))  \
+                                     | ((uint64_t)'a'<<(8*4))  \
+                                     | ((uint64_t)'r'<<(8*5))  \
+                                     | ((uint64_t)'s'<<(8*6))  \
+                                     | ((uint64_t)'e'<<(8*7)))
+
+#define VHD_DYN_OFFSET      ((uint64_t)-1)
+#define VHD_DYN_HDR_VER_1   (0x00010000)
+
+// Parent locator codes (our own)
+#define VHD_DYN_PLE_ABS   (   (uint64_t)'u'            \
+                             | ((uint64_t)'x'<<(8*1))  \
+                             | ((uint64_t)'n'<<(8*2))  \
+                             | ((uint64_t)'L'<<(8*3)))
+#define VHD_DYN_PLE_REL   (   (uint64_t)'k'            \
+                             | ((uint64_t)'x'<<(8*1))  \
+                             | ((uint64_t)'n'<<(8*2))  \
+                             | ((uint64_t)'L'<<(8*3)))
+
+
+// Dynamic header field accessors (first part: cookie through block size).
+// Every helper reads or writes one field at a fixed byte offset of the raw
+// dynamic header buffer. Numeric fields are byte-swapped with
+// __arch__swab32/64 on each access; the cookie is moved without swapping.
+
+// Cookie: 8 bytes at offset 0, not swapped.
+static inline uint64_t vhd_get_dhdr_cookie(char *hdr) {
+       return *(uint64_t *)hdr;
+}
+static inline void vhd_set_dhdr_cookie(char *hdr, uint64_t val) {
+       *(uint64_t *)hdr = val;
+}
+
+// Data offset: 8 bytes at offset 8.
+static inline uint64_t vhd_get_dhdr_dataoff(char *hdr) {
+       return __arch__swab64(*(uint64_t *)(hdr + 8));
+}
+static inline void vhd_set_dhdr_dataoff(char *hdr, uint64_t val) {
+       *(uint64_t *)(hdr + 8) = __arch__swab64(val);
+}
+
+// Table (BAT) offset: 8 bytes at offset 16.
+static inline uint64_t vhd_get_dhdr_tbloff(char *hdr) {
+       return __arch__swab64(*(uint64_t *)(hdr + 16));
+}
+static inline void vhd_set_dhdr_tbloff(char *hdr, uint64_t val) {
+       *(uint64_t *)(hdr + 16) = __arch__swab64(val);
+}
+
+// Header version: 4 bytes at offset 24.
+static inline uint32_t vhd_get_dhdr_hdrver(char *hdr) {
+       return __arch__swab32(*(uint32_t *)(hdr + 24));
+}
+static inline void vhd_set_dhdr_hdrver(char *hdr, uint32_t val) {
+       *(uint32_t *)(hdr + 24) = __arch__swab32(val);
+}
+
+// Maximum number of table entries: 4 bytes at offset 28.
+static inline uint32_t vhd_get_dhdr_tbl_entries(char *hdr) {
+       return __arch__swab32(*(uint32_t *)(hdr + 28));
+}
+static inline void vhd_set_dhdr_tbl_entries(char *hdr, uint32_t val) {
+       *(uint32_t *)(hdr + 28) = __arch__swab32(val);
+}
+
+// Block size: 4 bytes at offset 32.
+static inline uint32_t vhd_get_dhdr_blksz(char *hdr) {
+       return __arch__swab32(*(uint32_t *)(hdr + 32));
+}
+static inline void vhd_set_dhdr_blksz(char *hdr, uint32_t val) {
+       *(uint32_t *)(hdr + 32) = __arch__swab32(val);
+}
+
+#define VHD_DHDR_CHKSUM_OFF (36)
+// Dynamic header checksum: 4 bytes at offset 36, byte-swapped on access.
+static inline uint32_t vhd_get_dhdr_chksum(char *hdr) {
+       return __arch__swab32(*(uint32_t *)(hdr + 36));
+}
+static inline void vhd_set_dhdr_chksum(char *hdr, uint32_t val) {
+       *(uint32_t *)(hdr + 36) = __arch__swab32(val);
+}
+
+// Parent unique ID: 16 raw bytes at offset 40. The getter returns a pointer
+// into the header buffer; the setter copies 16 bytes from val.
+static inline uint8_t *vhd_get_dhdr_puid(char *hdr) {
+       return (uint8_t *)(hdr + 40);
+}
+static inline void vhd_set_dhdr_puid(char *hdr,  uint8_t *val) {
+       memcpy(hdr + 40, val, 16);
+}
+
+// Parent timestamp: 4 bytes at offset 56, byte-swapped on access.
+static inline uint32_t vhd_get_dhdr_ptimestamp(char *hdr) {
+       return __arch__swab32(*(uint32_t *)(hdr + 56));
+}
+static inline void vhd_set_dhdr_ptimestamp(char *hdr, uint32_t val) {
+       *(uint32_t *)(hdr + 56) = __arch__swab32(val);
+}
+
+// Parent locator entry idx: 24 bytes starting at offset 576 + 24*idx.
+// Within the entry: code at +0, data_space at +4, data_len at +8 (32 bits
+// each) and data_off at +16 (64 bits); bytes +12..+15 are left untouched.
+static inline void vhd_get_dhdr_ple(char *hdr, ple_t *ple, int idx) {
+       char *entry = hdr + 576 + 24*idx;
+       uint32_t *words = (uint32_t *)entry;
+
+       ple->code = __arch__swab32(words[0]);
+       ple->data_space = __arch__swab32(words[1]);
+       ple->data_len = __arch__swab32(words[2]);
+       ple->data_off = __arch__swab64(*(uint64_t *)(entry + 16));
+}
+
+static inline void vhd_set_dhdr_ple(char *hdr, ple_t *ple, int idx) {
+       char *entry = hdr + 576 + 24*idx;
+       uint32_t *words = (uint32_t *)entry;
+
+       words[0] = __arch__swab32(ple->code);
+       words[1] = __arch__swab32(ple->data_space);
+       words[2] = __arch__swab32(ple->data_len);
+       *(uint64_t *)(entry + 16) = __arch__swab64(ple->data_off);
+}
+
+
+
+#endif /* __VHD_FOOTER_H */
diff -r 75c61490cc06 tools/vdisk/vhd_utils.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd_utils.c   Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,964 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/stddef.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include "vdisk.h"
+#include "vdisk_utils.h"
+#include "vhd.h"
+#include "vhd_footer.h"
+
+
+// Compute the packed CHS geometry word for a disk of sz bytes.
+// Taken from Microsoft's VHD spec (hence notations...)
+//
+// sz: disk size in bytes, assumed to be a whole number of 512-byte sectors.
+// Returns the geometry packed by VHD_GEOM (cylinders, heads, sectors/track).
+static uint32_t
+vhd_chs(ssize_t sz)
+{
+       // Count sectors in 64 bits first: a 32-bit counter wraps for sizes of
+       // 2TB and above (exactly 2TB would become 0 sectors) before the clamp
+       // below gets a chance to apply.
+       uint64_t allSectors = (uint64_t)sz >> 9;
+       uint32_t totalSectors;
+       int sectorsPerTrack, heads, cylinderTimesHeads, cylinders;
+
+       if (allSectors > 65535 * 16 * 255)
+               totalSectors = 65535 * 16 * 255;
+       else
+               totalSectors = (uint32_t)allSectors;
+
+       if (totalSectors >= 65535 * 16 * 63) {
+               sectorsPerTrack = 255;
+               heads = 16;
+               cylinderTimesHeads = totalSectors / sectorsPerTrack;
+       } else {
+               sectorsPerTrack = 17;
+               cylinderTimesHeads = totalSectors / sectorsPerTrack;
+
+               heads = (cylinderTimesHeads + 1023) / 1024;
+
+               if (heads < 4)
+                       heads = 4;
+
+               if (cylinderTimesHeads >= (heads * 1024) || heads > 16) {
+                       sectorsPerTrack = 31;
+                       heads = 16;
+                       cylinderTimesHeads = totalSectors / sectorsPerTrack;
+               }
+
+               if (cylinderTimesHeads >= (heads * 1024)) {
+                       sectorsPerTrack = 63;
+                       heads = 16;
+                       cylinderTimesHeads = totalSectors / sectorsPerTrack;
+               }
+       }
+       cylinders = cylinderTimesHeads / heads;
+
+       // Keep the outer parentheses: they are what turns the VHD_GEOM block
+       // into an expression.
+       return (VHD_GEOM(cylinders, heads, sectorsPerTrack));
+}
+
+// Compute the VHD checksum of a structure: the one's complement of the sum
+// of its bytes, optionally leaving out a 4-byte checksum field.
+//
+// ptr:  start of the structure; NULL yields 0.
+// sz:   number of bytes to sum.
+// excl: address of the 4-byte field to leave out of the sum (normally the
+//       checksum field inside the same buffer), or NULL to sum everything.
+//
+// All bytes are treated as unsigned, both when summing and when removing the
+// excluded field; using plain char for the latter gives a wrong result for
+// checksum bytes of 0x80 and above where char is signed.
+uint32_t
+vhd_chksum(char *ptr, size_t sz, char *excl)
+{
+       const uint8_t *bytes = (const uint8_t *)ptr;
+       uint32_t chksum = 0;
+       size_t i;
+
+       if (ptr == NULL)
+               return (0);
+
+       for (i = 0; i < sz; i++)
+               chksum += bytes[i];
+
+       if (excl != NULL) {
+               const uint8_t *skip = (const uint8_t *)excl;
+
+               // Subtract 4 bytes of checksum
+               chksum -= (uint32_t)skip[0] + skip[1] + skip[2] + skip[3];
+       }
+
+       return (~chksum);
+}
+
+
+// Convert between VHD timestamps (seconds since 2000-01-01 00:00:00 UTC)
+// and Unix time (seconds since 1970-01-01 00:00:00 UTC).
+//
+// f2c != 0: *file_time is the input (VHD time); the Unix time is stored in
+//           *cur_time when cur_time is not NULL.
+// f2c == 0: *cur_time is the input (Unix time); the VHD time is stored in
+//           *file_time when file_time is not NULL.
+//
+// Returns the ctime() text of the converted value with the trailing newline
+// removed, or NULL on failure. The string lives in ctime()'s static buffer,
+// so it is overwritten by the next call. When the input pointer is NULL the
+// output (if any) is set to 0 and NULL is returned.
+//
+// The distance between the two epochs is a fixed number of seconds. It is
+// not derived from mktime(), whose result depends on the local timezone
+// rules in force in 1970 and 2000 and can be off by an hour.
+static char *
+vhd_time(uint32_t *file_time, uint32_t *cur_time, int f2c)
+{
+       // 10957 days between the epochs, 86400 seconds each.
+       const time_t epoch_delta = 946684800;
+       uint32_t *src = f2c ? file_time : cur_time;
+       uint32_t *dst = f2c ? cur_time : file_time;
+       time_t tm;
+       char *timestr;
+
+       if (src == NULL) {
+               VIDDBG(0, "Invalid time\n");
+               if (dst != NULL)
+                       *dst = 0;
+               return NULL;
+       }
+
+       if (f2c)
+               tm = (time_t)*src + epoch_delta;
+       else
+               tm = (time_t)*src - epoch_delta;
+
+       timestr = ctime(&tm);
+       if (timestr == NULL) {
+               VIDDBG(0, "Couldn't convert time (0x%x)\n", *src);
+               return NULL;
+       }
+       if (dst != NULL)
+               *dst = (uint32_t)tm;
+
+       // ctime() ends its text with a newline; drop it.
+       timestr[strlen(timestr)-1] = '\0';
+
+       return timestr;
+}
+
+
+// Read the parent locator string described by ple from the file behind vf.
+//
+// vf:  file to read from (its fd is used with lseek/read).
+// ple: parent locator entry giving the data offset, reserved space and
+//      string length.
+//
+// Returns a NUL-terminated copy of the first data_len bytes of the locator,
+// allocated with vdisk_malloc(); the caller releases it. Returns NULL when
+// the entry is missing, empty or inconsistent, or on allocation or I/O
+// failure.
+char *
+vhd_get_parent_name(vd_file_t *vf, ple_t *ple)
+{
+       char *cp, *buf, *pool;
+       ssize_t bytes;
+
+       if ((ple == NULL) || (ple->data_len == 0)) {
+               VIDDBG(0, "Invalid data\n");
+               return (NULL);
+       }
+
+       // Only data_space bytes are read below, so a longer data_len would
+       // make the copy run past what was read.
+       if (ple->data_len > ple->data_space) {
+               VIDDBG(0, "Invalid data\n");
+               return (NULL);
+       }
+
+       // The file is opened with O_DIRECT, so we need to
+       // align buffer on 512-byte boundary
+       pool = buf = vdisk_malloc(ple->data_space+512);
+       if (buf == NULL) {
+               perror("malloc");
+               return (NULL);
+       }
+       while((addr_t)buf & 511) buf++;
+
+       if (lseek(vf->fd, ple->data_off, SEEK_SET) != ple->data_off) {
+               perror("lseek");
+               vdisk_free(pool);
+               return NULL;
+       }
+
+       bytes = read(vf->fd, buf, (size_t)ple->data_space);
+       if ((bytes < 0) || ((size_t)bytes != (size_t)ple->data_space)) {
+               perror("read");
+               vdisk_free(pool);
+               VIDDBG(0, "fd = %d\n", vf->fd);
+               return NULL;
+       }
+
+       cp = vdisk_malloc(ple->data_len+1);
+       if (cp == NULL) {
+               perror("malloc");
+               vdisk_free(pool);
+               return (NULL);
+       }
+
+       // strncpy() does not terminate the destination when the source has
+       // no NUL within data_len bytes, so terminate the copy explicitly
+       // (the copy, not the scratch buffer that is about to be freed).
+       strncpy(cp, buf, ple->data_len);
+       cp[ple->data_len] = 0;
+       vdisk_free(pool);
+
+       //XXX: for codes W2Ru and W2ku we need to convert from UTF-16 to ASCII
+       return cp;
+}
+
+// Print a human-readable dump of a VHD file to stdout: the footer, then for
+// dynamic and differencing disks the dynamic header, and for differencing
+// disks the parent information and every non-empty parent locator entry.
+//
+// vf: open file whose vdf member points at the parsed vhd_file_t.
+//
+// Returns 0 on success, or the error returned by vdisk_size().
+int
+vhd_print_header(vd_file_t *vf)
+{
+       char *cp;
+       const char *ts;
+       uint64_t v64;
+       uint32_t v32;
+       uint32_t type;
+       vhd_file_t *vhd = (vhd_file_t *)(vf->vdf);
+       size_t sz, max_sz;
+       int i;
+       int err;
+
+       // Figure out max file size
+
+       err = vdisk_size(vf->fd, &sz);
+       if (err) {
+               VIDDBG(0, "Couldn't get file size\n");
+               return (err);
+       }
+
+       type = vhd_get_ftr_type(vhd->ftr);
+       if (type == VHD_TYPE_FIXED)
+               max_sz = sz;
+       else {
+               uint64_t unmapped_blocks = 0;
+               uint32_t entries = vhd_get_dhdr_tbl_entries(vhd->dhdr);
+               uint32_t blk;
+               size_t new_bytes;
+
+               // Count blocks that haven't been allocated
+               for (blk = 0; blk < entries; blk++)
+                       if (__arch__swab32(vhd->bat[blk]) ==
+                           VHD_BAT_INVALID_ENTRY)
+                               unmapped_blocks++;
+
+               // XXX: Assume that block size is in 512-byte chunks
+               new_bytes = unmapped_blocks * (vhd->sectormap_sz +
+                                              vhd_get_dhdr_blksz(vhd->dhdr));
+               max_sz = sz + new_bytes;
+       }
+
+       printf("FILE %s:\n", vf->name);
+       printf("\tMaximum file size:\t0x%016zx\n\n", max_sz);
+
+
+       v64 = vhd_get_ftr_cookie(vhd->ftr);
+       cp = (char *)&v64;
+       printf("\tCookie:\t\t\t0x%016" PRIx64 " (\"%c%c%c%c%c%c%c%c\")\n", v64,
+              cp[0], cp[1], cp[2], cp[3], cp[4], cp[5], cp[6], cp[7]);
+
+       printf("\tFeatures:\t\t0x%08x\n", vhd_get_ftr_features(vhd->ftr));
+       printf("\tFile format version:\t0x%08x\n",
+              vhd_get_ftr_fformat(vhd->ftr));
+       printf("\tData Offset:\t\t0x%016" PRIx64 "\n",
+              vhd_get_ftr_dataoff(vhd->ftr));
+
+       // vhd_time() may return NULL; never hand that to %s.
+       v32 = vhd_get_ftr_timestamp(vhd->ftr);
+       ts = vhd_time(&v32, NULL, 1);
+       printf("\ttimestamp:\t\t0x%08x (%s)\n", v32, ts ? ts : "(null)");
+
+       printf("\tCreator App:\t\t0x%08x\n", vhd_get_ftr_cr_app(vhd->ftr));
+       printf("\tCreator Ver:\t\t0x%08x\n", vhd_get_ftr_cr_ver(vhd->ftr));
+       printf("\tCreator Host OS:\t0x%08x\n", vhd_get_ftr_cr_hostos(vhd->ftr));
+       printf("\tOriginal size:\t\t0x%016" PRIx64 "\n",
+              vhd_get_ftr_orig_sz(vhd->ftr));
+       printf("\tCurrent size:\t\t0x%016" PRIx64 "\n",
+              vhd_get_ftr_cur_sz(vhd->ftr));
+       printf("\tGeometry:\t\t0x%08x\n", vhd_get_ftr_geom(vhd->ftr));
+       printf("\tType:\t\t\t0x%08x\n", type);
+       printf("\tChecksum:\t\t0x%08x\n", vhd_get_ftr_chksum(vhd->ftr));
+
+       printf("\tUnique ID:\t\t");
+       cp = (char *)vhd_get_ftr_uid(vhd->ftr);
+       for (i=0;i<16;i++)
+               printf("%02x", (*cp++) & 0xff);
+
+       printf("\n\tSaved state:\t\t0x%08x\n",
+              vhd_get_ftr_saved_state(vhd->ftr));
+       if ((type == VHD_TYPE_DYNAMIC ) ||
+           (type == VHD_TYPE_DIFF )) {
+
+               printf(" Dynamic Header:\n");
+
+               v64 = vhd_get_dhdr_cookie(vhd->dhdr);
+               cp = (char *)&v64;
+               printf("\t Cookie:\t\t0x%016" PRIx64
+                      " (\"%c%c%c%c%c%c%c%c\")\n",
+                      v64, cp[0], cp[1], cp[2], cp[3], cp[4], cp[5], cp[6],
+                      cp[7]);
+               printf("\t Data Offset:\t\t0x%016" PRIx64 "\n",
+                      vhd_get_dhdr_dataoff(vhd->dhdr));
+               printf("\t Table Offset:\t\t0x%016" PRIx64 "\n",
+                      vhd_get_dhdr_tbloff(vhd->dhdr));
+               printf("\t Max Table Entries:\t0x%08x\n",
+                      vhd_get_dhdr_tbl_entries(vhd->dhdr));
+               printf("\t Block Size:\t\t0x%08x\n",
+                      vhd_get_dhdr_blksz(vhd->dhdr));
+               printf("\t Checksum:\t\t0x%08x\n",
+                      vhd_get_dhdr_chksum(vhd->dhdr));
+       }
+
+       if (type == VHD_TYPE_DIFF ) {
+
+               printf("\t Parent Unique ID:\t");
+               cp = (char *)vhd_get_dhdr_puid(vhd->dhdr);
+               for (i=0;i<16;i++)
+                       printf("%02x", (*cp++) & 0xff);
+               v32 = vhd_get_dhdr_ptimestamp(vhd->dhdr);
+               ts = vhd_time(&v32, NULL, 1);
+               printf("\n\t Parent Timestamp:\t0x%08x (%s)\n",
+                      v32, ts ? ts : "(null)");
+
+               for (i=0;i<8;i++) {
+                       ple_t ple;
+
+                       vhd_get_dhdr_ple(vhd->dhdr, &ple, i);
+                       if (ple.code != 0) {
+                               printf("\t Parent Locator Entry %d:\n", i);
+
+                               cp = (char *)&ple.code;
+                               printf("\t\tPlatform Code:\t0x%08x "
+                                      "(\"%c%c%c%c\")\n",
+                                      ple.code, cp[3], cp[2], cp[1], cp[0]);
+                               printf("\t\tData Space:\t0x%08x\n",
+                                      ple.data_space);
+                               printf("\t\tData Length:\t0x%08x\n",
+                                      ple.data_len);
+                               printf("\t\tData Offset:\t0x%016" PRIx64" \n",
+                                      ple.data_off);
+                               cp = vhd_get_parent_name(vf, &ple);
+                               if (cp == NULL) {
+                                       VIDDBG(0, "Can't locate parent info "
+                                              "in file\n");
+                                       continue;
+                               }
+                               printf("\t\tParent Locator:\t%s\n", cp);
+                               // The name comes from vdisk_malloc(), so
+                               // release it with the matching call.
+                               vdisk_free(cp);
+                       }
+               }
+       }
+
+       return (0);
+}
+
+// Parse the VHD-specific command line options into a newly allocated
+// vhd_args_t.
+//
+// argc, argv: command line, passed straight to getopt().
+// operations: VDISK_OP_* bits describing the requested operation; they
+//             select which option combinations are accepted.
+// args:       on success receives the vhd_args_t (including a heap copy of
+//             the -p argument, if any); left untouched on failure.
+//
+// Returns 0 on success. On any error everything allocated here is released,
+// the usage line is printed to stderr and -1 is returned.
+int
+vhd_parse_args(int argc, int operations, char *argv[], void **args)
+{
+       // getopt() returns an int. Storing it in a plain char makes the -1
+       // end marker impossible to detect where char is unsigned.
+       int c;
+       int i;
+       extern char *optarg;
+       extern int optind, opterr, optopt;
+       vhd_args_t *vhd_args;
+
+       void vhd_usage() {
+               fprintf(stderr, "VHD-specific options: "
+                       "-S <size(MB)> [-f|-d [-p <parent>]] [-B <size(B)>]"
+                       " [-u UUID] [-t]\n");
+       }
+
+       vhd_args = malloc(sizeof(vhd_args_t));
+       if (vhd_args == NULL) {
+               VIDDBG(0, "Can't allocate arguments\n");
+               return (-1);
+       }
+
+       memset(vhd_args, 0, sizeof(vhd_args_t));
+       vhd_args->type = VHD_TYPE_NONE;
+       vhd_args->blocksz = 0x200000; // 2MB
+
+       while (1) {
+
+               c = getopt(argc, argv, "S:fdstB:u:p:");
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 'f':
+                       vhd_args->type = VHD_TYPE_FIXED;
+                       vhd_args->args_mask |= VHD_ARG_TYPE;
+                       break;
+               case 's': // 's' for "sparse"
+                       VIDDBG(0, "'-s' option is obsolete. Use '-d' "
+                              "instead\n");
+                       /* fallthrough */
+               case 'd':
+                       vhd_args->type = VHD_TYPE_DYNAMIC;
+                       vhd_args->args_mask |= VHD_ARG_TYPE;
+                       break;
+               case 'S':
+                       vhd_args->vhd_sz = atol(optarg) * 1024 * 1024;
+                       vhd_args->args_mask |= VHD_ARG_SZ;
+                       break;
+               case 't':
+                       vhd_args->args_mask |= VHD_ARG_TIME;
+                       break;
+               case 'p':
+                       vhd_args->args_mask |= VHD_ARG_PARENT;
+                       // A repeated -p replaces the earlier value; release
+                       // the old copy first.
+                       free(vhd_args->parent);
+                       vhd_args->parent = malloc(strlen(optarg)+1);
+                       if (vhd_args->parent == NULL) {
+                               VIDDBG(0, "Out of memory\n");
+                               goto fail;
+                       }
+                       memcpy(vhd_args->parent, optarg, strlen(optarg)+1);
+                       break;
+               case 'B':
+                       vhd_args->blocksz = atol(optarg);
+                       // Zero would divide by zero in the size check below.
+                       if (vhd_args->blocksz == 0) {
+                               VIDDBG(0, "block size must be non-zero\n");
+                               goto fail;
+                       }
+                       // Must be in 512 byte chunks
+                       if (vhd_args->blocksz & 511) {
+                               VIDDBG(0,
+                                      "block size must be divisible by 512\n");
+                               goto fail;
+                       }
+                       vhd_args->args_mask |= VHD_ARG_BLOCKSZ;
+                       break;
+               case 'u':
+                       if ((optarg == NULL) || (strlen(optarg) != 32)) {
+                               VIDDBG(0, "UUID is a 16-byte (32-character)"
+                                      " string\n");
+                               goto fail;
+                       }
+
+                       // The nibbles are OR-ed in below, so start from a
+                       // clean slate in case -u is given more than once.
+                       memset(vhd_args->uuid, 0, sizeof(vhd_args->uuid));
+
+                       // Convert UUID characters to hex
+                       for(i=0;i<32;i++) {
+                               uint8_t val;
+
+                               val = optarg[i];
+                               if (!isxdigit(val)) {
+                                       VIDDBG(0, "Invalid character in UUID "
+                                              "string ('%c')\n", optarg[i]);
+                                       // Use the common exit so a parent
+                                       // name parsed earlier is released too.
+                                       goto fail;
+                               }
+                               if (isalpha(val)) {
+                                       val = tolower(val);
+                                       val -= ('a' - 0xa);
+                               } else
+                                       val -= '0';
+
+                               // two hex numbers per byte
+                               vhd_args->uuid[i>>1] |= (val << (4*((i&1)^1)));
+                       }
+                       vhd_args->args_mask |= VHD_ARG_UUID;
+                       break;
+               default:
+                       vhd_usage();
+                       goto fail;
+               }
+       }
+
+       if ((vhd_args->parent != NULL) && (vhd_args->type == VHD_TYPE_FIXED)) {
+               VIDDBG(0, "Fixed VHD cannot have a parent\n");
+               goto fail;
+       }
+
+       if (operations & VDISK_OP_CREATE) {
+               if (vhd_args->parent == NULL) {
+                       if ((vhd_args->vhd_sz == 0) ||
+                           (vhd_args->type == VHD_TYPE_NONE))
+                               goto fail;
+               }
+       }
+
+       // blocksz is never zero here: the default is 2MB and -B rejects 0.
+       if (vhd_args->vhd_sz % vhd_args->blocksz) {
+               VIDDBG(0, "File size must be multiple of block size\n");
+               goto fail;
+       }
+
+       if (operations & VDISK_OP_MODIFY) {
+               if (vhd_args->args_mask & VHD_ARG_PARENT) {
+                       if (vhd_args->type == VHD_TYPE_FIXED) {
+                               VIDDBG(0, "Fixed VHDs can't have parents\n");
+                               goto fail;
+                       }
+               }
+               if (vhd_args->args_mask & (VHD_ARG_SZ | VHD_ARG_BLOCKSZ)) {
+                       VIDDBG(0, "Can't modify VHD's size or block size\n");
+                       goto fail;
+               }
+       }
+
+       if (vhd_args->parent != NULL) {
+               vhd_args->type = VHD_TYPE_DIFF;
+               if (vhd_args->args_mask & (VHD_ARG_SZ | VHD_ARG_BLOCKSZ)) {
+                       VIDDBG(0, "Differencing VHD's size and block size "
+                              "are inherited from parent\n");
+                       goto fail;
+               }
+       }
+
+
+       *args = vhd_args;
+       return (0);
+
+fail:
+       free(vhd_args->parent);
+       free(vhd_args);
+       vhd_usage();
+       return (-1);
+}
+
+// Store differencing file's parent information.
+//
+// Copies the parent's UID and timestamp from pvhd->ftr into the child's
+// dynamic header, clears all eight parent locator entries, fills entry 0
+// with a locator describing parentname, recomputes the dynamic header
+// checksum, then writes the dynamic header (at file offset VHD_FTR_SZ)
+// and the NUL-terminated parent name (at the locator's data offset).
+//
+// vfd        - descriptor of the child file, open for writing
+// vhd        - child file state; vhd->dhdr is updated in place
+// pvhd       - parent file state; only pvhd->ftr is read
+// parentname - parent path; a leading '/' selects VHD_DYN_PLE_ABS,
+//              anything else VHD_DYN_PLE_REL
+// data       - if non-NULL, receives the offset just past the space
+//              reserved for the locator data
+//
+// Returns 0 on success or an errno-style value on failure (EIO when a
+// write was short without reporting an error).
+static int
+vhd_store_parent(int vfd, vhd_file_t *vhd, vhd_file_t *pvhd, 
+                char *parentname, loff_t *data)
+{
+       uint32_t bat_sz;
+       ple_t ple;
+       int i;
+       int err;
+       ssize_t bytes;
+       off_t pos;
+       size_t name_len = strlen(parentname) + 1;  // includes trailing NUL
+
+       vhd_set_dhdr_puid(vhd->dhdr, vhd_get_ftr_uid(pvhd->ftr));
+       vhd_set_dhdr_ptimestamp(vhd->dhdr, 
+                               vhd_get_ftr_timestamp(pvhd->ftr));
+
+       // Wipe every locator slot before filling in slot 0
+       memset(&ple, 0, sizeof(ple_t));
+       for (i=0;i<8;i++)
+               vhd_set_dhdr_ple(vhd->dhdr, &ple, i);
+
+       if (parentname[0] == '/')
+               ple.code = VHD_DYN_PLE_ABS;
+       else
+               ple.code = VHD_DYN_PLE_REL;
+
+       // XXX: The spec says this is number of 512b sectors,
+       // but file created by MS's Virtual PC tool seems to
+       // think this is number of bytes, aligned at 512b
+       // NOTE(review): 512 (not 511) is added before masking, so a
+       // length that is already a multiple of 512 is bumped to the next
+       // sector; left as-is to keep the on-disk layout unchanged.
+       ple.data_space = (name_len + 512) & (~511);
+       ple.data_len = name_len;
+
+       // BAT size in bytes (4 bytes per entry)
+       bat_sz = vhd_get_dhdr_tbl_entries(vhd->dhdr) << 2;
+
+       // Locator data lives after: footer copy, dynamic header, BAT
+       // (padded up to a 512-byte boundary) and one extra 512-byte block.
+       ple.data_off = VHD_DHDR_SZ + VHD_FTR_SZ +
+               bat_sz + 
+               ((bat_sz & 511) ? (512-(bat_sz&511)) : 0) +
+               512; // XXX: see comment in vhd_create_vdisk()
+       vhd_set_dhdr_ple(vhd->dhdr, &ple, 0);
+
+       // Recalculate checksum
+       vhd_set_dhdr_chksum(vhd->dhdr, 
+                           vhd_chksum(vhd->dhdr, VHD_DHDR_SZ, 
+                                      &vhd->dhdr[VHD_DHDR_CHKSUM_OFF]));
+
+       if (lseek(vfd, VHD_FTR_SZ, SEEK_SET) !=  VHD_FTR_SZ) {
+               err = errno;
+               VIDDBG(0, "lseek: %s\n", strerror(err));
+               return (err);
+       }
+
+       // Write the dynamic header
+       bytes = write(vfd, vhd->dhdr, VHD_DHDR_SZ);
+       if (bytes != VHD_DHDR_SZ) {
+               // errno is only meaningful when write() returned -1
+               err = (bytes == -1) ? errno : EIO;
+               VIDDBG(0, "write: %s\n", strerror(err));
+               return (err);
+       }
+
+       // Write PLE
+       pos = lseek(vfd, ple.data_off, SEEK_SET);
+       if (pos != (off_t)ple.data_off) {
+               err = errno;
+               VIDDBG(0, "lseek: %s\n", strerror(err));
+               return (err);
+       }
+       bytes = write(vfd, parentname, name_len);
+       if (bytes != (ssize_t)name_len) {
+               err = (bytes == -1) ? errno : EIO;
+               VIDDBG(0, "write: %s\n", strerror(err));
+               return (err);
+       }
+
+       if (data != NULL)
+               *data = (loff_t)ple.data_off + (loff_t)ple.data_space;
+
+       return (0);
+}
+
+
+// Modify an existing VHD in place.
+//
+// Depending on the bits set in vhd_args->args_mask this updates the
+// footer UUID (VHD_ARG_UUID), rewrites the parent locator
+// (VHD_ARG_PARENT) and/or refreshes the footer timestamp (VHD_ARG_TIME).
+// Every visited file is first closed and reopened with plain O_RDWR.
+// The walk over vdisk->vdf_head stops after the first file for which any
+// of those bits was set.
+//
+// vdisk - device whose vdf_head list holds the file(s) to modify
+// args  - vhd_args_t * describing the requested changes
+//
+// Returns 0 on success; on failure returns -1, an errno value, or the
+// error reported by a helper.
+int
+vhd_modify_vdisk(struct vdisk_dev *vdisk, void *args)
+{
+       vhd_args_t *vhd_args = args;
+       vd_file_t *vf = NULL;
+       vhd_file_t *vhd;
+       size_t sz;
+       ssize_t bytes;
+       int err;
+       int store_footer = 0;
+       struct list_head *ptr;
+       int stop = 0;
+
+
+       // XXX: We always make a single pass
+       list_for_each(ptr, &vdisk->vdf_head) {
+
+               vf = list_entry(ptr, vd_file_t, vdf_list);
+               if ((vf == NULL) || (vf->vdf == NULL)) {
+                       VIDDBG(0, "Can't access vdisk's structures\n");
+                       return (-1);
+               }
+               vhd = (vhd_file_t *)vf->vdf;
+
+               // Close and reopen file (it may have been open O_DIRECT)
+               err = vdisk_close(vf->fd);
+               if (err) {
+                       VIDDBG(0, "Can't close %s:%d\n", vf->name, err);
+                       return (err);
+               }
+
+               vf->fd = open(vf->name, O_RDWR, 0644);
+               if (vf->fd == -1) {
+                       err = errno;
+                       // strerror() yields a string, so print it with %s
+                       VIDDBG(0, "Can't open %s:%s\n", vf->name,
+                              strerror(err));
+                       return (err);
+               }
+
+               // Update UUID
+               if (vhd_args->args_mask & VHD_ARG_UUID) {
+
+                       vhd_set_ftr_uid(vhd->ftr, vhd_args->uuid);
+
+                       store_footer = 1;
+                       stop = 1;
+               }
+
+               // Change parent name
+               if (vhd_args->args_mask & VHD_ARG_PARENT) {
+                       vhd_file_t *pvhd;
+                       struct vdisk_dev parent;
+                       vd_file_t *pvf;
+
+                       // Open parent file
+                       err = vdisk_init(&parent, vhd_args->parent, NULL, 0);
+                       if (err) {
+                               VIDDBG(0, "Failed to initialize state for "
+                                      "parent %s\n", vhd_args->parent);
+                               return (err);
+                       }
+                       pvf = list_entry(parent.vdf_head.next, vd_file_t,
+                                        vdf_list);
+                       pvhd = (vhd_file_t *)pvf->vdf;
+
+                       // Update dynamic header and parent data
+                       err = vhd_store_parent(vf->fd, vhd, pvhd,
+                                              vhd_args->parent, NULL);
+                       if (err) {
+                               VIDDBG(0, "Failed to store parent name (%s)\n",
+                                      vhd_args->parent);
+                               vdisk_fini(&parent);
+                               return (err);
+                       }
+                       vdisk_fini(&parent);
+
+                       store_footer = 1;
+                       stop = 1;
+               }
+
+               // Update timestamp
+               // NOTE(review): this path does not set store_footer, so a
+               // timestamp-only request changes the in-memory footer but
+               // is not written by this function -- confirm intent.
+               if (vhd_args->args_mask & VHD_ARG_TIME) {
+                       uint32_t curtime, ftime;
+
+                       curtime = time(NULL);
+                       if (curtime == -1) {
+                               // Save errno first: perror() may change it
+                               err = errno;
+                               perror("time");
+                               return (err);
+                       }
+                       (void)vhd_time(&ftime, &curtime, 0);
+                       vhd_set_ftr_timestamp(vhd->ftr, ftime);
+
+                       stop = 1;
+               }
+
+               // Recompute footer's checksum
+               vhd_set_ftr_chksum(vhd->ftr, 
+                                  vhd_chksum(vhd->ftr, VHD_FTR_SZ, 
+                                             &vhd->ftr[VHD_FTR_CHKSUM_OFF]));
+
+               // Write the footer back if needed
+               if (store_footer) {
+
+                       err = vdisk_size(vf->fd, &sz);
+                       if (err != 0) {
+                               VIDDBG(0, "Can't determine vdisk's size\n");
+                               return (-1);
+                       }
+
+                       // The footer occupies the last VHD_FTR_SZ bytes
+                       if (lseek(vf->fd, (sz-VHD_FTR_SZ), SEEK_SET) != 
+                           (sz - VHD_FTR_SZ)) {
+                               err = errno;
+                               perror("lseek");
+                               return (err);
+                       }
+                       bytes = write(vf->fd, vhd->ftr, VHD_FTR_SZ);
+                       if (bytes != VHD_FTR_SZ) {
+                               // errno is only valid when write() gave -1
+                               err = (bytes == -1) ? errno : EIO;
+                               perror("write");
+                               return (err);
+                       }
+
+                       // For non-fixed disks write footer at front as well 
+                       if (vhd_get_ftr_type(vhd->ftr) != VHD_TYPE_FIXED) {
+                               if (lseek(vf->fd, 0, SEEK_SET) != 0) {
+                                       err = errno;
+                                       perror("lseek");
+                                       return (err);
+                               }
+                               bytes = write(vf->fd, vhd->ftr, VHD_FTR_SZ);
+                               if (bytes != VHD_FTR_SZ) {
+                                       err = (bytes == -1) ? errno : EIO;
+                                       perror("write");
+                                       return (err);
+                               }
+                       }
+               }
+
+               if (stop)
+                       break;
+       }
+
+       // vf stays NULL when the file list was empty; nothing to sync then
+       if ((vf != NULL) && fsync(vf->fd))
+               VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+       return (0);
+}
+
+// Create a VHD file (fixed, dynamic or differencing) named filename.
+//
+// If filename already exists it is treated as a raw image and may only
+// be converted to a fixed VHD; it is truncated when it is larger than
+// the requested size.  For dynamic and differencing disks this writes a
+// footer copy, the dynamic header, an all-invalid BAT padded to a sector
+// boundary plus one extra 512-byte block, and (differencing only) the
+// parent locator.  The footer is always written last.
+//
+// filename - path of the file to create or convert
+// args     - vhd_args_t * with type, size, block size, uuid and parent;
+//            for differencing disks vhd_sz is overwritten with the
+//            parent's current size
+//
+// Returns 0 on success or an errno-style value on failure.  Every exit
+// after the file has been opened closes it and releases the header and
+// footer buffers and any parent state.
+int
+vhd_create_vdisk(char *filename, void *args)
+{
+       vhd_args_t *vhd_args = args;
+       vhd_file_t vhd;
+       uint32_t curtime, ftime;
+       int vfd = -1;
+       ssize_t bytes;
+       int i;
+       int err = 0;
+       char *hdr_pool = NULL, *ftr_pool = NULL;
+       struct vdisk_dev parent;
+
+       vfd = open(filename, O_CREAT|O_EXCL|O_RDWR, 0644);
+       if (vfd == -1) {
+               if (errno == EEXIST) {
+                       size_t sz;
+
+                       // File already exists
+                       if (vhd_args->type != VHD_TYPE_FIXED) {
+                               VIDDBG(0, "Raw files can only be converted to "
+                                      "fixed VHD format\n");
+                               return (EINVAL);
+                       }
+
+                       vfd = open(filename, O_RDWR, 0644);
+                       if (vfd == -1) {
+                               err = errno;
+                               VIDDBG(0, "vfd open(%s, O_RDWR) failed: %s\n", 
+                                      filename, strerror(err));
+                               return (err);
+                       }
+
+                       err = vdisk_size(vfd, &sz);
+                       if (err) {
+                               VIDDBG(0, "vdisk_size(%s) failed: %s\n",
+                                      filename, strerror(err));
+                               close(vfd);     // don't leak the descriptor
+                               return (err);
+                       }
+
+                       if (vhd_args->vhd_sz < sz) {
+                               VIDDBG(0, "WARNING: Truncating %s (%ld bytes) "
+                                      "to %ld bytes\n", 
+                                      filename, sz, vhd_args->vhd_sz);
+
+                               err = ftruncate(vfd, vhd_args->vhd_sz);
+                               if (err == -1) {
+                                       err = errno;
+                                       VIDDBG(0, "ftruncate(%s, %ld): %s\n",
+                                              filename, vhd_args->vhd_sz, 
+                                              strerror(err));
+                                       close(vfd);
+                                       return (err);
+                               }
+                       }
+               } else {
+                       err = errno;
+                       VIDDBG(0, "vfd open(%s, O_CREAT|O_EXCL|O_RDWR) "
+                              "failed: %s\n", filename, strerror(err));
+                       return (err);
+               }
+       }
+
+       // A NULL vdfd tells the cleanup code that parent was never set up
+       parent.vdfd = NULL; 
+
+       memset((char *)&vhd, 0, sizeof(vhd));
+       // Over-allocate so the footer can be moved to a 512-byte boundary
+       ftr_pool = vhd.ftr = vdisk_malloc(VHD_FTR_SZ+512);
+       if (vhd.ftr == NULL) {
+               VIDDBG(0, "Couldn't allocate VHD footer\n");
+               close(vfd);
+               return (ENOMEM);
+       }
+       while ((addr_t)vhd.ftr & 511) vhd.ftr++; 
+
+       vhd_set_ftr_cookie(vhd.ftr, VHD_COOKIE);
+       vhd_set_ftr_features(vhd.ftr, VHD_FEATURES_RSVD);
+       vhd_set_ftr_fformat(vhd.ftr, VHD_FORMAT_VER_1);
+       vhd_set_ftr_type(vhd.ftr, vhd_args->type);
+
+       curtime = time(NULL);
+       if (curtime == -1) {
+               err = errno;
+               perror("time");
+               goto out;
+       }
+       (void)vhd_time(&ftime, &curtime, 0);
+       vhd_set_ftr_timestamp(vhd.ftr, ftime);
+
+       vhd_set_ftr_cr_app(vhd.ftr, VHD_CREATOR_APP);
+       vhd_set_ftr_cr_ver(vhd.ftr, VHD_CREATOR_VER_1);
+       vhd_set_ftr_cr_hostos(vhd.ftr, VHD_CREATOR_HOST_OS);
+       vhd_set_ftr_orig_sz(vhd.ftr, vhd_args->vhd_sz);
+       vhd_set_ftr_cur_sz(vhd.ftr, vhd_args->vhd_sz);
+       vhd_set_ftr_geom(vhd.ftr, vhd_chs(vhd_args->vhd_sz));
+
+       vhd_set_ftr_uid(vhd.ftr, vhd_args->uuid);
+
+       if (vhd_args->type == VHD_TYPE_FIXED)
+               vhd_set_ftr_dataoff(vhd.ftr, VHD_FIXED_OFFSET);
+       else if ((vhd_args->type == VHD_TYPE_DYNAMIC) || 
+                (vhd_args->type == VHD_TYPE_DIFF))
+               vhd_set_ftr_dataoff(vhd.ftr, VHD_FTR_SZ);
+       else
+               ASSERT(0);
+
+       vhd_set_ftr_chksum(vhd.ftr, vhd_chksum(vhd.ftr, VHD_FTR_SZ, 
+                                              &vhd.ftr[VHD_FTR_CHKSUM_OFF]));  
+
+       // Create dynamic header
+       if ((vhd_args->type == VHD_TYPE_DYNAMIC) || 
+           (vhd_args->type == VHD_TYPE_DIFF)) {
+
+               uint32_t bat_entry, bat_sz;
+               loff_t data;
+               vhd_file_t *pvhd = NULL;
+               vd_file_t *pvf;
+
+               if (vhd_args->type == VHD_TYPE_DIFF) {
+                       // Read parent data
+                       err = vdisk_init(&parent, vhd_args->parent, NULL, 0);
+                       if (err) {
+                               VIDDBG(0, "Failed to initialize state for "
+                                      "parent %s\n", vhd_args->parent);
+                               // Init failed: make sure cleanup does not
+                               // call vdisk_fini() on this state
+                               parent.vdfd = NULL;
+                               goto out;
+                       }
+                       pvf = list_entry(parent.vdf_head.next, 
+                                        vd_file_t, vdf_list);
+                       pvhd = (vhd_file_t *)pvf->vdf;
+
+                       // Update footer fields inherited from parent
+                       vhd_set_ftr_orig_sz(vhd.ftr, 
+                                           vhd_get_ftr_orig_sz(pvhd->ftr));
+                       vhd_set_ftr_cur_sz(vhd.ftr, 
+                                          vhd_get_ftr_cur_sz(pvhd->ftr));
+                       vhd_set_ftr_geom(vhd.ftr, 
+                                        vhd_get_ftr_geom(pvhd->ftr));
+
+                       vhd_args->vhd_sz = vhd_get_ftr_cur_sz(vhd.ftr);
+               }
+
+               hdr_pool = vhd.dhdr = vdisk_malloc(VHD_DHDR_SZ+512);
+               if (vhd.dhdr == NULL) {
+                       // ftr_pool is released at "out"; freeing it here
+                       // as well would be a double free
+                       VIDDBG(0, "Couldn't allocate dynamic header\n");
+                       err = ENOMEM;
+                       goto out;
+               }
+               while ((addr_t)vhd.dhdr & 511) vhd.dhdr++; 
+
+               vhd_set_dhdr_cookie(vhd.dhdr, VHD_DYN_COOKIE);
+               vhd_set_dhdr_dataoff(vhd.dhdr, VHD_DYN_OFFSET);
+               vhd_set_dhdr_tbloff(vhd.dhdr, VHD_FTR_SZ+VHD_DHDR_SZ);
+               vhd_set_dhdr_hdrver(vhd.dhdr, VHD_DYN_HDR_VER_1);
+               vhd_set_dhdr_tbl_entries(vhd.dhdr,
+                                        vhd_args->vhd_sz/vhd_args->blocksz);
+               vhd_set_dhdr_blksz(vhd.dhdr, vhd_args->blocksz);
+
+               vhd_set_dhdr_chksum(vhd.dhdr, 
+                                   vhd_chksum(vhd.dhdr, VHD_DHDR_SZ, 
+                                              &vhd.dhdr[VHD_DHDR_CHKSUM_OFF]));
+
+               // Write the copy of the footer first
+               bytes = write(vfd, vhd.ftr, VHD_FTR_SZ);
+               if (bytes != VHD_FTR_SZ) {
+                       // Save the error before perror() can change errno;
+                       // errno is only valid when write() returned -1
+                       err = (bytes == -1) ? errno : EIO;
+                       perror("write");
+                       goto out;
+               }
+
+               // Write the dynamic header
+               bytes = write(vfd, vhd.dhdr, VHD_DHDR_SZ);
+               if (bytes != VHD_DHDR_SZ) {
+                       err = (bytes == -1) ? errno : EIO;
+                       perror("write");
+                       goto out;
+               }
+
+               // Initialize BAT
+               // XXX: Make it faster perhaps?
+               bat_entry = VHD_BAT_INVALID_ENTRY;
+               for (i=0; i< vhd_get_dhdr_tbl_entries(vhd.dhdr); i++) {
+                       bytes = write(vfd, &bat_entry, 4);
+                       if (bytes != 4) {
+                               err = (bytes == -1) ? errno : EIO;
+                               perror("write");
+                               goto out;
+                       }
+               }
+
+               // BAT must end on sector boundary (512 bytes)
+               bat_entry = 0;
+               bat_sz = vhd_get_dhdr_tbl_entries(vhd.dhdr) << 2;
+               if (bat_sz & 511) {
+                       for (i=0; i<512-(bat_sz&511);i++) {
+                               // Write 1 byte at a time
+                               bytes = write(vfd, &bat_entry, 1);
+                               if (bytes != 1) {
+                                       err = (bytes == -1) ? errno : EIO;
+                                       perror("write");
+                                       goto out;
+                               }
+                       }
+               }
+
+               // XXX: It appears that there is a 512B block
+               // at the end of BAT, which is not mentioned in the spec
+               for (i=0; i<512>>2; i++) {
+                       bytes = write(vfd, &bat_entry, 4);
+                       if (bytes != 4) {
+                               err = (bytes == -1) ? errno : EIO;
+                               perror("write");
+                               goto out;
+                       }
+               }
+
+               if (vhd_args->type == VHD_TYPE_DIFF) {
+                       // This will store dynamic header again, but that's OK
+                       err = vhd_store_parent(vfd, &vhd, pvhd,
+                                              vhd_args->parent, &data);
+                       if (err) {
+                               VIDDBG(0, "Failed to store parent name (%s)\n",
+                                      vhd_args->parent);
+                               goto out;
+                       }
+
+                       // Position just past the locator data so the
+                       // footer below lands after it
+                       if (lseek(vfd, data, SEEK_SET) != data) {
+                               err = errno;
+                               perror("lseek");
+                               goto out;
+                       }
+               }
+       } else {
+               // for fixed disk, seek to the end of the file
+               if (lseek(vfd, vhd_args->vhd_sz, SEEK_SET) != 
+                   vhd_args->vhd_sz) {
+                       err = errno;
+                       perror("lseek");
+                       goto out;
+               }
+       }
+
+       // Write footer. For fixed disks allocate whole filesize
+       bytes = write(vfd, vhd.ftr, VHD_FTR_SZ);
+       if (bytes != VHD_FTR_SZ) {
+               err = (bytes == -1) ? errno : EIO;
+               perror("write");
+               goto out;
+       }
+
+out:
+       // Single cleanup point: parent state, both buffer pools (freed via
+       // their unaligned base pointers) and the file descriptor
+       if (parent.vdfd != NULL)
+               vdisk_fini(&parent);
+
+       if (ftr_pool)
+               vdisk_free(ftr_pool);
+       if (hdr_pool)
+               vdisk_free(hdr_pool);
+       if (vfd != -1) {
+               if (fsync(vfd))
+                       VIDDBG(0, "fsync: %s\n", strerror(errno));
+               close(vfd);
+       }
+
+       return (err);
+}

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.