# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 06d84bf87159174ef040a67f4ce90fcb72469b14
# Parent 97dbd9524a7e918f2ffb2d5155a7e32c48f7f355
# Parent 2f83ff9f6bd2a7987c297b84bfce1f8e22409cae
Merge latest xen-unstable into xen-ia64-unstable
diff -r 97dbd9524a7e -r 06d84bf87159 .hgignore
--- a/.hgignore Thu Sep 22 17:34:14 2005
+++ b/.hgignore Thu Sep 22 17:42:01 2005
@@ -86,6 +86,9 @@
^tools/check/\..*$
^tools/console/xenconsoled$
^tools/console/xenconsole$
+^tools/debugger/gdb/gdb-6\.2\.1\.tar\.bz2$
+^tools/debugger/gdb/gdb-6\.2\.1/.*$
+^tools/debugger/gdb/gdb-6\.2\.1-linux-i386-xen/.*$
^tools/debugger/pdb/pdb$
^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$
^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$
@@ -136,9 +139,10 @@
^tools/vnet/vnet-module/\..*\.cmd$
^tools/vnet/vnet-module/\.tmp_versions/.*$
^tools/vnet/vnet-module/vnet_module\.mod\..*$
-^tools/vtpm/vtpm*
-^tools/vtpm/tpm_emulator-*
-^tools/vtpm_manager/manager/vtpm_managerd
+^tools/vtpm/tpm_emulator/.*$
+^tools/vtpm/tpm_emulator-.*\.tar\.gz$
+^tools/vtpm/vtpm/.*$
+^tools/vtpm_manager/manager/vtpm_managerd$
^tools/xcutils/xc_restore$
^tools/xcutils/xc_save$
^tools/xenstat/xentop/xentop$
@@ -156,6 +160,7 @@
^tools/xenstore/xs_stress$
^tools/xenstore/xs_test$
^tools/xenstore/xs_watch_stress$
+^tools/xentrace/xenctx$
^tools/xentrace/xentrace$
^xen/BLOG$
^xen/TAGS$
diff -r 97dbd9524a7e -r 06d84bf87159 Makefile
--- a/Makefile Thu Sep 22 17:34:14 2005
+++ b/Makefile Thu Sep 22 17:42:01 2005
@@ -98,11 +98,14 @@
$(MAKE) -C tools clean
$(MAKE) -C docs clean
-# clean, but blow away kernel build tree plus tar balls
-mrproper: clean
+# clean, but blow away kernel build tree plus tarballs
+distclean: clean
rm -rf dist patches/tmp
for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done
for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done
+
+# Linux name for GNU distclean
+mrproper: distclean
install-logging: LOGGING=logging-0.4.9.2
install-logging:
@@ -142,7 +145,7 @@
@echo 'Cleaning targets:'
@echo ' clean - clean the Xen, tools and docs (but not'
@echo ' guest kernel) trees'
- @echo ' mrproper - clean plus delete kernel tarballs and
kernel'
+ @echo ' distclean - clean plus delete kernel tarballs and
kernel'
@echo ' build trees'
@echo ' kdelete - delete guest kernel build trees'
@echo ' kclean - clean guest kernel build trees'
@@ -163,27 +166,25 @@
uninstall:
[ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s`
rm -rf $(D)/etc/init.d/xend*
- rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil*
- rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen
- rm -rf $(D)/usr/$(LIBDIR)/share/xen $(D)/usr/$(LIBDIR)/libxenstore*
+ rm -rf $(D)/etc/hotplug/xen-backend.agent
rm -rf $(D)/var/run/xen* $(D)/var/lib/xen*
- rm -rf $(D)/usr/include/xcs_proto.h $(D)/usr/include/xc.h
- rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
- rm -rf $(D)/usr/sbin/xcs $(D)/usr/sbin/xcsdump $(D)/usr/sbin/xen*
- rm -rf $(D)/usr/sbin/netfix
- rm -rf $(D)/usr/sbin/xfrd $(D)/usr/sbin/xm
- rm -rf $(D)/usr/share/doc/xen $(D)/usr/man/man*/xentrace*
- rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
rm -rf $(D)/boot/*xen*
rm -rf $(D)/lib/modules/*xen*
+ rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/lomount
rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen
rm -rf $(D)/usr/bin/xc_shadow
- rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen
+ rm -rf $(D)/usr/include/xenctrl.h
+ rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
+ rm -rf $(D)/usr/include/xen
+ rm -rf $(D)/usr/$(LIBDIR)/libxenctrl* $(D)/usr/$(LIBDIR)/libxenguest*
+ rm -rf $(D)/usr/$(LIBDIR)/libxenstore*
+ rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/$(LIBDIR)/xen
+ rm -rf $(D)/usr/libexec/xen
+ rm -rf $(D)/usr/sbin/xen* $(D)/usr/sbin/netfix $(D)/usr/sbin/xm
+ rm -rf $(D)/usr/share/doc/xen
+ rm -rf $(D)/usr/share/xen
rm -rf $(D)/usr/share/man/man1/xen*
rm -rf $(D)/usr/share/man/man8/xen*
- rm -rf $(D)/usr/lib/xen
- rm -rf $(D)/etc/hotplug.d/xen-backend
- rm -rf $(D)/etc/hotplug/xen-backend.agent
# Legacy targets for compatibility
linux24:
diff -r 97dbd9524a7e -r 06d84bf87159 docs/Makefile
--- a/docs/Makefile Thu Sep 22 17:34:14 2005
+++ b/docs/Makefile Thu Sep 22 17:42:01 2005
@@ -12,7 +12,7 @@
pkgdocdir := /usr/share/doc/xen
-DOC_TEX := $(wildcard src/*.tex)
+DOC_TEX := src/user.tex src/interface.tex
DOC_PS := $(patsubst src/%.tex,ps/%.ps,$(DOC_TEX))
DOC_PDF := $(patsubst src/%.tex,pdf/%.pdf,$(DOC_TEX))
DOC_HTML := $(patsubst src/%.tex,html/%/index.html,$(DOC_TEX))
@@ -36,11 +36,12 @@
$(MAKE) $(DOC_HTML); fi
python-dev-docs:
- mkdir -p api/tools/python
+ @mkdir -v -p api/tools/python
@if which $(DOXYGEN) 1>/dev/null 2>/dev/null; then \
echo "Running doxygen to generate Python tools APIs ... "; \
$(DOXYGEN) Doxyfile; \
- $(MAKE) -C api/tools/python/latex ; fi
+ $(MAKE) -C api/tools/python/latex ; else \
+ echo "Doxygen not installed; skipping python-dev-docs."; fi
clean:
rm -rf .word_count *.aux *.dvi *.bbl *.blg *.glo *.idx *~
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface.tex
--- a/docs/src/interface.tex Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface.tex Thu Sep 22 17:42:01 2005
@@ -87,1084 +87,23 @@
mechanism and policy within the system.
+%% chapter Virtual Architecture moved to architecture.tex
+\include{src/interface/architecture}
-\chapter{Virtual Architecture}
+%% chapter Memory moved to memory.tex
+\include{src/interface/memory}
-On a Xen-based system, the hypervisor itself runs in {\it ring 0}. It
-has full access to the physical memory available in the system and is
-responsible for allocating portions of it to the domains. Guest
-operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
-they see fit. Segmentation is used to prevent the guest OS from
-accessing the portion of the address space that is reserved for
-Xen. We expect most guest operating systems will use ring 1 for their
-own operation and place applications in ring 3.
+%% chapter Devices moved to devices.tex
+\include{src/interface/devices}
-In this chapter we consider the basic virtual architecture provided
-by Xen: the basic CPU state, exception and interrupt handling, and
-time. Other aspects such as memory and device access are discussed
-in later chapters.
-
-\section{CPU state}
-
-All privileged state must be handled by Xen. The guest OS has no
-direct access to CR3 and is not permitted to update privileged bits in
-EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
-these are analogous to system calls but occur from ring 1 to ring 0.
-
-A list of all hypercalls is given in Appendix~\ref{a:hypercalls}.
-
-
-
-\section{Exceptions}
-
-A virtual IDT is provided --- a domain can submit a table of trap
-handlers to Xen via the {\tt set\_trap\_table()} hypercall. Most trap
-handlers are identical to native x86 handlers, although the page-fault
-handler is somewhat different.
-
-
-\section{Interrupts and events}
-
-Interrupts are virtualized by mapping them to \emph{events}, which are
-delivered asynchronously to the target domain using a callback
-supplied via the {\tt set\_callbacks()} hypercall. A guest OS can map
-these events onto its standard interrupt dispatch mechanisms. Xen is
-responsible for determining the target domain that will handle each
-physical interrupt source. For more details on the binding of event
-sources to events, see Chapter~\ref{c:devices}.
-
-
-
-\section{Time}
-
-Guest operating systems need to be aware of the passage of both real
-(or wallclock) time and their own `virtual time' (the time for
-which they have been executing). Furthermore, Xen has a notion of
-time which is used for scheduling. The following notions of
-time are provided:
-
-\begin{description}
-\item[Cycle counter time.]
-
-This provides a fine-grained time reference. The cycle counter time is
-used to accurately extrapolate the other time references. On SMP machines
-it is currently assumed that the cycle counter time is synchronized between
-CPUs. The current x86-based implementation achieves this within inter-CPU
-communication latencies.
-
-\item[System time.]
-
-This is a 64-bit counter which holds the number of nanoseconds that
-have elapsed since system boot.
-
-
-\item[Wall clock time.]
-
-This is the time of day in a Unix-style {\tt struct timeval} (seconds
-and microseconds since 1 January 1970, adjusted by leap seconds). An
-NTP client hosted by {\it domain 0} can keep this value accurate.
-
-
-\item[Domain virtual time.]
-
-This progresses at the same pace as system time, but only while a
-domain is executing --- it stops while a domain is de-scheduled.
-Therefore the share of the CPU that a domain receives is indicated by
-the rate at which its virtual time increases.
-
-\end{description}
-
-
-Xen exports timestamps for system time and wall-clock time to guest
-operating systems through a shared page of memory. Xen also provides
-the cycle counter time at the instant the timestamps were calculated,
-and the CPU frequency in Hertz. This allows the guest to extrapolate
-system and wall-clock times accurately based on the current cycle
-counter time.
-
-Since all time stamps need to be updated and read \emph{atomically}
-two version numbers are also stored in the shared info page. The
-first is incremented prior to an update, while the second is only
-incremented afterwards. Thus a guest can be sure that it read a consistent
-state by checking the two version numbers are equal.
-
-Xen includes a periodic ticker which sends a timer event to the
-currently executing domain every 10ms. The Xen scheduler also sends a
-timer event whenever a domain is scheduled; this allows the guest OS
-to adjust for the time that has passed while it has been inactive. In
-addition, Xen allows each domain to request that they receive a timer
-event sent at a specified system time by using the {\tt
-set\_timer\_op()} hypercall. Guest OSes may use this timer to
-implement timeout values when they block.
-
-
-
-%% % akw: demoting this to a section -- not sure if there is any point
-%% % though, maybe just remove it.
-
-\section{Xen CPU Scheduling}
-
-Xen offers a uniform API for CPU schedulers. It is possible to choose
-from a number of schedulers at boot and it should be easy to add more.
-The BVT, Atropos and Round Robin schedulers are part of the normal
-Xen distribution. BVT provides proportional fair shares of the CPU to
-the running domains. Atropos can be used to reserve absolute shares
-of the CPU for each domain. Round-robin is provided as an example of
-Xen's internal scheduler API.
-
-\paragraph*{Note: SMP host support}
-Xen has always supported SMP host systems. Domains are statically assigned to
-CPUs, either at creation time or when manually pinning to a particular CPU.
-The current schedulers then run locally on each CPU to decide which of the
-assigned domains should be run there. The user-level control software
-can be used to perform coarse-grain load-balancing between CPUs.
-
-
-%% More information on the characteristics and use of these schedulers is
-%% available in {\tt Sched-HOWTO.txt}.
-
-
-\section{Privileged operations}
-
-Xen exports an extended interface to privileged domains (viz.\ {\it
- Domain 0}). This allows such domains to build and boot other domains
-on the server, and provides control interfaces for managing
-scheduling, memory, networking, and block devices.
-
-
-\chapter{Memory}
-\label{c:memory}
-
-Xen is responsible for managing the allocation of physical memory to
-domains, and for ensuring safe use of the paging and segmentation
-hardware.
-
-
-\section{Memory Allocation}
-
-
-Xen resides within a small fixed portion of physical memory; it also
-reserves the top 64MB of every virtual address space. The remaining
-physical memory is available for allocation to domains at a page
-granularity. Xen tracks the ownership and use of each page, which
-allows it to enforce secure partitioning between domains.
-
-Each domain has a maximum and current physical memory allocation.
-A guest OS may run a `balloon driver' to dynamically adjust its
-current memory allocation up to its limit.
-
-
-%% XXX SMH: I use machine and physical in the next section (which
-%% is kinda required for consistency with code); wonder if this
-%% section should use same terms?
-%%
-%% Probably.
-%%
-%% Merging this and below section at some point prob makes sense.
-
-\section{Pseudo-Physical Memory}
-
-Since physical memory is allocated and freed on a page granularity,
-there is no guarantee that a domain will receive a contiguous stretch
-of physical memory. However most operating systems do not have good
-support for operating in a fragmented physical address space. To aid
-porting such operating systems to run on top of Xen, we make a
-distinction between \emph{machine memory} and \emph{pseudo-physical
-memory}.
-
-Put simply, machine memory refers to the entire amount of memory
-installed in the machine, including that reserved by Xen, in use by
-various domains, or currently unallocated. We consider machine memory
-to comprise a set of 4K \emph{machine page frames} numbered
-consecutively starting from 0. Machine frame numbers mean the same
-within Xen or any domain.
-
-Pseudo-physical memory, on the other hand, is a per-domain
-abstraction. It allows a guest operating system to consider its memory
-allocation to consist of a contiguous range of physical page frames
-starting at physical frame 0, despite the fact that the underlying
-machine page frames may be sparsely allocated and in any order.
-
-To achieve this, Xen maintains a globally readable {\it
-machine-to-physical} table which records the mapping from machine page
-frames to pseudo-physical ones. In addition, each domain is supplied
-with a {\it physical-to-machine} table which performs the inverse
-mapping. Clearly the machine-to-physical table has size proportional
-to the amount of RAM installed in the machine, while each
-physical-to-machine table has size proportional to the memory
-allocation of the given domain.
-
-Architecture dependent code in guest operating systems can then use
-the two tables to provide the abstraction of pseudo-physical
-memory. In general, only certain specialized parts of the operating
-system (such as page table management) needs to understand the
-difference between machine and pseudo-physical addresses.
-
-\section{Page Table Updates}
-
-In the default mode of operation, Xen enforces read-only access to
-page tables and requires guest operating systems to explicitly request
-any modifications. Xen validates all such requests and only applies
-updates that it deems safe. This is necessary to prevent domains from
-adding arbitrary mappings to their page tables.
-
-To aid validation, Xen associates a type and reference count with each
-memory page. A page has one of the following
-mutually-exclusive types at any point in time: page directory ({\sf
-PD}), page table ({\sf PT}), local descriptor table ({\sf LDT}),
-global descriptor table ({\sf GDT}), or writable ({\sf RW}). Note that
-a guest OS may always create readable mappings of its own memory
-regardless of its current type.
-%%% XXX: possibly explain more about ref count 'lifecyle' here?
-This mechanism is used to
-maintain the invariants required for safety; for example, a domain
-cannot have a writable mapping to any part of a page table as this
-would require the page concerned to simultaneously be of types {\sf
- PT} and {\sf RW}.
-
-
-%\section{Writable Page Tables}
-
-Xen also provides an alternative mode of operation in which guests be
-have the illusion that their page tables are directly writable. Of
-course this is not really the case, since Xen must still validate
-modifications to ensure secure partitioning. To this end, Xen traps
-any write attempt to a memory page of type {\sf PT} (i.e., that is
-currently part of a page table). If such an access occurs, Xen
-temporarily allows write access to that page while at the same time
-{\em disconnecting} it from the page table that is currently in
-use. This allows the guest to safely make updates to the page because
-the newly-updated entries cannot be used by the MMU until Xen
-revalidates and reconnects the page.
-Reconnection occurs automatically in a number of situations: for
-example, when the guest modifies a different page-table page, when the
-domain is preempted, or whenever the guest uses Xen's explicit
-page-table update interfaces.
-
-Finally, Xen also supports a form of \emph{shadow page tables} in
-which the guest OS uses a independent copy of page tables which are
-unknown to the hardware (i.e.\ which are never pointed to by {\tt
-cr3}). Instead Xen propagates changes made to the guest's tables to the
-real ones, and vice versa. This is useful for logging page writes
-(e.g.\ for live migration or checkpoint). A full version of the shadow
-page tables also allows guest OS porting with less effort.
-
-\section{Segment Descriptor Tables}
-
-On boot a guest is supplied with a default GDT, which does not reside
-within its own memory allocation. If the guest wishes to use other
-than the default `flat' ring-1 and ring-3 segments that this GDT
-provides, it must register a custom GDT and/or LDT with Xen,
-allocated from its own memory. Note that a number of GDT
-entries are reserved by Xen -- any custom GDT must also include
-sufficient space for these entries.
-
-For example, the following hypercall is used to specify a new GDT:
-
-\begin{quote}
-int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em entries})
-
-{\em frame\_list}: An array of up to 16 machine page frames within
-which the GDT resides. Any frame registered as a GDT frame may only
-be mapped read-only within the guest's address space (e.g., no
-writable mappings, no use as a page-table page, and so on).
-
-{\em entries}: The number of descriptor-entry slots in the GDT. Note
-that the table must be large enough to contain Xen's reserved entries;
-thus we must have `{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}\ '.
-Note also that, after registering the GDT, slots {\em FIRST\_} through
-{\em LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest and
-may be overwritten by Xen.
-\end{quote}
-
-The LDT is updated via the generic MMU update mechanism (i.e., via
-the {\tt mmu\_update()} hypercall.
-
-\section{Start of Day}
-
-The start-of-day environment for guest operating systems is rather
-different to that provided by the underlying hardware. In particular,
-the processor is already executing in protected mode with paging
-enabled.
-
-{\it Domain 0} is created and booted by Xen itself. For all subsequent
-domains, the analogue of the boot-loader is the {\it domain builder},
-user-space software running in {\it domain 0}. The domain builder
-is responsible for building the initial page tables for a domain
-and loading its kernel image at the appropriate virtual address.
-
-
-
-\chapter{Devices}
-\label{c:devices}
-
-Devices such as network and disk are exported to guests using a
-split device driver. The device driver domain, which accesses the
-physical device directly also runs a {\em backend} driver, serving
-requests to that device from guests. Each guest will use a simple
-{\em frontend} driver, to access the backend. Communication between these
-domains is composed of two parts: First, data is placed onto a shared
-memory page between the domains. Second, an event channel between the
-two domains is used to pass notification that data is outstanding.
-This separation of notification from data transfer allows message
-batching, and results in very efficient device access.
-
-Event channels are used extensively in device virtualization; each
-domain has a number of end-points or \emph{ports} each of which
-may be bound to one of the following \emph{event sources}:
-\begin{itemize}
- \item a physical interrupt from a real device,
- \item a virtual interrupt (callback) from Xen, or
- \item a signal from another domain
-\end{itemize}
-
-Events are lightweight and do not carry much information beyond
-the source of the notification. Hence when performing bulk data
-transfer, events are typically used as synchronization primitives
-over a shared memory transport. Event channels are managed via
-the {\tt event\_channel\_op()} hypercall; for more details see
-Section~\ref{s:idc}.
-
-This chapter focuses on some individual device interfaces
-available to Xen guests.
-
-\section{Network I/O}
-
-Virtual network device services are provided by shared memory
-communication with a backend domain. From the point of view of
-other domains, the backend may be viewed as a virtual ethernet switch
-element with each domain having one or more virtual network interfaces
-connected to it.
-
-\subsection{Backend Packet Handling}
-
-The backend driver is responsible for a variety of actions relating to
-the transmission and reception of packets from the physical device.
-With regard to transmission, the backend performs these key actions:
-
-\begin{itemize}
-\item {\bf Validation:} To ensure that domains do not attempt to
- generate invalid (e.g. spoofed) traffic, the backend driver may
- validate headers ensuring that source MAC and IP addresses match the
- interface that they have been sent from.
-
- Validation functions can be configured using standard firewall rules
- ({\small{\tt iptables}} in the case of Linux).
-
-\item {\bf Scheduling:} Since a number of domains can share a single
- physical network interface, the backend must mediate access when
- several domains each have packets queued for transmission. This
- general scheduling function subsumes basic shaping or rate-limiting
- schemes.
-
-\item {\bf Logging and Accounting:} The backend domain can be
- configured with classifier rules that control how packets are
- accounted or logged. For example, log messages might be generated
- whenever a domain attempts to send a TCP packet containing a SYN.
-\end{itemize}
-
-On receipt of incoming packets, the backend acts as a simple
-demultiplexer: Packets are passed to the appropriate virtual
-interface after any necessary logging and accounting have been carried
-out.
-
-\subsection{Data Transfer}
-
-Each virtual interface uses two ``descriptor rings'', one for transmit,
-the other for receive. Each descriptor identifies a block of contiguous
-physical memory allocated to the domain.
-
-The transmit ring carries packets to transmit from the guest to the
-backend domain. The return path of the transmit ring carries messages
-indicating that the contents have been physically transmitted and the
-backend no longer requires the associated pages of memory.
-
-To receive packets, the guest places descriptors of unused pages on
-the receive ring. The backend will return received packets by
-exchanging these pages in the domain's memory with new pages
-containing the received data, and passing back descriptors regarding
-the new packets on the ring. This zero-copy approach allows the
-backend to maintain a pool of free pages to receive packets into, and
-then deliver them to appropriate domains after examining their
-headers.
-
-%
-%Real physical addresses are used throughout, with the domain performing
-%translation from pseudo-physical addresses if that is necessary.
-
-If a domain does not keep its receive ring stocked with empty buffers then
-packets destined to it may be dropped. This provides some defence against
-receive livelock problems because an overload domain will cease to receive
-further data. Similarly, on the transmit path, it provides the application
-with feedback on the rate at which packets are able to leave the system.
-
-
-Flow control on rings is achieved by including a pair of producer
-indexes on the shared ring page. Each side will maintain a private
-consumer index indicating the next outstanding message. In this
-manner, the domains cooperate to divide the ring into two message
-lists, one in each direction. Notification is decoupled from the
-immediate placement of new messages on the ring; the event channel
-will be used to generate notification when {\em either} a certain
-number of outstanding messages are queued, {\em or} a specified number
-of nanoseconds have elapsed since the oldest message was placed on the
-ring.
-
-% Not sure if my version is any better -- here is what was here before:
-%% Synchronization between the backend domain and the guest is achieved using
-%% counters held in shared memory that is accessible to both. Each ring has
-%% associated producer and consumer indices indicating the area in the ring
-%% that holds descriptors that contain data. After receiving {\it n} packets
-%% or {\t nanoseconds} after receiving the first packet, the hypervisor sends
-%% an event to the domain.
-
-\section{Block I/O}
-
-All guest OS disk access goes through the virtual block device VBD
-interface. This interface allows domains access to portions of block
-storage devices visible to the the block backend device. The VBD
-interface is a split driver, similar to the network interface
-described above. A single shared memory ring is used between the
-frontend and backend drivers, across which read and write messages are
-sent.
-
-Any block device accessible to the backend domain, including
-network-based block (iSCSI, *NBD, etc), loopback and LVM/MD devices,
-can be exported as a VBD. Each VBD is mapped to a device node in the
-guest, specified in the guest's startup configuration.
-
-Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
-similar functionality can be achieved using the more complete LVM
-system, which is already in widespread use.
-
-\subsection{Data Transfer}
-
-The single ring between the guest and the block backend supports three
-messages:
-
-\begin{description}
-\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to this guest
- from the backend. The request includes a descriptor of a free page
- into which the reply will be written by the backend.
-
-\item [{\small {\tt READ}}:] Read data from the specified block device. The
- front end identifies the device and location to read from and
- attaches pages for the data to be copied to (typically via DMA from
- the device). The backend acknowledges completed read requests as
- they finish.
-
-\item [{\small {\tt WRITE}}:] Write data to the specified block device. This
- functions essentially as {\small {\tt READ}}, except that the data moves to
- the device instead of from it.
-\end{description}
-
-% um... some old text
-%% In overview, the same style of descriptor-ring that is used for
-%% network packets is used here. Each domain has one ring that carries
-%% operation requests to the hypervisor and carries the results back
-%% again.
-
-%% Rather than copying data, the backend simply maps the domain's buffers
-%% in order to enable direct DMA to them. The act of mapping the buffers
-%% also increases the reference counts of the underlying pages, so that
-%% the unprivileged domain cannot try to return them to the hypervisor,
-%% install them as page tables, or any other unsafe behaviour.
-%% %block API here
-
-
-\chapter{Further Information}
-
-
-If you have questions that are not answered by this manual, the
-sources of information listed below may be of interest to you. Note
-that bug reports, suggestions and contributions related to the
-software (or the documentation) should be sent to the Xen developers'
-mailing list (address below).
-
-\section{Other documentation}
-
-If you are mainly interested in using (rather than developing for)
-Xen, the {\em Xen Users' Manual} is distributed in the {\tt docs/}
-directory of the Xen source distribution.
-
-% Various HOWTOs are also available in {\tt docs/HOWTOS}.
-
-\section{Online references}
-
-The official Xen web site is found at:
-\begin{quote}
-{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
-\end{quote}
-
-This contains links to the latest versions of all on-line
-documentation.
-
-\section{Mailing lists}
-
-There are currently four official Xen mailing lists:
-
-\begin{description}
-\item[xen-devel@xxxxxxxxxxxxxxxxxxx] Used for development
-discussions and bug reports. Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-devel}}
-\item[xen-users@xxxxxxxxxxxxxxxxxxx] Used for installation and usage
-discussions and requests for help. Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-users}}
-\item[xen-announce@xxxxxxxxxxxxxxxxxxx] Used for announcements only.
-Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-announce}}
-\item[xen-changelog@xxxxxxxxxxxxxxxxxxx] Changelog feed
-from the unstable and 2.0 trees - developer oriented. Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-changelog}}
-\end{description}
-
-Of these, xen-devel is the most active.
-
-
+%% chapter Further Information moved to further_info.tex
+\include{src/interface/further_info}
\appendix
-%\newcommand{\hypercall}[1]{\vspace{5mm}{\large\sf #1}}
-
-
-
-
-
-\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
-
-
-
-
-
-
-\chapter{Xen Hypercalls}
-\label{a:hypercalls}
-
-Hypercalls represent the procedural interface to Xen; this appendix
-categorizes and describes the current set of hypercalls.
-
-\section{Invoking Hypercalls}
-
-Hypercalls are invoked in a manner analogous to system calls in a
-conventional operating system; a software interrupt is issued which
-vectors to an entry point within Xen. On x86\_32 machines the
-instruction required is {\tt int \$82}; the (real) IDT is setup so
-that this may only be issued from within ring 1. The particular
-hypercall to be invoked is contained in {\tt EAX} --- a list
-mapping these values to symbolic hypercall names can be found
-in {\tt xen/include/public/xen.h}.
-
-On some occasions a set of hypercalls will be required to carry
-out a higher-level function; a good example is when a guest
-operating wishes to context switch to a new process which
-requires updating various privileged CPU state. As an optimization
-for these cases, there is a generic mechanism to issue a set of
-hypercalls as a batch:
-
-\begin{quote}
-\hypercall{multicall(void *call\_list, int nr\_calls)}
-
-Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
-the array of {\tt multicall\_entry\_t} structures pointed to be {\tt
-call\_list}. Each entry contains the hypercall operation code followed
-by up to 7 word-sized arguments.
-\end{quote}
-
-Note that multicalls are provided purely as an optimization; there is
-no requirement to use them when first porting a guest operating
-system.
-
-
-\section{Virtual CPU Setup}
-
-At start of day, a guest operating system needs to setup the virtual
-CPU it is executing on. This includes installing vectors for the
-virtual IDT so that the guest OS can handle interrupts, page faults,
-etc. However the very first thing a guest OS must setup is a pair
-of hypervisor callbacks: these are the entry points which Xen will
-use when it wishes to notify the guest OS of an occurrence.
-
-\begin{quote}
-\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
- event\_address, unsigned long failsafe\_selector, unsigned long
- failsafe\_address) }
-
-Register the normal (``event'') and failsafe callbacks for
-event processing. In each case the code segment selector and
-address within that segment are provided. The selectors must
-have RPL 1; in XenLinux we simply use the kernel's CS for both
-{\tt event\_selector} and {\tt failsafe\_selector}.
-
-The value {\tt event\_address} specifies the address of the guest OSes
-event handling and dispatch routine; the {\tt failsafe\_address}
-specifies a separate entry point which is used only if a fault occurs
-when Xen attempts to use the normal callback.
-\end{quote}
-
-
-After installing the hypervisor callbacks, the guest OS can
-install a `virtual IDT' by using the following hypercall:
-
-\begin{quote}
-\hypercall{set\_trap\_table(trap\_info\_t *table)}
-
-Install one or more entries into the per-domain
-trap handler table (essentially a software version of the IDT).
-Each entry in the array pointed to by {\tt table} includes the
-exception vector number with the corresponding segment selector
-and entry point. Most guest OSes can use the same handlers on
-Xen as when running on the real hardware; an exception is the
-page fault handler (exception vector 14) where a modified
-stack-frame layout is used.
-
-
-\end{quote}
-
-
-
-\section{Scheduling and Timer}
-
-Domains are preemptively scheduled by Xen according to the
-parameters installed by domain 0 (see Section~\ref{s:dom0ops}).
-In addition, however, a domain may choose to explicitly
-control certain behavior with the following hypercall:
-
-\begin{quote}
-\hypercall{sched\_op(unsigned long op)}
-
-Request scheduling operation from hypervisor. The options are: {\it
-yield}, {\it block}, and {\it shutdown}. {\it yield} keeps the
-calling domain runnable but may cause a reschedule if other domains
-are runnable. {\it block} removes the calling domain from the run
-queue and cause is to sleeps until an event is delivered to it. {\it
-shutdown} is used to end the domain's execution; the caller can
-additionally specify whether the domain should reboot, halt or
-suspend.
-\end{quote}
-
-To aid the implementation of a process scheduler within a guest OS,
-Xen provides a virtual programmable timer:
-
-\begin{quote}
-\hypercall{set\_timer\_op(uint64\_t timeout)}
-
-Request a timer event to be sent at the specified system time (time
-in nanoseconds since system boot). The hypercall actually passes the
-64-bit timeout value as a pair of 32-bit values.
-
-\end{quote}
-
-Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op}
-allows block-with-timeout semantics.
-
-
-\section{Page Table Management}
-
-Since guest operating systems have read-only access to their page
-tables, Xen must be involved when making any changes. The following
-multi-purpose hypercall can be used to modify page-table entries,
-update the machine-to-physical mapping table, flush the TLB, install
-a new page-table base pointer, and more.
-
-\begin{quote}
-\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)}
-
-Update the page table for the domain; a set of {\tt count} updates are
-submitted for processing in a batch, with {\tt success\_count} being
-updated to report the number of successful updates.
-
-Each element of {\tt req[]} contains a pointer (address) and value;
-the least significant 2-bits of the pointer are used to distinguish
-the type of update requested as follows:
-\begin{description}
-
-\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
-page table entry to the associated value; Xen will check that the
-update is safe, as described in Chapter~\ref{c:memory}.
-
-\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
- machine-to-physical table. The calling domain must own the machine
- page in question (or be privileged).
-
-\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
-The set of additional MMU operations is considerable, and includes
-updating {\tt cr3} (or just re-installing it for a TLB flush),
-flushing the cache, installing a new LDT, or pinning \& unpinning
-page-table pages (to ensure their reference count doesn't drop to zero
-which would require a revalidation of all entries).
-
-Further extended commands are used to deal with granting and
-acquiring page ownership; see Section~\ref{s:idc}.
-
-
-\end{description}
-
-More details on the precise format of all commands can be
-found in {\tt xen/include/public/xen.h}.
-
-
-\end{quote}
-
-Explicitly updating batches of page table entries is extremely
-efficient, but can require a number of alterations to the guest
-OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
-recommended for new OS ports.
-
-Regardless of which page table update mode is being used, however,
-there are some occasions (notably handling a demand page fault) where
-a guest OS will wish to modify exactly one PTE rather than a
-batch. This is catered for by the following:
-
-\begin{quote}
-\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
-val, \\ unsigned long flags)}
-
-Update the currently installed PTE for the page {\tt page\_nr} to
-{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification
-is safe before applying it. The {\tt flags} determine which kind
-of TLB flush, if any, should follow the update.
-
-\end{quote}
-
-Finally, sufficiently privileged domains may occasionally wish to manipulate
-the pages of others:
-\begin{quote}
-
-\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
-unsigned long val, unsigned long flags, uint16\_t domid)}
-
-Identical to {\tt update\_va\_mapping()} save that the pages being
-mapped must belong to the domain {\tt domid}.
-
-\end{quote}
-
-This privileged operation is currently used by backend virtual device
-drivers to safely map pages containing I/O data.
-
-
-
-\section{Segmentation Support}
-
-Xen allows guest OSes to install a custom GDT if they require it;
-this is context switched transparently whenever a domain is
-[de]scheduled. The following hypercall is effectively a
-`safe' version of {\tt lgdt}:
-
-\begin{quote}
-\hypercall{set\_gdt(unsigned long *frame\_list, int entries)}
-
-Install a global descriptor table for a domain; {\tt frame\_list} is
-an array of up to 16 machine page frames within which the GDT resides,
-with {\tt entries} being the actual number of descriptor-entry
-slots. All page frames must be mapped read-only within the guest's
-address space, and the table must be large enough to contain Xen's
-reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
-
-\end{quote}
-
-Many guest OSes will also wish to install LDTs; this is achieved by
-using {\tt mmu\_update()} with an extended command, passing the
-linear address of the LDT base along with the number of entries. No
-special safety checks are required; Xen needs to perform this task
-simply since {\tt lldt} requires CPL 0.
-
-
-Xen also allows guest operating systems to update just an
-individual segment descriptor in the GDT or LDT:
-
-\begin{quote}
-\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
-unsigned long word2)}
-
-Update the GDT/LDT entry at machine address {\tt ma}; the new
-8-byte descriptor is stored in {\tt word1} and {\tt word2}.
-Xen performs a number of checks to ensure the descriptor is
-valid.
-
-\end{quote}
-
-Guest OSes can use the above in place of context switching entire
-LDTs (or the GDT) when the number of changing descriptors is small.
-
-\section{Context Switching}
-
-When a guest OS wishes to context switch between two processes,
-it can use the page table and segmentation hypercalls described
-above to perform the the bulk of the privileged work. In addition,
-however, it will need to invoke Xen to switch the kernel (ring 1)
-stack pointer:
-
-\begin{quote}
-\hypercall{stack\_switch(unsigned long ss, unsigned long esp)}
-
-Request kernel stack switch from hypervisor; {\tt ss} is the new
-stack segment, which {\tt esp} is the new stack pointer.
-
-\end{quote}
-
-A final useful hypercall for context switching allows ``lazy''
-save and restore of floating point state:
-
-\begin{quote}
-\hypercall{fpu\_taskswitch(void)}
-
-This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
-control register; this means that the next attempt to use floating
-point will cause a trap which the guest OS can trap. Typically it will
-then save/restore the FP state, and clear the {\tt TS} bit.
-\end{quote}
-
-This is provided as an optimization only; guest OSes can also choose
-to save and restore FP state on all context switches for simplicity.
-
-
-\section{Physical Memory Management}
-
-As mentioned previously, each domain has a maximum and current
-memory allocation. The maximum allocation, set at domain creation
-time, cannot be modified. However a domain can choose to reduce
-and subsequently grow its current allocation by using the
-following call:
-
-\begin{quote}
-\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
- unsigned long nr\_extents, unsigned int extent\_order)}
-
-Increase or decrease current memory allocation (as determined by
-the value of {\tt op}). Each invocation provides a list of
-extents each of which is $2^s$ pages in size,
-where $s$ is the value of {\tt extent\_order}.
-
-\end{quote}
-
-In addition to simply reducing or increasing the current memory
-allocation via a `balloon driver', this call is also useful for
-obtaining contiguous regions of machine memory when required (e.g.
-for certain PCI devices, or if using superpages).
-
-
-\section{Inter-Domain Communication}
-\label{s:idc}
-
-Xen provides a simple asynchronous notification mechanism via
-\emph{event channels}. Each domain has a set of end-points (or
-\emph{ports}) which may be bound to an event source (e.g. a physical
-IRQ, a virtual IRQ, or an port in another domain). When a pair of
-end-points in two different domains are bound together, then a `send'
-operation on one will cause an event to be received by the destination
-domain.
-
-The control and use of event channels involves the following hypercall:
-
-\begin{quote}
-\hypercall{event\_channel\_op(evtchn\_op\_t *op)}
-
-Inter-domain event-channel management; {\tt op} is a discriminated
-union which allows the following 7 operations:
-
-\begin{description}
-
-\item[\it alloc\_unbound:] allocate a free (unbound) local
- port and prepare for connection from a specified domain.
-\item[\it bind\_virq:] bind a local port to a virtual
-IRQ; any particular VIRQ can be bound to at most one port per domain.
-\item[\it bind\_pirq:] bind a local port to a physical IRQ;
-once more, a given pIRQ can be bound to at most one port per
-domain. Furthermore the calling domain must be sufficiently
-privileged.
-\item[\it bind\_interdomain:] construct an interdomain event
-channel; in general, the target domain must have previously allocated
-an unbound port for this channel, although this can be bypassed by
-privileged domains during domain setup.
-\item[\it close:] close an interdomain event channel.
-\item[\it send:] send an event to the remote end of a
-interdomain event channel.
-\item[\it status:] determine the current status of a local port.
-\end{description}
-
-For more details see
-{\tt xen/include/public/event\_channel.h}.
-
-\end{quote}
-
-Event channels are the fundamental communication primitive between
-Xen domains and seamlessly support SMP. However they provide little
-bandwidth for communication {\sl per se}, and hence are typically
-married with a piece of shared memory to produce effective and
-high-performance inter-domain communication.
-
-Safe sharing of memory pages between guest OSes is carried out by
-granting access on a per page basis to individual domains. This is
-achieved by using the {\tt grant\_table\_op()} hypercall.
-
-\begin{quote}
-\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
-
-Grant or remove access to a particular page to a particular domain.
-
-\end{quote}
-
-This is not currently widely in use by guest operating systems, but
-we intend to integrate support more fully in the near future.
-
-\section{PCI Configuration}
-
-Domains with physical device access (i.e.\ driver domains) receive
-limited access to certain PCI devices (bus address space and
-interrupts). However many guest operating systems attempt to
-determine the PCI configuration by directly access the PCI BIOS,
-which cannot be allowed for safety.
-
-Instead, Xen provides the following hypercall:
-
-\begin{quote}
-\hypercall{physdev\_op(void *physdev\_op)}
-
-Perform a PCI configuration option; depending on the value
-of {\tt physdev\_op} this can be a PCI config read, a PCI config
-write, or a small number of other queries.
-
-\end{quote}
-
-
-For examples of using {\tt physdev\_op()}, see the
-Xen-specific PCI code in the linux sparse tree.
-
-\section{Administrative Operations}
-\label{s:dom0ops}
-
-A large number of control operations are available to a sufficiently
-privileged domain (typically domain 0). These allow the creation and
-management of new domains, for example. A complete list is given
-below: for more details on any or all of these, please see
-{\tt xen/include/public/dom0\_ops.h}
-
-
-\begin{quote}
-\hypercall{dom0\_op(dom0\_op\_t *op)}
-
-Administrative domain operations for domain management. The options are:
-
-\begin{description}
-\item [\it DOM0\_CREATEDOMAIN:] create a new domain
-
-\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run
-queue.
-
-\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
- once again.
-
-\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
-with a domain
-
-\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
-
-\item [\it DOM0\_SCHEDCTL:]
-
-\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
-
-\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
-
-\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
-
-\item [\it DOM0\_GETPAGEFRAMEINFO:]
-
-\item [\it DOM0\_GETPAGEFRAMEINFO2:]
-
-\item [\it DOM0\_IOPL:] set I/O privilege level
-
-\item [\it DOM0\_MSR:] read or write model specific registers
-
-\item [\it DOM0\_DEBUG:] interactively invoke the debugger
-
-\item [\it DOM0\_SETTIME:] set system time
-
-\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
-
-\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
-
-\item [\it DOM0\_GETTBUFS:] get information about the size and location of
- the trace buffers (only on trace-buffer enabled builds)
-
-\item [\it DOM0\_PHYSINFO:] get information about the host machine
-
-\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
-
-\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
-
-\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
-
-\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a
domain
-
-\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
-
-\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
-\end{description}
-\end{quote}
-
-Most of the above are best understood by looking at the code
-implementing them (in {\tt xen/common/dom0\_ops.c}) and in
-the user-space tools that use them (mostly in {\tt tools/libxc}).
-
-\section{Debugging Hypercalls}
-
-A few additional hypercalls are mainly useful for debugging:
-
-\begin{quote}
-\hypercall{console\_io(int cmd, int count, char *str)}
-
-Use Xen to interact with the console; operations are:
-
-{\it CONSOLEIO\_write}: Output count characters from buffer str.
-
-{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
-\end{quote}
-
-A pair of hypercalls allows access to the underlying debug registers:
-\begin{quote}
-\hypercall{set\_debugreg(int reg, unsigned long value)}
-
-Set debug register {\tt reg} to {\tt value}
-
-\hypercall{get\_debugreg(int reg)}
-
-Return the contents of the debug register {\tt reg}
-\end{quote}
-
-And finally:
-\begin{quote}
-\hypercall{xen\_version(int cmd)}
-
-Request Xen version number.
-\end{quote}
-
-This is useful to ensure that user-space tools are in sync
-with the underlying hypervisor.
-
-\section{Deprecated Hypercalls}
-
-Xen is under constant development and refinement; as such there
-are plans to improve the way in which various pieces of functionality
-are exposed to guest OSes.
-
-\begin{quote}
-\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
-
-Toggle various memory management modes (in particular wrritable page
-tables and superpage support).
-
-\end{quote}
-
-This is likely to be replaced with mode values in the shared
-information page since this is more resilient for resumption
-after migration or checkpoint.
-
-
-
-
-
-
+%% chapter hypercalls moved to hypercalls.tex
+\include{src/interface/hypercalls}
%%
@@ -1173,279 +112,9 @@
%% new scheduler... not clear how many of them there are...
%%
-\begin{comment}
-
-\chapter{Scheduling API}
-
-The scheduling API is used by both the schedulers described above and should
-also be used by any new schedulers. It provides a generic interface and also
-implements much of the ``boilerplate'' code.
-
-Schedulers conforming to this API are described by the following
-structure:
-
-\begin{verbatim}
-struct scheduler
-{
- char *name; /* full name for this scheduler */
- char *opt_name; /* option name for this scheduler */
- unsigned int sched_id; /* ID for this scheduler */
-
- int (*init_scheduler) ();
- int (*alloc_task) (struct task_struct *);
- void (*add_task) (struct task_struct *);
- void (*free_task) (struct task_struct *);
- void (*rem_task) (struct task_struct *);
- void (*wake_up) (struct task_struct *);
- void (*do_block) (struct task_struct *);
- task_slice_t (*do_schedule) (s_time_t);
- int (*control) (struct sched_ctl_cmd *);
- int (*adjdom) (struct task_struct *,
- struct sched_adjdom_cmd *);
- s32 (*reschedule) (struct task_struct *);
- void (*dump_settings) (void);
- void (*dump_cpu_state) (int);
- void (*dump_runq_el) (struct task_struct *);
-};
-\end{verbatim}
-
-The only method that {\em must} be implemented is
-{\tt do\_schedule()}. However, if there is not some implementation for the
-{\tt wake\_up()} method then waking tasks will not get put on the runqueue!
-
-The fields of the above structure are described in more detail below.
-
-\subsubsection{name}
-
-The name field should point to a descriptive ASCII string.
-
-\subsubsection{opt\_name}
-
-This field is the value of the {\tt sched=} boot-time option that will select
-this scheduler.
-
-\subsubsection{sched\_id}
-
-This is an integer that uniquely identifies this scheduler. There should be a
-macro corrsponding to this scheduler ID in {\tt <xen/sched-if.h>}.
-
-\subsubsection{init\_scheduler}
-
-\paragraph*{Purpose}
-
-This is a function for performing any scheduler-specific initialisation. For
-instance, it might allocate memory for per-CPU scheduler data and initialise it
-appropriately.
-
-\paragraph*{Call environment}
-
-This function is called after the initialisation performed by the generic
-layer. The function is called exactly once, for the scheduler that has been
-selected.
-
-\paragraph*{Return values}
-
-This should return negative on failure --- this will cause an
-immediate panic and the system will fail to boot.
-
-\subsubsection{alloc\_task}
-
-\paragraph*{Purpose}
-Called when a {\tt task\_struct} is allocated by the generic scheduler
-layer. A particular scheduler implementation may use this method to
-allocate per-task data for this task. It may use the {\tt
-sched\_priv} pointer in the {\tt task\_struct} to point to this data.
-
-\paragraph*{Call environment}
-The generic layer guarantees that the {\tt sched\_priv} field will
-remain intact from the time this method is called until the task is
-deallocated (so long as the scheduler implementation does not change
-it explicitly!).
-
-\paragraph*{Return values}
-Negative on failure.
-
-\subsubsection{add\_task}
-
-\paragraph*{Purpose}
-
-Called when a task is initially added by the generic layer.
-
-\paragraph*{Call environment}
-
-The fields in the {\tt task\_struct} are now filled out and available for use.
-Schedulers should implement appropriate initialisation of any per-task private
-information in this method.
-
-\subsubsection{free\_task}
-
-\paragraph*{Purpose}
-
-Schedulers should free the space used by any associated private data
-structures.
-
-\paragraph*{Call environment}
-
-This is called when a {\tt task\_struct} is about to be deallocated.
-The generic layer will have done generic task removal operations and
-(if implemented) called the scheduler's {\tt rem\_task} method before
-this method is called.
-
-\subsubsection{rem\_task}
-
-\paragraph*{Purpose}
-
-This is called when a task is being removed from scheduling (but is
-not yet being freed).
-
-\subsubsection{wake\_up}
-
-\paragraph*{Purpose}
-
-Called when a task is woken up, this method should put the task on the runqueue
-(or do the scheduler-specific equivalent action).
-
-\paragraph*{Call environment}
-
-The task is already set to state RUNNING.
-
-\subsubsection{do\_block}
-
-\paragraph*{Purpose}
-
-This function is called when a task is blocked. This function should
-not remove the task from the runqueue.
-
-\paragraph*{Call environment}
-
-The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
-TASK\_INTERRUPTIBLE on entry to this method. A call to the {\tt
- do\_schedule} method will be made after this method returns, in
-order to select the next task to run.
-
-\subsubsection{do\_schedule}
-
-This method must be implemented.
-
-\paragraph*{Purpose}
-
-The method is called each time a new task must be chosen for scheduling on the
-current CPU. The current time as passed as the single argument (the current
-task can be found using the {\tt current} macro).
-
-This method should select the next task to run on this CPU and set it's minimum
-time to run as well as returning the data described below.
-
-This method should also take the appropriate action if the previous
-task has blocked, e.g. removing it from the runqueue.
-
-\paragraph*{Call environment}
-
-The other fields in the {\tt task\_struct} are updated by the generic layer,
-which also performs all Xen-specific tasks and performs the actual task switch
-(unless the previous task has been chosen again).
-
-This method is called with the {\tt schedule\_lock} held for the current CPU
-and local interrupts disabled.
-
-\paragraph*{Return values}
-
-Must return a {\tt struct task\_slice} describing what task to run and how long
-for (at maximum).
-
-\subsubsection{control}
-
-\paragraph*{Purpose}
-
-This method is called for global scheduler control operations. It takes a
-pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
-source data from or populate with data, depending on the value of the
-{\tt direction} field.
-
-\paragraph*{Call environment}
-
-The generic layer guarantees that when this method is called, the
-caller selected the correct scheduler ID, hence the scheduler's
-implementation does not need to sanity-check these parts of the call.
-
-\paragraph*{Return values}
-
-This function should return the value to be passed back to user space, hence it
-should either be 0 or an appropriate errno value.
-
-\subsubsection{sched\_adjdom}
-
-\paragraph*{Purpose}
-
-This method is called to adjust the scheduling parameters of a particular
-domain, or to query their current values. The function should check
-the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
-order to determine which of these operations is being performed.
-
-\paragraph*{Call environment}
-
-The generic layer guarantees that the caller has specified the correct
-control interface version and scheduler ID and that the supplied {\tt
-task\_struct} will not be deallocated during the call (hence it is not
-necessary to {\tt get\_task\_struct}).
-
-\paragraph*{Return values}
-
-This function should return the value to be passed back to user space, hence it
-should either be 0 or an appropriate errno value.
-
-\subsubsection{reschedule}
-
-\paragraph*{Purpose}
-
-This method is called to determine if a reschedule is required as a result of a
-particular task.
-
-\paragraph*{Call environment}
-The generic layer will cause a reschedule if the current domain is the idle
-task or it has exceeded its minimum time slice before a reschedule. The
-generic layer guarantees that the task passed is not currently running but is
-on the runqueue.
-
-\paragraph*{Return values}
-
-Should return a mask of CPUs to cause a reschedule on.
-
-\subsubsection{dump\_settings}
-
-\paragraph*{Purpose}
-
-If implemented, this should dump any private global settings for this
-scheduler to the console.
-
-\paragraph*{Call environment}
-
-This function is called with interrupts enabled.
-
-\subsubsection{dump\_cpu\_state}
-
-\paragraph*{Purpose}
-
-This method should dump any private settings for the specified CPU.
-
-\paragraph*{Call environment}
-
-This function is called with interrupts disabled and the {\tt schedule\_lock}
-for the specified CPU held.
-
-\subsubsection{dump\_runq\_el}
-
-\paragraph*{Purpose}
-
-This method should dump any private settings for the specified task.
-
-\paragraph*{Call environment}
-
-This function is called with interrupts disabled and the {\tt schedule\_lock}
-for the task's CPU held.
-
-\end{comment}
-
+%% \include{src/interface/scheduling}
+%% scheduling information moved to scheduling.tex
+%% still commented out
@@ -1457,74 +126,9 @@
%% (and/or kip's stuff?) and write about that instead?
%%
-\begin{comment}
-
-\chapter{Debugging}
-
-Xen provides tools for debugging both Xen and guest OSes. Currently, the
-Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
-debugging of Xen itself and of OS kernels running on top of Xen. The Trace
-Buffer provides a lightweight means to log data about Xen's internal state and
-behaviour at runtime, for later analysis.
-
-\section{Pervasive Debugger}
-
-Information on using the pervasive debugger is available in pdb.txt.
-
-
-\section{Trace Buffer}
-
-The trace buffer provides a means to observe Xen's operation from domain 0.
-Trace events, inserted at key points in Xen's code, record data that can be
-read by the {\tt xentrace} tool. Recording these events has a low overhead
-and hence the trace buffer may be useful for debugging timing-sensitive
-behaviours.
-
-\subsection{Internal API}
-
-To use the trace buffer functionality from within Xen, you must {\tt \#include
-<xen/trace.h>}, which contains definitions related to the trace buffer. Trace
-events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
-2, 3, 4 or 5) macros. These all take an event number, plus {\tt x} additional
-(32-bit) data as their arguments. For trace buffer-enabled builds of Xen these
-will insert the event ID and data into the trace buffer, along with the current
-value of the CPU cycle-counter. For builds without the trace buffer enabled,
-the macros expand to no-ops and thus can be left in place without incurring
-overheads.
-
-\subsection{Trace-enabled builds}
-
-By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
-is not defined). It can be enabled separately by defining {\tt TRACE\_BUFFER},
-either in {\tt <xen/config.h>} or on the gcc command line.
-
-The size (in pages) of the per-CPU trace buffers can be specified using the
-{\tt tbuf\_size=n } boot parameter to Xen. If the size is set to 0, the trace
-buffers will be disabled.
-
-\subsection{Dumping trace data}
-
-When running a trace buffer build of Xen, trace data are written continuously
-into the buffer data areas, with newer data overwriting older data. This data
-can be captured using the {\tt xentrace} program in domain 0.
-
-The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
-buffers into its address space. It then periodically polls all the buffers for
-new data, dumping out any new records from each buffer in turn. As a result,
-for machines with multiple (logical) CPUs, the trace buffer output will not be
-in overall chronological order.
-
-The output from {\tt xentrace} can be post-processed using {\tt
-xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
-{\tt xentrace\_format} (used to pretty-print trace data). For the predefined
-trace points, there is an example format file in {\tt tools/xentrace/formats }.
-
-For more information, see the manual pages for {\tt xentrace}, {\tt
-xentrace\_format} and {\tt xentrace\_cpusplit}.
-
-\end{comment}
-
-
+%% \include{src/interface/debugging}
+%% debugging information moved to debugging.tex
+%% still commented out
\end{document}
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user.tex
--- a/docs/src/user.tex Thu Sep 22 17:34:14 2005
+++ b/docs/src/user.tex Thu Sep 22 17:42:01 2005
@@ -59,1803 +59,36 @@
\renewcommand{\floatpagefraction}{.8}
\setstretch{1.1}
+
\part{Introduction and Tutorial}
-\chapter{Introduction}
-
-Xen is a {\em paravirtualising} virtual machine monitor (VMM), or
-`hypervisor', for the x86 processor architecture. Xen can securely
-execute multiple virtual machines on a single physical system with
-close-to-native performance. The virtual machine technology
-facilitates enterprise-grade functionality, including:
-
-\begin{itemize}
-\item Virtual machines with performance close to native
- hardware.
-\item Live migration of running virtual machines between physical hosts.
-\item Excellent hardware support (supports most Linux device drivers).
-\item Sandboxed, restartable device drivers.
-\end{itemize}
-
-Paravirtualisation permits very high performance virtualisation,
-even on architectures like x86 that are traditionally
-very hard to virtualise.
-The drawback of this approach is that it requires operating systems to
-be {\em ported} to run on Xen. Porting an OS to run on Xen is similar
-to supporting a new hardware platform, however the process
-is simplified because the paravirtual machine architecture is very
-similar to the underlying native hardware. Even though operating system
-kernels must explicitly support Xen, a key feature is that user space
-applications and libraries {\em do not} require modification.
-
-Xen support is available for increasingly many operating systems:
-right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
-A FreeBSD port is undergoing testing and will be incorporated into the
-release soon. Other OS ports, including Plan 9, are in progress. We
-hope that that arch-xen patches will be incorporated into the
-mainstream releases of these operating systems in due course (as has
-already happened for NetBSD).
-
-Possible usage scenarios for Xen include:
-\begin{description}
-\item [Kernel development.] Test and debug kernel modifications in a
- sandboxed virtual machine --- no need for a separate test
- machine.
-\item [Multiple OS configurations.] Run multiple operating systems
- simultaneously, for instance for compatibility or QA purposes.
-\item [Server consolidation.] Move multiple servers onto a single
- physical host with performance and fault isolation provided at
- virtual machine boundaries.
-\item [Cluster computing.] Management at VM granularity provides more
- flexibility than separately managing each physical host, but
- better control and isolation than single-system image solutions,
- particularly by using live migration for load balancing.
-\item [Hardware support for custom OSes.] Allow development of new OSes
- while benefiting from the wide-ranging hardware support of
- existing OSes such as Linux.
-\end{description}
-
-\section{Structure of a Xen-Based System}
-
-A Xen system has multiple layers, the lowest and most privileged of
-which is Xen itself.
-Xen in turn may host multiple {\em guest} operating systems, each of
-which is executed within a secure virtual machine (in Xen terminology,
-a {\em domain}). Domains are scheduled by Xen to make effective use of
-the available physical CPUs. Each guest OS manages its own
-applications, which includes responsibility for scheduling each
-application within the time allotted to the VM by Xen.
-
-The first domain, {\em domain 0}, is created automatically when the
-system boots and has special management privileges. Domain 0 builds
-other domains and manages their virtual devices. It also performs
-administrative tasks such as suspending, resuming and migrating other
-virtual machines.
-
-Within domain 0, a process called \emph{xend} runs to manage the system.
-\Xend is responsible for managing virtual machines and providing access
-to their consoles. Commands are issued to \xend over an HTTP
-interface, either from a command-line tool or from a web browser.
-
-\section{Hardware Support}
-
-Xen currently runs only on the x86 architecture, requiring a `P6' or
-newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
-Pentium IV, Xeon, AMD Athlon, AMD Duron). Multiprocessor machines are
-supported, and we also have basic support for HyperThreading (SMT),
-although this remains a topic for ongoing research. A port
-specifically for x86/64 is in progress, although Xen already runs on
-such systems in 32-bit legacy mode. In addition a port to the IA64
-architecture is approaching completion. We hope to add other
-architectures such as PPC and ARM in due course.
-
-
-Xen can currently use up to 4GB of memory. It is possible for x86
-machines to address up to 64GB of physical memory but there are no
-current plans to support these systems: The x86/64 port is the
-planned route to supporting larger memory sizes.
-
-Xen offloads most of the hardware support issues to the guest OS
-running in Domain~0. Xen itself contains only the code required to
-detect and start secondary processors, set up interrupt routing, and
-perform PCI bus enumeration. Device drivers run within a privileged
-guest OS rather than within Xen itself. This approach provides
-compatibility with the majority of device hardware supported by Linux.
-The default XenLinux build contains support for relatively modern
-server-class network and disk hardware, but you can add support for
-other hardware by configuring your XenLinux kernel in the normal way.
-
-\section{History}
-
-Xen was originally developed by the Systems Research Group at the
-University of Cambridge Computer Laboratory as part of the XenoServers
-project, funded by the UK-EPSRC.
-XenoServers aim to provide a `public infrastructure for
-global distributed computing', and Xen plays a key part in that,
-allowing us to efficiently partition a single machine to enable
-multiple independent clients to run their operating systems and
-applications in an environment providing protection, resource
-isolation and accounting. The project web page contains further
-information along with pointers to papers and technical reports:
-\path{http://www.cl.cam.ac.uk/xeno}
-
-Xen has since grown into a fully-fledged project in its own right,
-enabling us to investigate interesting research issues regarding the
-best techniques for virtualising resources such as the CPU, memory,
-disk and network. The project has been bolstered by support from
-Intel Research Cambridge, and HP Labs, who are now working closely
-with us.
-
-Xen was first described in a paper presented at SOSP in
-2003\footnote{\tt
-http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the first
-public release (1.0) was made that October. Since then, Xen has
-significantly matured and is now used in production scenarios on
-many sites.
-
-Xen 2.0 features greatly enhanced hardware support, configuration
-flexibility, usability and a larger complement of supported operating
-systems. This latest release takes Xen a step closer to becoming the
-definitive open source solution for virtualisation.
-
-\chapter{Installation}
-
-The Xen distribution includes three main components: Xen itself, ports
-of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the user-space
-tools required to manage a Xen-based system. This chapter describes
-how to install the Xen 2.0 distribution from source. Alternatively,
-there may be pre-built packages available as part of your operating
-system distribution.
-
-\section{Prerequisites}
-\label{sec:prerequisites}
-
-The following is a full list of prerequisites. Items marked `$\dag$'
-are required by the \xend control tools, and hence required if you
-want to run more than one virtual machine; items marked `$*$' are only
-required if you wish to build from source.
-\begin{itemize}
-\item A working Linux distribution using the GRUB bootloader and
-running on a P6-class (or newer) CPU.
-\item [$\dag$] The \path{iproute2} package.
-\item [$\dag$] The Linux bridge-utils\footnote{Available from
-{\tt http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl})
-\item [$\dag$] An installation of Twisted v1.3 or
-above\footnote{Available from {\tt
-http://www.twistedmatrix.com}}. There may be a binary package
-available for your distribution; alternatively it can be installed by
-running `{\sl make install-twisted}' in the root of the Xen source
-tree.
-\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
-\item [$*$] Development installation of libcurl (e.g., libcurl-devel)
-\item [$*$] Development installation of zlib (e.g., zlib-dev).
-\item [$*$] Development installation of Python v2.2 or later (e.g., python-dev).
-\item [$*$] \LaTeX and transfig are required to build the documentation.
-\end{itemize}
-
-Once you have satisfied the relevant prerequisites, you can
-now install either a binary or source distribution of Xen.
-
-\section{Installing from Binary Tarball}
-
-Pre-built tarballs are available for download from the Xen
-download page
-\begin{quote}
-{\tt http://xen.sf.net}
-\end{quote}
-
-Once you've downloaded the tarball, simply unpack and install:
-\begin{verbatim}
-# tar zxvf xen-2.0-install.tgz
-# cd xen-2.0-install
-# sh ./install.sh
-\end{verbatim}
-
-Once you've installed the binaries you need to configure
-your system as described in Section~\ref{s:configure}.
-
-\section{Installing from Source}
-
-This section describes how to obtain, build, and install
-Xen from source.
-
-\subsection{Obtaining the Source}
-
-The Xen source tree is available as either a compressed source tar
-ball or as a clone of our master BitKeeper repository.
-
-\begin{description}
-\item[Obtaining the Source Tarball]\mbox{} \\
-Stable versions (and daily snapshots) of the Xen source tree are
-available as compressed tarballs from the Xen download page
-\begin{quote}
-{\tt http://xen.sf.net}
-\end{quote}
-
-\item[Using BitKeeper]\mbox{} \\
-If you wish to install Xen from a clone of our latest BitKeeper
-repository then you will need to install the BitKeeper tools.
-Download instructions for BitKeeper can be obtained by filling out the
-form at:
-
-\begin{quote}
-{\tt http://www.bitmover.com/cgi-bin/download.cgi}
-\end{quote}
-The public master BK repository for the 2.0 release lives at:
-\begin{quote}
-{\tt bk://xen.bkbits.net/xen-2.0.bk}
-\end{quote}
-You can use BitKeeper to
-download it and keep it updated with the latest features and fixes.
-
-Change to the directory in which you want to put the source code, then
-run:
-\begin{verbatim}
-# bk clone bk://xen.bkbits.net/xen-2.0.bk
-\end{verbatim}
-
-Under your current directory, a new directory named \path{xen-2.0.bk}
-has been created, which contains all the source code for Xen, the OS
-ports, and the control tools. You can update your repository with the
-latest changes at any time by running:
-\begin{verbatim}
-# cd xen-2.0.bk # to change into the local repository
-# bk pull # to update the repository
-\end{verbatim}
-\end{description}
-
-%\section{The distribution}
-%
-%The Xen source code repository is structured as follows:
-%
-%\begin{description}
-%\item[\path{tools/}] Xen node controller daemon (Xend), command line tools,
-% control libraries
-%\item[\path{xen/}] The Xen VMM.
-%\item[\path{linux-*-xen-sparse/}] Xen support for Linux.
-%\item[\path{linux-*-patches/}] Experimental patches for Linux.
-%\item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
-%\item[\path{docs/}] Various documentation files for users and developers.
-%\item[\path{extras/}] Bonus extras.
-%\end{description}
-
-\subsection{Building from Source}
-
-The top-level Xen Makefile includes a target `world' that will do the
-following:
-
-\begin{itemize}
-\item Build Xen
-\item Build the control tools, including \xend
-\item Download (if necessary) and unpack the Linux 2.6 source code,
- and patch it for use with Xen
-\item Build a Linux kernel to use in domain 0 and a smaller
- unprivileged kernel, which can optionally be used for
- unprivileged virtual machines.
-\end{itemize}
-
-
-After the build has completed you should have a top-level
-directory called \path{dist/} in which all resulting targets
-will be placed; of particular interest are the two kernels
-XenLinux kernel images, one with a `-xen0' extension
-which contains hardware device drivers and drivers for Xen's virtual
-devices, and one with a `-xenU' extension that just contains the
-virtual ones. These are found in \path{dist/install/boot/} along
-with the image for Xen itself and the configuration files used
-during the build.
-
-The NetBSD port can be built using:
-\begin{quote}
-\begin{verbatim}
-# make netbsd20
-\end{verbatim}
-\end{quote}
-NetBSD port is built using a snapshot of the netbsd-2-0 cvs branch.
-The snapshot is downloaded as part of the build process, if it is not
-yet present in the \path{NETBSD\_SRC\_PATH} search path. The build
-process also downloads a toolchain which includes all the tools
-necessary to build the NetBSD kernel under Linux.
-
-To customize further the set of kernels built you need to edit
-the top-level Makefile. Look for the line:
-
-\begin{quote}
-\begin{verbatim}
-KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
-\end{verbatim}
-\end{quote}
-
-You can edit this line to include any set of operating system kernels
-which have configurations in the top-level \path{buildconfigs/}
-directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
-kernel containing only virtual device drivers.
-
-%% Inspect the Makefile if you want to see what goes on during a build.
-%% Building Xen and the tools is straightforward, but XenLinux is more
-%% complicated. The makefile needs a `pristine' Linux kernel tree to which
-%% it will then add the Xen architecture files. You can tell the
-%% makefile the location of the appropriate Linux compressed tar file by
-%% setting the LINUX\_SRC environment variable, e.g. \\
-%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
-%% placing the tar file somewhere in the search path of {\tt
-%% LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'. If the makefile
-%% can't find a suitable kernel tar file it attempts to download it from
-%% kernel.org (this won't work if you're behind a firewall).
-
-%% After untaring the pristine kernel tree, the makefile uses the {\tt
-%% mkbuildtree} script to add the Xen patches to the kernel.
-
-
-%% The procedure is similar to build the Linux 2.4 port: \\
-%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
-
-
-%% \framebox{\parbox{5in}{
-%% {\bf Distro specific:} \\
-%% {\it Gentoo} --- if not using udev (most installations, currently), you'll need
-%% to enable devfs and devfs mount at boot time in the xen0 config.
-%% }}
-
-\subsection{Custom XenLinux Builds}
-
-% If you have an SMP machine you may wish to give the {\tt '-j4'}
-% argument to make to get a parallel build.
-
-If you wish to build a customized XenLinux kernel (e.g. to support
-additional devices or enable distribution-required features), you can
-use the standard Linux configuration mechanisms, specifying that the
-architecture being built for is \path{xen}, e.g:
-\begin{quote}
-\begin{verbatim}
-# cd linux-2.6.11-xen0
-# make ARCH=xen xconfig
-# cd ..
-# make
-\end{verbatim}
-\end{quote}
-
-You can also copy an existing Linux configuration (\path{.config})
-into \path{linux-2.6.11-xen0} and execute:
-\begin{quote}
-\begin{verbatim}
-# make ARCH=xen oldconfig
-\end{verbatim}
-\end{quote}
-
-You may be prompted with some Xen-specific options; we
-advise accepting the defaults for these options.
-
-Note that the only difference between the two types of Linux kernel
-that are built is the configuration file used for each. The "U"
-suffixed (unprivileged) versions don't contain any of the physical
-hardware device drivers, leading to a 30\% reduction in size; hence
-you may prefer these for your non-privileged domains. The `0'
-suffixed privileged versions can be used to boot the system, as well
-as in driver domains and unprivileged domains.
-
-
-\subsection{Installing the Binaries}
-
-
-The files produced by the build process are stored under the
-\path{dist/install/} directory. To install them in their default
-locations, do:
-\begin{quote}
-\begin{verbatim}
-# make install
-\end{verbatim}
-\end{quote}
-
-
-Alternatively, users with special installation requirements may wish
-to install them manually by copying the files to their appropriate
-destinations.
-
-%% Files in \path{install/boot/} include:
-%% \begin{itemize}
-%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
-%% \item \path{install/boot/vmlinuz-2.6-xen0} Link to domain 0 XenLinux kernel
-%% \item \path{install/boot/vmlinuz-2.6-xenU} Link to unprivileged XenLinux kernel
-%% \end{itemize}
-
-The \path{dist/install/boot} directory will also contain the config files
-used for building the XenLinux kernels, and also versions of Xen and
-XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6} and
-\path{vmlinux-syms-2.6.11.11-xen0}) which are essential for interpreting crash
-dumps. Retain these files as the developers may wish to see them if
-you post on the mailing list.
-
-
-
-
-
-\section{Configuration}
-\label{s:configure}
-Once you have built and installed the Xen distribution, it is
-simple to prepare the machine for booting and running Xen.
-
-\subsection{GRUB Configuration}
-
-An entry should be added to \path{grub.conf} (often found under
-\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
-This file is sometimes called \path{menu.lst}, depending on your
-distribution. The entry should look something like the following:
-
-{\small
-\begin{verbatim}
-title Xen 2.0 / XenLinux 2.6
- kernel /boot/xen-2.0.gz dom0_mem=131072
- module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
-\end{verbatim}
-}
-
-The kernel line tells GRUB where to find Xen itself and what boot
-parameters should be passed to it (in this case, setting domain 0's
-memory allocation in kilobytes and the settings for the serial port). For more
-details on the various Xen boot parameters see Section~\ref{s:xboot}.
-
-The module line of the configuration describes the location of the
-XenLinux kernel that Xen should start and the parameters that should
-be passed to it (these are standard Linux parameters, identifying the
-root device and specifying it be initially mounted read only and
-instructing that console output be sent to the screen). Some
-distributions such as SuSE do not require the \path{ro} parameter.
-
-%% \framebox{\parbox{5in}{
-%% {\bf Distro specific:} \\
-%% {\it SuSE} --- Omit the {\tt ro} option from the XenLinux kernel
-%% command line, since the partition won't be remounted rw during boot.
-%% }}
-
-
-If you want to use an initrd, just add another \path{module} line to
-the configuration, as usual:
-{\small
-\begin{verbatim}
- module /boot/my_initrd.gz
-\end{verbatim}
-}
-
-As always when installing a new kernel, it is recommended that you do
-not delete existing menu options from \path{menu.lst} --- you may want
-to boot your old Linux kernel in future, particularly if you
-have problems.
-
-
-\subsection{Serial Console (optional)}
-
-%% kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
-%% module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro
-
-
-In order to configure Xen serial console output, it is necessary to add
-an boot option to your GRUB config; e.g. replace the above kernel line
-with:
-\begin{quote}
-{\small
-\begin{verbatim}
- kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
-\end{verbatim}}
-\end{quote}
-
-This configures Xen to output on COM1 at 115,200 baud, 8 data bits,
-1 stop bit and no parity. Modify these parameters for your set up.
-
-One can also configure XenLinux to share the serial console; to
-achieve this append ``\path{console=ttyS0}'' to your
-module line.
-
-
-If you wish to be able to log in over the XenLinux serial console it
-is necessary to add a line into \path{/etc/inittab}, just as per
-regular Linux. Simply add the line:
-\begin{quote}
-{\small
-{\tt c:2345:respawn:/sbin/mingetty ttyS0}
-}
-\end{quote}
-
-and you should be able to log in. Note that to successfully log in
-as root over the serial line will require adding \path{ttyS0} to
-\path{/etc/securetty} in most modern distributions.
-
-\subsection{TLS Libraries}
-
-Users of the XenLinux 2.6 kernel should disable Thread Local Storage
-(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
-attempting to run with a XenLinux kernel\footnote{If you boot without first
-disabling TLS, you will get a warning message during the boot
-process. In this case, simply perform the rename after the machine is
-up and then run \texttt{/sbin/ldconfig} to make it take effect.}. You can
-always reenable it by restoring the directory to its original location
-(i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
-
-The reason for this is that the current TLS implementation uses
-segmentation in a way that is not permissible under Xen. If TLS is
-not disabled, an emulation mode is used within Xen which reduces
-performance substantially.
-
-We hope that this issue can be resolved by working with Linux
-distribution vendors to implement a minor backward-compatible change
-to the TLS library.
-
-\section{Booting Xen}
-
-It should now be possible to restart the system and use Xen. Reboot
-as usual but choose the new Xen option when the Grub screen appears.
-
-What follows should look much like a conventional Linux boot. The
-first portion of the output comes from Xen itself, supplying low level
-information about itself and the machine it is running on. The
-following portion of the output comes from XenLinux.
-
-You may see some errors during the XenLinux boot. These are not
-necessarily anything to worry about --- they may result from kernel
-configuration differences between your XenLinux kernel and the one you
-usually use.
-
-When the boot completes, you should be able to log into your system as
-usual. If you are unable to log in to your system running Xen, you
-should still be able to reboot with your normal Linux kernel.
-
-
-\chapter{Starting Additional Domains}
-
-The first step in creating a new domain is to prepare a root
-filesystem for it to boot off. Typically, this might be stored in a
-normal partition, an LVM or other volume manager partition, a disk
-file or on an NFS server. A simple way to do this is simply to boot
-from your standard OS install CD and install the distribution into
-another partition on your hard drive.
-
-To start the \xend control daemon, type
-\begin{quote}
-\verb!# xend start!
-\end{quote}
-If you
-wish the daemon to start automatically, see the instructions in
-Section~\ref{s:xend}. Once the daemon is running, you can use the
-\path{xm} tool to monitor and maintain the domains running on your
-system. This chapter provides only a brief tutorial: we provide full
-details of the \path{xm} tool in the next chapter.
-
-%\section{From the web interface}
-%
-%Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv} for
-%more details) using the command: \\
-%\verb_# xensv start_ \\
-%This will also start Xend (see Chapter~\ref{cha:xend} for more information).
-%
-%The domain management interface will then be available at {\tt
-%http://your\_machine:8080/}. This provides a user friendly wizard for
-%starting domains and functions for managing running domains.
-%
-%\section{From the command line}
-
-
-\section{Creating a Domain Configuration File}
-
-Before you can start an additional domain, you must create a
-configuration file. We provide two example files which you
-can use as a starting point:
-\begin{itemize}
- \item \path{/etc/xen/xmexample1} is a simple template configuration file
- for describing a single VM.
-
- \item \path{/etc/xen/xmexample2} file is a template description that
- is intended to be reused for multiple virtual machines. Setting
- the value of the \path{vmid} variable on the \path{xm} command line
- fills in parts of this template.
-\end{itemize}
-
-Copy one of these files and edit it as appropriate.
-Typical values you may wish to edit include:
-
-\begin{quote}
-\begin{description}
-\item[kernel] Set this to the path of the kernel you compiled for use
- with Xen (e.g.\ \path{kernel = '/boot/vmlinuz-2.6-xenU'})
-\item[memory] Set this to the size of the domain's memory in
-megabytes (e.g.\ \path{memory = 64})
-\item[disk] Set the first entry in this list to calculate the offset
-of the domain's root partition, based on the domain ID. Set the
-second to the location of \path{/usr} if you are sharing it between
-domains (e.g.\ \path{disk = ['phy:your\_hard\_drive\%d,sda1,w' \%
-(base\_partition\_number + vmid), 'phy:your\_usr\_partition,sda6,r' ]}
-\item[dhcp] Uncomment the dhcp variable, so that the domain will
-receive its IP address from a DHCP server (e.g.\ \path{dhcp='dhcp'})
-\end{description}
-\end{quote}
-
-You may also want to edit the {\bf vif} variable in order to choose
-the MAC address of the virtual ethernet interface yourself. For
-example:
-\begin{quote}
-\verb_vif = ['mac=00:06:AA:F6:BB:B3']_
-\end{quote}
-If you do not set this variable, \xend will automatically generate a
-random MAC address from an unused range.
-
-
-\section{Booting the Domain}
-
-The \path{xm} tool provides a variety of commands for managing domains.
-Use the \path{create} command to start new domains. Assuming you've
-created a configuration file \path{myvmconf} based around
-\path{/etc/xen/xmexample2}, to start a domain with virtual
-machine ID~1 you should type:
-
-\begin{quote}
-\begin{verbatim}
-# xm create -c myvmconf vmid=1
-\end{verbatim}
-\end{quote}
-
-
-The \path{-c} switch causes \path{xm} to turn into the domain's
-console after creation. The \path{vmid=1} sets the \path{vmid}
-variable used in the \path{myvmconf} file.
-
-
-You should see the console boot messages from the new domain
-appearing in the terminal in which you typed the command,
-culminating in a login prompt.
-
-
-\section{Example: ttylinux}
-
-Ttylinux is a very small Linux distribution, designed to require very
-few resources. We will use it as a concrete example of how to start a
-Xen domain. Most users will probably want to install a full-featured
-distribution once they have mastered the basics\footnote{ttylinux is
-maintained by Pascal Schmidt. You can download source packages from
-the distribution's home page: {\tt http://www.minimalinux.org/ttylinux/}}.
-
-\begin{enumerate}
-\item Download and extract the ttylinux disk image from the Files
-section of the project's SourceForge site (see
-\path{http://sf.net/projects/xen/}).
-\item Create a configuration file like the following:
-\begin{verbatim}
-kernel = "/boot/vmlinuz-2.6-xenU"
-memory = 64
-name = "ttylinux"
-nics = 1
-ip = "1.2.3.4"
-disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
-root = "/dev/sda1 ro"
-\end{verbatim}
-\item Now start the domain and connect to its console:
-\begin{verbatim}
-xm create configfile -c
-\end{verbatim}
-\item Login as root, password root.
-\end{enumerate}
-
-
-\section{Starting / Stopping Domains Automatically}
-
-It is possible to have certain domains start automatically at boot
-time and to have dom0 wait for all running domains to shutdown before
-it shuts down the system.
-
-To specify a domain is to start at boot-time, place its
-configuration file (or a link to it) under \path{/etc/xen/auto/}.
-
-A Sys-V style init script for RedHat and LSB-compliant systems is
-provided and will be automatically copied to \path{/etc/init.d/}
-during install. You can then enable it in the appropriate way for
-your distribution.
-
-For instance, on RedHat:
-
-\begin{quote}
-\verb_# chkconfig --add xendomains_
-\end{quote}
-
-By default, this will start the boot-time domains in runlevels 3, 4
-and 5.
-
-You can also use the \path{service} command to run this script
-manually, e.g:
-
-\begin{quote}
-\verb_# service xendomains start_
-
-Starts all the domains with config files under /etc/xen/auto/.
-\end{quote}
-
-
-\begin{quote}
-\verb_# service xendomains stop_
-
-Shuts down ALL running Xen domains.
-\end{quote}
-
-\chapter{Domain Management Tools}
-
-The previous chapter described a simple example of how to configure
-and start a domain. This chapter summarises the tools available to
-manage running domains.
-
-\section{Command-line Management}
-
-Command line management tasks are also performed using the \path{xm}
-tool. For online help for the commands available, type:
-\begin{quote}
-\verb_# xm help_
-\end{quote}
-
-You can also type \path{xm help $<$command$>$} for more information
-on a given command.
-
-\subsection{Basic Management Commands}
-
-The most important \path{xm} commands are:
-\begin{quote}
-\verb_# xm list_: Lists all domains running.\\
-\verb_# xm consoles_ : Gives information about the domain consoles.\\
-\verb_# xm console_: Opens a console to a domain (e.g.\
- \verb_# xm console myVM_
-\end{quote}
-
-\subsection{\tt xm list}
-
-The output of \path{xm list} is in rows of the following format:
-\begin{center}
-{\tt name domid memory cpu state cputime console}
-\end{center}
-
-\begin{quote}
-\begin{description}
-\item[name] The descriptive name of the virtual machine.
-\item[domid] The number of the domain ID this virtual machine is running in.
-\item[memory] Memory size in megabytes.
-\item[cpu] The CPU this domain is running on.
-\item[state] Domain state consists of 5 fields:
- \begin{description}
- \item[r] running
- \item[b] blocked
- \item[p] paused
- \item[s] shutdown
- \item[c] crashed
- \end{description}
-\item[cputime] How much CPU time (in seconds) the domain has used so far.
-\item[console] TCP port accepting connections to the domain's console.
-\end{description}
-\end{quote}
-
-The \path{xm list} command also supports a long output format when the
-\path{-l} switch is used. This outputs the fulls details of the
-running domains in \xend's SXP configuration format.
-
-For example, suppose the system is running the ttylinux domain as
-described earlier. The list command should produce output somewhat
-like the following:
-\begin{verbatim}
-# xm list
-Name Id Mem(MB) CPU State Time(s) Console
-Domain-0 0 251 0 r---- 172.2
-ttylinux 5 63 0 -b--- 3.0 9605
-\end{verbatim}
-
-Here we can see the details for the ttylinux domain, as well as for
-domain 0 (which, of course, is always running). Note that the console
-port for the ttylinux domain is 9605. This can be connected to by TCP
-using a terminal program (e.g. \path{telnet} or, better,
-\path{xencons}). The simplest way to connect is to use the \path{xm console}
-command, specifying the domain name or ID. To connect to the console
-of the ttylinux domain, we could use any of the following:
-\begin{verbatim}
-# xm console ttylinux
-# xm console 5
-# xencons localhost 9605
-\end{verbatim}
-
-\section{Domain Save and Restore}
-
-The administrator of a Xen system may suspend a virtual machine's
-current state into a disk file in domain 0, allowing it to be resumed
-at a later time.
-
-The ttylinux domain described earlier can be suspended to disk using
-the command:
-\begin{verbatim}
-# xm save ttylinux ttylinux.xen
-\end{verbatim}
-
-This will stop the domain named `ttylinux' and save its current state
-into a file called \path{ttylinux.xen}.
-
-To resume execution of this domain, use the \path{xm restore} command:
-\begin{verbatim}
-# xm restore ttylinux.xen
-\end{verbatim}
-
-This will restore the state of the domain and restart it. The domain
-will carry on as before and the console may be reconnected using the
-\path{xm console} command, as above.
-
-\section{Live Migration}
-
-Live migration is used to transfer a domain between physical hosts
-whilst that domain continues to perform its usual activities --- from
-the user's perspective, the migration should be imperceptible.
-
-To perform a live migration, both hosts must be running Xen / \xend and
-the destination host must have sufficient resources (e.g. memory
-capacity) to accommodate the domain after the move. Furthermore we
-currently require both source and destination machines to be on the
-same L2 subnet.
-
-Currently, there is no support for providing automatic remote access
-to filesystems stored on local disk when a domain is migrated.
-Administrators should choose an appropriate storage solution
-(i.e. SAN, NAS, etc.) to ensure that domain filesystems are also
-available on their destination node. GNBD is a good method for
-exporting a volume from one machine to another. iSCSI can do a similar
-job, but is more complex to set up.
-
-When a domain migrates, it's MAC and IP address move with it, thus it
-is only possible to migrate VMs within the same layer-2 network and IP
-subnet. If the destination node is on a different subnet, the
-administrator would need to manually configure a suitable etherip or
-IP tunnel in the domain 0 of the remote node.
-
-A domain may be migrated using the \path{xm migrate} command. To
-live migrate a domain to another machine, we would use
-the command:
-
-\begin{verbatim}
-# xm migrate --live mydomain destination.ournetwork.com
-\end{verbatim}
-
-Without the \path{--live} flag, \xend simply stops the domain and
-copies the memory image over to the new node and restarts it. Since
-domains can have large allocations this can be quite time consuming,
-even on a Gigabit network. With the \path{--live} flag \xend attempts
-to keep the domain running while the migration is in progress,
-resulting in typical `downtimes' of just 60--300ms.
-
-For now it will be necessary to reconnect to the domain's console on
-the new machine using the \path{xm console} command. If a migrated
-domain has any open network connections then they will be preserved,
-so SSH connections do not have this limitation.
-
-\section{Managing Domain Memory}
-
-XenLinux domains have the ability to relinquish / reclaim machine
-memory at the request of the administrator or the user of the domain.
-
-\subsection{Setting memory footprints from dom0}
-
-The machine administrator can request that a domain alter its memory
-footprint using the \path{xm set-mem} command. For instance, we can
-request that our example ttylinux domain reduce its memory footprint
-to 32 megabytes.
-
-\begin{verbatim}
-# xm set-mem ttylinux 32
-\end{verbatim}
-
-We can now see the result of this in the output of \path{xm list}:
-
-\begin{verbatim}
-# xm list
-Name Id Mem(MB) CPU State Time(s) Console
-Domain-0 0 251 0 r---- 172.2
-ttylinux 5 31 0 -b--- 4.3 9605
-\end{verbatim}
-
-The domain has responded to the request by returning memory to Xen. We
-can restore the domain to its original size using the command line:
-
-\begin{verbatim}
-# xm set-mem ttylinux 64
-\end{verbatim}
-
-\subsection{Setting memory footprints from within a domain}
-
-The virtual file \path{/proc/xen/balloon} allows the owner of a
-domain to adjust their own memory footprint. Reading the file
-(e.g. \path{cat /proc/xen/balloon}) prints out the current
-memory footprint of the domain. Writing the file
-(e.g. \path{echo new\_target > /proc/xen/balloon}) requests
-that the kernel adjust the domain's memory footprint to a new value.
-
-\subsection{Setting memory limits}
-
-Xen associates a memory size limit with each domain. By default, this
-is the amount of memory the domain is originally started with,
-preventing the domain from ever growing beyond this size. To permit a
-domain to grow beyond its original allocation or to prevent a domain
-you've shrunk from reclaiming the memory it relinquished, use the
-\path{xm maxmem} command.
-
-\chapter{Domain Filesystem Storage}
-
-It is possible to directly export any Linux block device in dom0 to
-another domain, or to export filesystems / devices to virtual machines
-using standard network protocols (e.g. NBD, iSCSI, NFS, etc). This
-chapter covers some of the possibilities.
-
-
-\section{Exporting Physical Devices as VBDs}
-\label{s:exporting-physical-devices-as-vbds}
-
-One of the simplest configurations is to directly export
-individual partitions from domain 0 to other domains. To
-achieve this use the \path{phy:} specifier in your domain
-configuration file. For example a line like
-\begin{quote}
-\verb_disk = ['phy:hda3,sda1,w']_
-\end{quote}
-specifies that the partition \path{/dev/hda3} in domain 0
-should be exported read-write to the new domain as \path{/dev/sda1};
-one could equally well export it as \path{/dev/hda} or
-\path{/dev/sdb5} should one wish.
-
-In addition to local disks and partitions, it is possible to export
-any device that Linux considers to be ``a disk'' in the same manner.
-For example, if you have iSCSI disks or GNBD volumes imported into
-domain 0 you can export these to other domains using the \path{phy:}
-disk syntax. E.g.:
-\begin{quote}
-\verb_disk = ['phy:vg/lvm1,sda2,w']_
-\end{quote}
-
-
-
-\begin{center}
-\framebox{\bf Warning: Block device sharing}
-\end{center}
-\begin{quote}
-Block devices should typically only be shared between domains in a
-read-only fashion otherwise the Linux kernel's file systems will get
-very confused as the file system structure may change underneath them
-(having the same ext3 partition mounted rw twice is a sure fire way to
-cause irreparable damage)! \Xend will attempt to prevent you from
-doing this by checking that the device is not mounted read-write in
-domain 0, and hasn't already been exported read-write to another
-domain.
-If you want read-write sharing, export the directory to other domains
-via NFS from domain0 (or use a cluster file system such as GFS or
-ocfs2).
-
-\end{quote}
-
-
-\section{Using File-backed VBDs}
-
-It is also possible to use a file in Domain 0 as the primary storage
-for a virtual machine. As well as being convenient, this also has the
-advantage that the virtual block device will be {\em sparse} --- space
-will only really be allocated as parts of the file are used. So if a
-virtual machine uses only half of its disk space then the file really
-takes up half of the size allocated.
-
-For example, to create a 2GB sparse file-backed virtual block device
-(actually only consumes 1KB of disk):
-\begin{quote}
-\verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
-\end{quote}
-
-Make a file system in the disk file:
-\begin{quote}
-\verb_# mkfs -t ext3 vm1disk_
-\end{quote}
-
-(when the tool asks for confirmation, answer `y')
-
-Populate the file system e.g. by copying from the current root:
-\begin{quote}
-\begin{verbatim}
-# mount -o loop vm1disk /mnt
-# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
-# mkdir /mnt/{proc,sys,home,tmp}
-\end{verbatim}
-\end{quote}
-
-Tailor the file system by editing \path{/etc/fstab},
-\path{/etc/hostname}, etc (don't forget to edit the files in the
-mounted file system, instead of your domain 0 filesystem, e.g. you
-would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab} ). For
-this example put \path{/dev/sda1} to root in fstab.
-
-Now unmount (this is important!):
-\begin{quote}
-\verb_# umount /mnt_
-\end{quote}
-
-In the configuration file set:
-\begin{quote}
-\verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
-\end{quote}
-
-As the virtual machine writes to its `disk', the sparse file will be
-filled in and consume more space up to the original 2GB.
-
-{\bf Note that file-backed VBDs may not be appropriate for backing
-I/O-intensive domains.} File-backed VBDs are known to experience
-substantial slowdowns under heavy I/O workloads, due to the I/O handling
-by the loopback block device used to support file-backed VBDs in dom0.
-Better I/O performance can be achieved by using either LVM-backed VBDs
-(Section~\ref{s:using-lvm-backed-vbds}) or physical devices as VBDs
-(Section~\ref{s:exporting-physical-devices-as-vbds}).
-
-Linux supports a maximum of eight file-backed VBDs across all domains by
-default. This limit can be statically increased by using the {\em
-max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is compiled as a
-module in the dom0 kernel, or by using the {\em max\_loop=n} boot option
-if CONFIG\_BLK\_DEV\_LOOP is compiled directly into the dom0 kernel.
-
-
-\section{Using LVM-backed VBDs}
-\label{s:using-lvm-backed-vbds}
-
-A particularly appealing solution is to use LVM volumes
-as backing for domain file-systems since this allows dynamic
-growing/shrinking of volumes as well as snapshot and other
-features.
-
-To initialise a partition to support LVM volumes:
-\begin{quote}
-\begin{verbatim}
-# pvcreate /dev/sda10
-\end{verbatim}
-\end{quote}
-
-Create a volume group named `vg' on the physical partition:
-\begin{quote}
-\begin{verbatim}
-# vgcreate vg /dev/sda10
-\end{verbatim}
-\end{quote}
-
-Create a logical volume of size 4GB named `myvmdisk1':
-\begin{quote}
-\begin{verbatim}
-# lvcreate -L4096M -n myvmdisk1 vg
-\end{verbatim}
-\end{quote}
-
-You should now see that you have a \path{/dev/vg/myvmdisk1}
-Make a filesystem, mount it and populate it, e.g.:
-\begin{quote}
-\begin{verbatim}
-# mkfs -t ext3 /dev/vg/myvmdisk1
-# mount /dev/vg/myvmdisk1 /mnt
-# cp -ax / /mnt
-# umount /mnt
-\end{verbatim}
-\end{quote}
-
-Now configure your VM with the following disk configuration:
-\begin{quote}
-\begin{verbatim}
- disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
-\end{verbatim}
-\end{quote}
-
-LVM enables you to grow the size of logical volumes, but you'll need
-to resize the corresponding file system to make use of the new
-space. Some file systems (e.g. ext3) now support on-line resize. See
-the LVM manuals for more details.
-
-You can also use LVM for creating copy-on-write clones of LVM
-volumes (known as writable persistent snapshots in LVM
-terminology). This facility is new in Linux 2.6.8, so isn't as
-stable as one might hope. In particular, using lots of CoW LVM
-disks consumes a lot of dom0 memory, and error conditions such as
-running out of disk space are not handled well. Hopefully this
-will improve in future.
-
-To create two copy-on-write clone of the above file system you
-would use the following commands:
-
-\begin{quote}
-\begin{verbatim}
-# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
-# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
-\end{verbatim}
-\end{quote}
-
-Each of these can grow to have 1GB of differences from the master
-volume. You can grow the amount of space for storing the
-differences using the lvextend command, e.g.:
-\begin{quote}
-\begin{verbatim}
-# lvextend +100M /dev/vg/myclonedisk1
-\end{verbatim}
-\end{quote}
-
-Don't let the `differences volume' ever fill up otherwise LVM gets
-rather confused. It may be possible to automate the growing
-process by using \path{dmsetup wait} to spot the volume getting full
-and then issue an \path{lvextend}.
-
-In principle, it is possible to continue writing to the volume
-that has been cloned (the changes will not be visible to the
-clones), but we wouldn't recommend this: have the cloned volume
-as a `pristine' file system install that isn't mounted directly
-by any of the virtual machines.
-
-
-\section{Using NFS Root}
-
-First, populate a root filesystem in a directory on the server
-machine. This can be on a distinct physical machine, or simply
-run within a virtual machine on the same node.
-
-Now configure the NFS server to export this filesystem over the
-network by adding a line to \path{/etc/exports}, for instance:
-
-\begin{quote}
-\begin{small}
-\begin{verbatim}
-/export/vm1root 1.2.3.4/24 (rw,sync,no_root_squash)
-\end{verbatim}
-\end{small}
-\end{quote}
-
-Finally, configure the domain to use NFS root. In addition to the
-normal variables, you should make sure to set the following values in
-the domain's configuration file:
-
-\begin{quote}
-\begin{small}
-\begin{verbatim}
-root = '/dev/nfs'
-nfs_server = '2.3.4.5' # substitute IP address of server
-nfs_root = '/path/to/root' # path to root FS on the server
-\end{verbatim}
-\end{small}
-\end{quote}
-
-The domain will need network access at boot time, so either statically
-configure an IP address (Using the config variables \path{ip},
-\path{netmask}, \path{gateway}, \path{hostname}) or enable DHCP (
-\path{dhcp='dhcp'}).
-
-Note that the Linux NFS root implementation is known to have stability
-problems under high load (this is not a Xen-specific problem), so this
-configuration may not be appropriate for critical servers.
+
+%% Chapter Introduction moved to introduction.tex
+\include{src/user/introduction}
+
+%% Chapter Installation moved to installation.tex
+\include{src/user/installation}
+
+%% Chapter Starting Additional Domains moved to start_addl_dom.tex
+\include{src/user/start_addl_dom}
+
+%% Chapter Domain Management Tools moved to domain_mgmt.tex
+\include{src/user/domain_mgmt}
+
+%% Chapter Domain Filesystem Storage moved to domain_filesystem.tex
+\include{src/user/domain_filesystem}
+
\part{User Reference Documentation}
-\chapter{Control Software}
-
-The Xen control software includes the \xend node control daemon (which
-must be running), the xm command line tools, and the prototype
-xensv web interface.
-
-\section{\Xend (node control daemon)}
-\label{s:xend}
-
-The Xen Daemon (\Xend) performs system management functions related to
-virtual machines. It forms a central point of control for a machine
-and can be controlled using an HTTP-based protocol. \Xend must be
-running in order to start and manage virtual machines.
-
-\Xend must be run as root because it needs access to privileged system
-management functions. A small set of commands may be issued on the
-\xend command line:
-
-\begin{tabular}{ll}
-\verb!# xend start! & start \xend, if not already running \\
-\verb!# xend stop! & stop \xend if already running \\
-\verb!# xend restart! & restart \xend if running, otherwise start it \\
-% \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
-\verb!# xend status! & indicates \xend status by its return code
-\end{tabular}
-
-A SysV init script called {\tt xend} is provided to start \xend at boot
-time. {\tt make install} installs this script in {\path{/etc/init.d}.
-To enable it, you have to make symbolic links in the appropriate
-runlevel directories or use the {\tt chkconfig} tool, where available.
-
-Once \xend is running, more sophisticated administration can be done
-using the xm tool (see Section~\ref{s:xm}) and the experimental
-Xensv web interface (see Section~\ref{s:xensv}).
-
-As \xend runs, events will be logged to \path{/var/log/xend.log} and,
-if the migration assistant daemon (\path{xfrd}) has been started,
-\path{/var/log/xfrd.log}. These may be of use for troubleshooting
-problems.
-
-\section{Xm (command line interface)}
-\label{s:xm}
-
-The xm tool is the primary tool for managing Xen from the console.
-The general format of an xm command line is:
-
-\begin{verbatim}
-# xm command [switches] [arguments] [variables]
-\end{verbatim}
-
-The available {\em switches} and {\em arguments} are dependent on the
-{\em command} chosen. The {\em variables} may be set using
-declarations of the form {\tt variable=value} and command line
-declarations override any of the values in the configuration file
-being used, including the standard variables described above and any
-custom variables (for instance, the \path{xmdefconfig} file uses a
-{\tt vmid} variable).
-
-The available commands are as follows:
-
-\begin{description}
-\item[set-mem] Request a domain to adjust its memory footprint.
-\item[create] Create a new domain.
-\item[destroy] Kill a domain immediately.
-\item[list] List running domains.
-\item[shutdown] Ask a domain to shutdown.
-\item[dmesg] Fetch the Xen (not Linux!) boot output.
-\item[consoles] Lists the available consoles.
-\item[console] Connect to the console for a domain.
-\item[help] Get help on xm commands.
-\item[save] Suspend a domain to disk.
-\item[restore] Restore a domain from disk.
-\item[pause] Pause a domain's execution.
-\item[unpause] Unpause a domain.
-\item[pincpu] Pin a domain to a CPU.
-\item[bvt] Set BVT scheduler parameters for a domain.
-\item[bvt\_ctxallow] Set the BVT context switching allowance for the system.
-\item[atropos] Set the atropos parameters for a domain.
-\item[rrobin] Set the round robin time slice for the system.
-\item[info] Get information about the Xen host.
-\item[call] Call a \xend HTTP API function directly.
-\end{description}
-
-For a detailed overview of switches, arguments and variables to each command
-try
-\begin{quote}
-\begin{verbatim}
-# xm help command
-\end{verbatim}
-\end{quote}
-
-\section{Xensv (web control interface)}
-\label{s:xensv}
-
-Xensv is the experimental web control interface for managing a Xen
-machine. It can be used to perform some (but not yet all) of the
-management tasks that can be done using the xm tool.
-
-It can be started using:
-\begin{quote}
-\verb_# xensv start_
-\end{quote}
-and stopped using:
-\begin{quote}
-\verb_# xensv stop_
-\end{quote}
-
-By default, Xensv will serve out the web interface on port 8080. This
-can be changed by editing
-\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
-
-Once Xensv is running, the web interface can be used to create and
-manage running domains.
-
-
-
-
-\chapter{Domain Configuration}
-\label{cha:config}
-
-The following contains the syntax of the domain configuration
-files and description of how to further specify networking,
-driver domain and general scheduling behaviour.
-
-\section{Configuration Files}
-\label{s:cfiles}
-
-Xen configuration files contain the following standard variables.
-Unless otherwise stated, configuration items should be enclosed in
-quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2}
-for concrete examples of the syntax.
-
-\begin{description}
-\item[kernel] Path to the kernel image
-\item[ramdisk] Path to a ramdisk image (optional).
-% \item[builder] The name of the domain build function (e.g. {\tt'linux'} or
{\tt'netbsd'}.
-\item[memory] Memory size in megabytes.
-\item[cpu] CPU to run this domain on, or {\tt -1} for
- auto-allocation.
-\item[console] Port to export the domain console on (default 9600 + domain ID).
-\item[nics] Number of virtual network interfaces.
-\item[vif] List of MAC addresses (random addresses are assigned if not
- given) and bridges to use for the domain's network interfaces, e.g.
-\begin{verbatim}
-vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
- 'bridge=xen-br1' ]
-\end{verbatim}
- to assign a MAC address and bridge to the first interface and assign
- a different bridge to the second interface, leaving \xend to choose
- the MAC address.
-\item[disk] List of block devices to export to the domain, e.g. \\
- \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
- exports physical device \path{/dev/hda1} to the domain
- as \path{/dev/sda1} with read-only access. Exporting a disk read-write
- which is currently mounted is dangerous -- if you are \emph{certain}
- you wish to do this, you can specify \path{w!} as the mode.
-\item[dhcp] Set to {\tt 'dhcp'} if you want to use DHCP to configure
- networking.
-\item[netmask] Manually configured IP netmask.
-\item[gateway] Manually configured IP gateway.
-\item[hostname] Set the hostname for the virtual machine.
-\item[root] Specify the root device parameter on the kernel command
- line.
-\item[nfs\_server] IP address for the NFS server (if any).
-\item[nfs\_root] Path of the root filesystem on the NFS server (if any).
-\item[extra] Extra string to append to the kernel command line (if
- any)
-\item[restart] Three possible options:
- \begin{description}
- \item[always] Always restart the domain, no matter what
- its exit code is.
- \item[never] Never restart the domain.
- \item[onreboot] Restart the domain iff it requests reboot.
- \end{description}
-\end{description}
-
-For additional flexibility, it is also possible to include Python
-scripting commands in configuration files. An example of this is the
-\path{xmexample2} file, which uses Python code to handle the
-\path{vmid} variable.
-
-
-%\part{Advanced Topics}
-
-\section{Network Configuration}
-
-For many users, the default installation should work `out of the box'.
-More complicated network setups, for instance with multiple ethernet
-interfaces and/or existing bridging setups will require some
-special configuration.
-
-The purpose of this section is to describe the mechanisms provided by
-\xend to allow a flexible configuration for Xen's virtual networking.
-
-\subsection{Xen virtual network topology}
-
-Each domain network interface is connected to a virtual network
-interface in dom0 by a point to point link (effectively a `virtual
-crossover cable'). These devices are named {\tt
-vif$<$domid$>$.$<$vifid$>$} (e.g. {\tt vif1.0} for the first interface
-in domain 1, {\tt vif3.1} for the second interface in domain 3).
-
-Traffic on these virtual interfaces is handled in domain 0 using
-standard Linux mechanisms for bridging, routing, rate limiting, etc.
-Xend calls on two shell scripts to perform initial configuration of
-the network and configuration of new virtual interfaces. By default,
-these scripts configure a single bridge for all the virtual
-interfaces. Arbitrary routing / bridging configurations can be
-configured by customising the scripts, as described in the following
-section.
-
-\subsection{Xen networking scripts}
-
-Xen's virtual networking is configured by two shell scripts (by
-default \path{network} and \path{vif-bridge}). These are
-called automatically by \xend when certain events occur, with
-arguments to the scripts providing further contextual information.
-These scripts are found by default in \path{/etc/xen/scripts}. The
-names and locations of the scripts can be configured in
-\path{/etc/xen/xend-config.sxp}.
-
-\begin{description}
-
-\item[network:] This script is called whenever \xend is started or
-stopped to respectively initialise or tear down the Xen virtual
-network. In the default configuration initialisation creates the
-bridge `xen-br0' and moves eth0 onto that bridge, modifying the
-routing accordingly. When \xend exits, it deletes the Xen bridge and
-removes eth0, restoring the normal IP and routing configuration.
-
-%% In configurations where the bridge already exists, this script could
-%% be replaced with a link to \path{/bin/true} (for instance).
-
-\item[vif-bridge:] This script is called for every domain virtual
-interface and can configure firewalling rules and add the vif
-to the appropriate bridge. By default, this adds and removes
-VIFs on the default Xen bridge.
-
-\end{description}
-
-For more complex network setups (e.g. where routing is required or
-integrate with existing bridges) these scripts may be replaced with
-customised variants for your site's preferred configuration.
-
-%% There are two possible types of privileges: IO privileges and
-%% administration privileges.
-
-\section{Driver Domain Configuration}
-
-I/O privileges can be assigned to allow a domain to directly access
-PCI devices itself. This is used to support driver domains.
-
-Setting backend privileges is currently only supported in SXP format
-config files. To allow a domain to function as a backend for others,
-somewhere within the {\tt vm} element of its configuration file must
-be a {\tt backend} element of the form {\tt (backend ({\em type}))}
-where {\tt \em type} may be either {\tt netif} or {\tt blkif},
-according to the type of virtual device this domain will service.
-%% After this domain has been built, \xend will connect all new and
-%% existing {\em virtual} devices (of the appropriate type) to that
-%% backend.
-
-Note that a block backend cannot currently import virtual block
-devices from other domains, and a network backend cannot import
-virtual network devices from other domains. Thus (particularly in the
-case of block backends, which cannot import a virtual block device as
-their root filesystem), you may need to boot a backend domain from a
-ramdisk or a network device.
-
-Access to PCI devices may be configured on a per-device basis. Xen
-will assign the minimal set of hardware privileges to a domain that
-are required to control its devices. This can be configured in either
-format of configuration file:
-
-\begin{itemize}
-\item SXP Format: Include device elements of the form: \\
-\centerline{ {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}}
\\
- inside the top-level {\tt vm} element. Each one specifies the address
- of a device this domain is allowed to access ---
- the numbers {\em x},{\em y} and {\em z} may be in either decimal or
- hexadecimal format.
-\item Flat Format: Include a list of PCI device addresses of the
- format: \\
-\centerline{{\tt pci = ['x,y,z', ...]}} \\
-where each element in the
- list is a string specifying the components of the PCI device
- address, separated by commas. The components ({\tt \em x}, {\tt \em
- y} and {\tt \em z}) of the list may be formatted as either decimal
- or hexadecimal.
-\end{itemize}
-
-%% \section{Administration Domains}
-
-%% Administration privileges allow a domain to use the `dom0
-%% operations' (so called because they are usually available only to
-%% domain 0). A privileged domain can build other domains, set scheduling
-%% parameters, etc.
-
-% Support for other administrative domains is not yet available... perhaps
-% we should plumb it in some time
-
-
-
-
-
-\section{Scheduler Configuration}
-\label{s:sched}
-
-
-Xen offers a boot time choice between multiple schedulers. To select
-a scheduler, pass the boot parameter {\em sched=sched\_name} to Xen,
-substituting the appropriate scheduler name. Details of the schedulers
-and their parameters are included below; future versions of the tools
-will provide a higher-level interface to these tools.
-
-It is expected that system administrators configure their system to
-use the scheduler most appropriate to their needs. Currently, the BVT
-scheduler is the recommended choice.
-
-\subsection{Borrowed Virtual Time}
-
-{\tt sched=bvt} (the default) \\
-
-BVT provides proportional fair shares of the CPU time. It has been
-observed to penalise domains that block frequently (e.g. I/O intensive
-domains), but this can be compensated for by using warping.
-
-\subsubsection{Global Parameters}
-
-\begin{description}
-\item[ctx\_allow]
- the context switch allowance is similar to the `quantum'
- in traditional schedulers. It is the minimum time that
- a scheduled domain will be allowed to run before being
- pre-empted.
-\end{description}
-
-\subsubsection{Per-domain parameters}
-
-\begin{description}
-\item[mcuadv]
- the MCU (Minimum Charging Unit) advance determines the
- proportional share of the CPU that a domain receives. It
- is set inversely proportionally to a domain's sharing weight.
-\item[warp]
- the amount of `virtual time' the domain is allowed to warp
- backwards
-\item[warpl]
- the warp limit is the maximum time a domain can run warped for
-\item[warpu]
- the unwarp requirement is the minimum time a domain must
- run unwarped for before it can warp again
-\end{description}
-
-\subsection{Atropos}
-
-{\tt sched=atropos} \\
-
-Atropos is a soft real time scheduler. It provides guarantees about
-absolute shares of the CPU, with a facility for sharing
-slack CPU time on a best-effort basis. It can provide timeliness
-guarantees for latency-sensitive domains.
-
-Every domain has an associated period and slice. The domain should
-receive `slice' nanoseconds every `period' nanoseconds. This allows
-the administrator to configure both the absolute share of the CPU a
-domain receives and the frequency with which it is scheduled.
-
-%% When
-%% domains unblock, their period is reduced to the value of the latency
-%% hint (the slice is scaled accordingly so that they still get the same
-%% proportion of the CPU). For each subsequent period, the slice and
-%% period times are doubled until they reach their original values.
-
-Note: don't overcommit the CPU when using Atropos (i.e. don't reserve
-more CPU than is available --- the utilisation should be kept to
-slightly less than 100\% in order to ensure predictable behaviour).
-
-\subsubsection{Per-domain parameters}
-
-\begin{description}
-\item[period] The regular time interval during which a domain is
- guaranteed to receive its allocation of CPU time.
-\item[slice]
- The length of time per period that a domain is guaranteed to run
- for (in the absence of voluntary yielding of the CPU).
-\item[latency]
- The latency hint is used to control how soon after
- waking up a domain it should be scheduled.
-\item[xtratime] This is a boolean flag that specifies whether a domain
- should be allowed a share of the system slack time.
-\end{description}
-
-\subsection{Round Robin}
-
-{\tt sched=rrobin} \\
-
-The round robin scheduler is included as a simple demonstration of
-Xen's internal scheduler API. It is not intended for production use.
-
-\subsubsection{Global Parameters}
-
-\begin{description}
-\item[rr\_slice]
- The maximum time each domain runs before the next
- scheduling decision is made.
-\end{description}
-
-
-
-
-
-
-
-
-
-
-
-
-\chapter{Build, Boot and Debug options}
-
-This chapter describes the build- and boot-time options
-which may be used to tailor your Xen system.
-
-\section{Xen Build Options}
-
-Xen provides a number of build-time options which should be
-set as environment variables or passed on make's command-line.
-
-\begin{description}
-\item[verbose=y] Enable debugging messages when Xen detects an unexpected
condition.
-Also enables console output from all domains.
-\item[debug=y]
-Enable debug assertions. Implies {\bf verbose=y}.
-(Primarily useful for tracing bugs in Xen).
-\item[debugger=y]
-Enable the in-Xen debugger. This can be used to debug
-Xen, guest OSes, and applications.
-\item[perfc=y]
-Enable performance counters for significant events
-within Xen. The counts can be reset or displayed
-on Xen's console via console control keys.
-\item[trace=y]
-Enable per-cpu trace buffers which log a range of
-events within Xen for collection by control
-software.
-\end{description}
-
-\section{Xen Boot Options}
-\label{s:xboot}
-
-These options are used to configure Xen's behaviour at runtime. They
-should be appended to Xen's command line, either manually or by
-editing \path{grub.conf}.
-
-\begin{description}
-\item [noreboot ]
- Don't reboot the machine automatically on errors. This is
- useful to catch debug output if you aren't catching console messages
- via the serial line.
-
-\item [nosmp ]
- Disable SMP support.
- This option is implied by `ignorebiostables'.
-
-\item [watchdog ]
- Enable NMI watchdog which can report certain failures.
-
-\item [noirqbalance ]
- Disable software IRQ balancing and affinity. This can be used on
- systems such as Dell 1850/2850 that have workarounds in hardware for
- IRQ-routing issues.
-
-\item [badpage=$<$page number$>$,$<$page number$>$, \ldots ]
- Specify a list of pages not to be allocated for use
- because they contain bad bytes. For example, if your
- memory tester says that byte 0x12345678 is bad, you would
- place `badpage=0x12345' on Xen's command line.
-
-\item [com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
- com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
- Xen supports up to two 16550-compatible serial ports.
- For example: `com1=9600, 8n1, 0x408, 5' maps COM1 to a
- 9600-baud port, 8 data bits, no parity, 1 stop bit,
- I/O port base 0x408, IRQ 5.
- If some configuration options are standard (e.g., I/O base and IRQ),
- then only a prefix of the full configuration string need be
- specified. If the baud rate is pre-configured (e.g., by the
- bootloader) then you can specify `auto' in place of a numeric baud
- rate.
-
-\item [console=$<$specifier list$>$ ]
- Specify the destination for Xen console I/O.
- This is a comma-separated list of, for example:
-\begin{description}
- \item[vga] use VGA console and allow keyboard input
- \item[com1] use serial port com1
- \item[com2H] use serial port com2. Transmitted chars will
- have the MSB set. Received chars must have
- MSB set.
- \item[com2L] use serial port com2. Transmitted chars will
- have the MSB cleared. Received chars must
- have MSB cleared.
-\end{description}
- The latter two examples allow a single port to be
- shared by two subsystems (e.g. console and
- debugger). Sharing is controlled by MSB of each
- transmitted/received character.
- [NB. Default for this option is `com1,vga']
-
-\item [sync\_console ]
- Force synchronous console output. This is useful if you system fails
- unexpectedly before it has sent all available output to the
- console. In most cases Xen will automatically enter synchronous mode
- when an exceptional event occurs, but this option provides a manual
- fallback.
-
-\item [conswitch=$<$switch-char$><$auto-switch-char$>$ ]
- Specify how to switch serial-console input between
- Xen and DOM0. The required sequence is CTRL-$<$switch-char$>$
- pressed three times. Specifying the backtick character
- disables switching.
- The $<$auto-switch-char$>$ specifies whether Xen should
- auto-switch input to DOM0 when it boots --- if it is `x'
- then auto-switching is disabled. Any other value, or
- omitting the character, enables auto-switching.
- [NB. default switch-char is `a']
-
-\item [nmi=xxx ]
- Specify what to do with an NMI parity or I/O error. \\
- `nmi=fatal': Xen prints a diagnostic and then hangs. \\
- `nmi=dom0': Inform DOM0 of the NMI. \\
- `nmi=ignore': Ignore the NMI.
-
-\item [mem=xxx ]
- Set the physical RAM address limit. Any RAM appearing beyond this
- physical address in the memory map will be ignored. This parameter
- may be specified with a B, K, M or G suffix, representing bytes,
- kilobytes, megabytes and gigabytes respectively. The
- default unit, if no suffix is specified, is kilobytes.
-
-\item [dom0\_mem=xxx ]
- Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
- may be specified with a B, K, M or G suffix, representing bytes,
- kilobytes, megabytes and gigabytes respectively; if no suffix is specified,
- the parameter defaults to kilobytes. In previous versions of Xen, suffixes
- were not supported and the value is always interpreted as kilobytes.
-
-\item [tbuf\_size=xxx ]
- Set the size of the per-cpu trace buffers, in pages
- (default 1). Note that the trace buffers are only
- enabled in debug builds. Most users can ignore
- this feature completely.
-
-\item [sched=xxx ]
- Select the CPU scheduler Xen should use. The current
- possibilities are `bvt' (default), `atropos' and `rrobin'.
- For more information see Section~\ref{s:sched}.
-
-\item [apic\_verbosity=debug,verbose ]
- Print more detailed information about local APIC and IOAPIC configuration.
-
-\item [lapic ]
- Force use of local APIC even when left disabled by uniprocessor BIOS.
-
-\item [nolapic ]
- Ignore local APIC in a uniprocessor system, even if enabled by the BIOS.
-
-\item [apic=bigsmp,default,es7000,summit ]
- Specify NUMA platform. This can usually be probed automatically.
-
-\end{description}
-
-In addition, the following options may be specified on the Xen command
-line. Since domain 0 shares responsibility for booting the platform,
-Xen will automatically propagate these options to its command
-line. These options are taken from Linux's command-line syntax with
-unchanged semantics.
-
-\begin{description}
-\item [acpi=off,force,strict,ht,noirq,\ldots ]
- Modify how Xen (and domain 0) parses the BIOS ACPI tables.
-
-\item [acpi\_skip\_timer\_override ]
- Instruct Xen (and domain 0) to ignore timer-interrupt override
- instructions specified by the BIOS ACPI tables.
-
-\item [noapic ]
- Instruct Xen (and domain 0) to ignore any IOAPICs that are present in
- the system, and instead continue to use the legacy PIC.
-
-\end{description}
-
-\section{XenLinux Boot Options}
-
-In addition to the standard Linux kernel boot options, we support:
-\begin{description}
-\item[xencons=xxx ] Specify the device node to which the Xen virtual
-console driver is attached. The following options are supported:
-\begin{center}
-\begin{tabular}{l}
-`xencons=off': disable virtual console \\
-`xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
-`xencons=ttyS': attach console to /dev/ttyS0
-\end{tabular}
-\end{center}
-The default is ttyS for dom0 and tty for all other domains.
-\end{description}
-
-
-
-\section{Debugging}
-\label{s:keys}
-
-Xen has a set of debugging features that can be useful to try and
-figure out what's going on. Hit 'h' on the serial line (if you
-specified a baud rate on the Xen command line) or ScrollLock-h on the
-keyboard to get a list of supported commands.
-
-If you have a crash you'll likely get a crash dump containing an EIP
-(PC) which, along with an \path{objdump -d image}, can be useful in
-figuring out what's happened. Debug a Xenlinux image just as you
-would any other Linux kernel.
-
-%% We supply a handy debug terminal program which you can find in
-%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/}
-%% This should be built and executed on another machine that is connected
-%% via a null modem cable. Documentation is included.
-%% Alternatively, if the Xen machine is connected to a serial-port server
-%% then we supply a dumb TCP terminal client, {\tt xencons}.
-
-
+%% Chapter Control Software moved to control_software.tex
+\include{src/user/control_software}
+
+%% Chapter Domain Configuration moved to domain_configuration.tex
+\include{src/user/domain_configuration}
+
+%% Chapter Build, Boot and Debug Options moved to build.tex
+\include{src/user/build}
\chapter{Further Support}
@@ -1875,6 +108,7 @@
%Various HOWTOs are available in \path{docs/HOWTOS} but this content is
%being integrated into this manual.
+
\section{Online References}
The official Xen web site is found at:
@@ -1884,6 +118,7 @@
This contains links to the latest versions of all on-line
documentation (including the lateset version of the FAQ).
+
\section{Mailing Lists}
@@ -1905,326 +140,18 @@
\end{description}
+
\appendix
-
-\chapter{Installing Xen / XenLinux on Debian}
-
-The Debian project provides a tool called \path{debootstrap} which
-allows a base Debian system to be installed into a filesystem without
-requiring the host system to have any Debian-specific software (such
-as \path{apt}.
-
-Here's some info how to install Debian 3.1 (Sarge) for an unprivileged
-Xen domain:
-
-\begin{enumerate}
-\item Set up Xen 2.0 and test that it's working, as described earlier in
- this manual.
-
-\item Create disk images for root-fs and swap (alternatively, you
- might create dedicated partitions, LVM logical volumes, etc. if
- that suits your setup).
-\begin{small}\begin{verbatim}
-dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
-dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
-\end{verbatim}\end{small}
- If you're going to use this filesystem / disk image only as a
- `template' for other vm disk images, something like 300 MB should
- be enough.. (of course it depends what kind of packages you are
- planning to install to the template)
-
-\item Create the filesystem and initialise the swap image
-\begin{small}\begin{verbatim}
-mkfs.ext3 /path/diskimage
-mkswap /path/swapimage
-\end{verbatim}\end{small}
-
-\item Mount the disk image for installation
-\begin{small}\begin{verbatim}
-mount -o loop /path/diskimage /mnt/disk
-\end{verbatim}\end{small}
-
-\item Install \path{debootstrap}
-
-Make sure you have debootstrap installed on the host. If you are
-running Debian sarge (3.1 / testing) or unstable you can install it by
-running \path{apt-get install debootstrap}. Otherwise, it can be
-downloaded from the Debian project website.
-
-\item Install Debian base to the disk image:
-\begin{small}\begin{verbatim}
-debootstrap --arch i386 sarge /mnt/disk \
- http://ftp.<countrycode>.debian.org/debian
-\end{verbatim}\end{small}
-
-You can use any other Debian http/ftp mirror you want.
-
-\item When debootstrap completes successfully, modify settings:
-\begin{small}\begin{verbatim}
-chroot /mnt/disk /bin/bash
-\end{verbatim}\end{small}
-
-Edit the following files using vi or nano and make needed changes:
-\begin{small}\begin{verbatim}
-/etc/hostname
-/etc/hosts
-/etc/resolv.conf
-/etc/network/interfaces
-/etc/networks
-\end{verbatim}\end{small}
-
-Set up access to the services, edit:
-\begin{small}\begin{verbatim}
-/etc/hosts.deny
-/etc/hosts.allow
-/etc/inetd.conf
-\end{verbatim}\end{small}
-
-Add Debian mirror to:
-\begin{small}\begin{verbatim}
-/etc/apt/sources.list
-\end{verbatim}\end{small}
-
-Create fstab like this:
-\begin{small}\begin{verbatim}
-/dev/sda1 / ext3 errors=remount-ro 0 1
-/dev/sda2 none swap sw 0 0
-proc /proc proc defaults 0 0
-\end{verbatim}\end{small}
-
-Logout
-
-\item Unmount the disk image
-\begin{small}\begin{verbatim}
-umount /mnt/disk
-\end{verbatim}\end{small}
-
-\item Create Xen 2.0 configuration file for the new domain. You can
- use the example-configurations coming with Xen as a template.
-
- Make sure you have the following set up:
-\begin{small}\begin{verbatim}
-disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
-root = "/dev/sda1 ro"
-\end{verbatim}\end{small}
-
-\item Start the new domain
-\begin{small}\begin{verbatim}
-xm create -f domain_config_file
-\end{verbatim}\end{small}
-
-Check that the new domain is running:
-\begin{small}\begin{verbatim}
-xm list
-\end{verbatim}\end{small}
-
-\item Attach to the console of the new domain.
- You should see something like this when starting the new domain:
-
-\begin{small}\begin{verbatim}
-Started domain testdomain2, console on port 9626
-\end{verbatim}\end{small}
-
- There you can see the ID of the console: 26. You can also list
- the consoles with \path{xm consoles} (ID is the last two
- digits of the port number.)
-
- Attach to the console:
-
-\begin{small}\begin{verbatim}
-xm console 26
-\end{verbatim}\end{small}
-
- or by telnetting to the port 9626 of localhost (the xm console
- program works better).
-
-\item Log in and run base-config
-
- As a default there's no password for the root.
-
- Check that everything looks OK, and the system started without
- errors. Check that the swap is active, and the network settings are
- correct.
-
- Run \path{/usr/sbin/base-config} to set up the Debian settings.
-
- Set up the password for root using passwd.
-
-\item Done. You can exit the console by pressing \path{Ctrl + ]}
-
-\end{enumerate}
-
-If you need to create new domains, you can just copy the contents of
-the `template'-image to the new disk images, either by mounting the
-template and the new image, and using \path{cp -a} or \path{tar} or by
-simply copying the image file. Once this is done, modify the
-image-specific settings (hostname, network settings, etc).
-
-\chapter{Installing Xen / XenLinux on Redhat or Fedora Core}
-
-When using Xen / XenLinux on a standard Linux distribution there are
-a couple of things to watch out for:
-
-Note that, because domains>0 don't have any privileged access at all,
-certain commands in the default boot sequence will fail e.g. attempts
-to update the hwclock, change the console font, update the keytable
-map, start apmd (power management), or gpm (mouse cursor). Either
-ignore the errors (they should be harmless), or remove them from the
-startup scripts. Deleting the following links are a good start:
-{\path{S24pcmcia}}, {\path{S09isdn}},
-{\path{S17keytable}}, {\path{S26apmd}},
-{\path{S85gpm}}.
-
-If you want to use a single root file system that works cleanly for
-both domain 0 and unprivileged domains, a useful trick is to use
-different 'init' run levels. For example, use
-run level 3 for domain 0, and run level 4 for other domains. This
-enables different startup scripts to be run in depending on the run
-level number passed on the kernel command line.
-
-If using NFS root files systems mounted either from an
-external server or from domain0 there are a couple of other gotchas.
-The default {\path{/etc/sysconfig/iptables}} rules block NFS, so part
-way through the boot sequence things will suddenly go dead.
-
-If you're planning on having a separate NFS {\path{/usr}} partition, the
-RH9 boot scripts don't make life easy - they attempt to mount NFS file
-systems way to late in the boot process. The easiest way I found to do
-this was to have a {\path{/linuxrc}} script run ahead of
-{\path{/sbin/init}} that mounts {\path{/usr}}:
-
-\begin{quote}
-\begin{small}\begin{verbatim}
- #!/bin/bash
- /sbin/ipconfig lo 127.0.0.1
- /sbin/portmap
- /bin/mount /usr
- exec /sbin/init "$@" <>/dev/console 2>&1
-\end{verbatim}\end{small}
-\end{quote}
-
-%$ XXX SMH: font lock fix :-)
-
-The one slight complication with the above is that
-{\path{/sbin/portmap}} is dynamically linked against
-{\path{/usr/lib/libwrap.so.0}} Since this is in
-{\path{/usr}}, it won't work. This can be solved by copying the
-file (and link) below the /usr mount point, and just let the file be
-'covered' when the mount happens.
-
-In some installations, where a shared read-only {\path{/usr}} is
-being used, it may be desirable to move other large directories over
-into the read-only {\path{/usr}}. For example, you might replace
-{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with
-links into {\path{/usr/root/bin}}, {\path{/usr/root/lib}}
-and {\path{/usr/root/sbin}} respectively. This creates other
-problems for running the {\path{/linuxrc}} script, requiring
-bash, portmap, mount, ifconfig, and a handful of other shared
-libraries to be copied below the mount point --- a simple
-statically-linked C program would solve this problem.
-
-
-
-
-\chapter{Glossary of Terms}
-
-\begin{description}
-\item[Atropos] One of the CPU schedulers provided by Xen.
- Atropos provides domains with absolute shares
- of the CPU, with timeliness guarantees and a
- mechanism for sharing out `slack time'.
-
-\item[BVT] The BVT scheduler is used to give proportional
- fair shares of the CPU to domains.
-
-\item[Exokernel] A minimal piece of privileged code, similar to
- a {\bf microkernel} but providing a more
- `hardware-like' interface to the tasks it
- manages. This is similar to a paravirtualising
- VMM like {\bf Xen} but was designed as a new
- operating system structure, rather than
- specifically to run multiple conventional OSs.
-
-\item[Domain] A domain is the execution context that
- contains a running {\bf virtual machine}.
- The relationship between virtual machines
- and domains on Xen is similar to that between
- programs and processes in an operating
- system: a virtual machine is a persistent
- entity that resides on disk (somewhat like
- a program). When it is loaded for execution,
- it runs in a domain. Each domain has a
- {\bf domain ID}.
-
-\item[Domain 0] The first domain to be started on a Xen
- machine. Domain 0 is responsible for managing
- the system.
-
-\item[Domain ID] A unique identifier for a {\bf domain},
- analogous to a process ID in an operating
- system.
-
-\item[Full virtualisation] An approach to virtualisation which
- requires no modifications to the hosted
- operating system, providing the illusion of
- a complete system of real hardware devices.
-
-\item[Hypervisor] An alternative term for {\bf VMM}, used
- because it means `beyond supervisor',
- since it is responsible for managing multiple
- `supervisor' kernels.
-
-\item[Live migration] A technique for moving a running virtual
- machine to another physical host, without
- stopping it or the services running on it.
-
-\item[Microkernel] A small base of code running at the highest
- hardware privilege level. A microkernel is
- responsible for sharing CPU and memory (and
- sometimes other devices) between less
- privileged tasks running on the system.
- This is similar to a VMM, particularly a
- {\bf paravirtualising} VMM but typically
- addressing a different problem space and
- providing different kind of interface.
-
-\item[NetBSD/Xen] A port of NetBSD to the Xen architecture.
-
-\item[Paravirtualisation] An approach to virtualisation which requires
- modifications to the operating system in
- order to run in a virtual machine. Xen
- uses paravirtualisation but preserves
- binary compatibility for user space
- applications.
-
-\item[Shadow pagetables] A technique for hiding the layout of machine
- memory from a virtual machine's operating
- system. Used in some {\bf VMMs} to provide
- the illusion of contiguous physical memory,
- in Xen this is used during
- {\bf live migration}.
-
-\item[Virtual Machine] The environment in which a hosted operating
- system runs, providing the abstraction of a
- dedicated machine. A virtual machine may
- be identical to the underlying hardware (as
- in {\bf full virtualisation}, or it may
- differ, as in {\bf paravirtualisation}.
-
-\item[VMM] Virtual Machine Monitor - the software that
- allows multiple virtual machines to be
- multiplexed on a single physical machine.
-
-\item[Xen] Xen is a paravirtualising virtual machine
- monitor, developed primarily by the
- Systems Research Group at the University
- of Cambridge Computer Laboratory.
-
-\item[XenLinux] Official name for the port of the Linux kernel
- that runs on Xen.
-
-\end{description}
+%% Chapter Installing Xen / XenLinux on Debian moved to debian.tex
+\include{src/user/debian}
+
+%% Chapter Installing Xen on Red Hat moved to redhat.tex
+\include{src/user/redhat}
+
+
+%% Chapter Glossary of Terms moved to glossary.tex
+\include{src/user/glossary}
\end{document}
diff -r 97dbd9524a7e -r 06d84bf87159 extras/mini-os/xenbus/xenbus_xs.c
--- a/extras/mini-os/xenbus/xenbus_xs.c Thu Sep 22 17:34:14 2005
+++ b/extras/mini-os/xenbus/xenbus_xs.c Thu Sep 22 17:42:01 2005
@@ -127,7 +127,7 @@
return ERR_PTR(err);
for (i = 0; i < num_vecs; i++) {
- err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
+ err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
if (err)
return ERR_PTR(err);
}
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Thu Sep 22 17:42:01 2005
@@ -73,6 +73,8 @@
config XEN_TPMDEV_FRONTEND
bool "TPM-device frontend driver"
default n
+ select TCG_TPM
+ select TCG_XEN
help
The TPM-device frontend driver.
@@ -108,13 +110,6 @@
network interfaces within another guest OS. Unless you are building a
dedicated device-driver domain, or your master control domain
(domain 0), then you almost certainly want to say Y here.
-
-config XEN_NETDEV_GRANT
- bool "Grant table substrate for network drivers (DANGEROUS)"
- default n
- help
- This introduces the use of grant tables as a data exhange mechanism
- between the frontend and backend network drivers.
config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
bool "Pipelined transmitter (DANGEROUS)"
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Thu Sep
22 17:42:01 2005
@@ -19,7 +19,6 @@
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Thu Sep
22 17:42:01 2005
@@ -19,7 +19,6 @@
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Thu Sep
22 17:42:01 2005
@@ -16,7 +16,6 @@
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Thu Sep
22 17:42:01 2005
@@ -16,7 +16,6 @@
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Thu Sep
22 17:42:01 2005
@@ -19,7 +19,6 @@
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
@@ -372,7 +371,7 @@
#
CONFIG_ISAPNP=y
# CONFIG_PNPBIOS is not set
-CONFIG_PNPACPI=y
+# CONFIG_PNPACPI is not set
#
# Block devices
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Thu Sep
22 17:42:01 2005
@@ -19,7 +19,6 @@
# CONFIG_XEN_TPMDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Thu Sep 22 17:42:01 2005
@@ -45,12 +45,12 @@
return 0;
}
-int direct_remap_pfn_range(struct mm_struct *mm,
- unsigned long address,
- unsigned long mfn,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
+static int __direct_remap_pfn_range(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long mfn,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid)
{
int i;
unsigned long start_address;
@@ -98,6 +98,20 @@
return 0;
}
+int direct_remap_pfn_range(struct vm_area_struct *vma,
+ unsigned long address,
+ unsigned long mfn,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid)
+{
+ /* Same as remap_pfn_range(). */
+ vma->vm_flags |= VM_IO | VM_RESERVED;
+
+ return __direct_remap_pfn_range(
+ vma->vm_mm, address, mfn, size, prot, domid);
+}
+
EXPORT_SYMBOL(direct_remap_pfn_range);
@@ -221,8 +235,9 @@
#ifdef __x86_64__
flags |= _PAGE_USER;
#endif
- if (direct_remap_pfn_range(&init_mm, (unsigned long) addr,
phys_addr>>PAGE_SHIFT,
- size, __pgprot(flags), domid)) {
+ if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
+ phys_addr>>PAGE_SHIFT,
+ size, __pgprot(flags), domid)) {
vunmap((void __force *) addr);
return NULL;
}
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c Thu Sep 22 17:42:01 2005
@@ -295,7 +295,7 @@
/* Write-combine setting is ignored, it is changed via the mtrr
* interfaces on this platform.
*/
- if (direct_remap_pfn_range(vma->vm_mm, vma->vm_start, vma->vm_pgoff,
+ if (direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot, DOMID_IO))
return -EAGAIN;
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/kernel/devmem.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/devmem.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/devmem.c Thu Sep 22 17:42:01 2005
@@ -90,22 +90,10 @@
static int mmap_mem(struct file * file, struct vm_area_struct * vma)
{
- int uncached;
-
- uncached = uncached_access(file);
- if (uncached)
+ if (uncached_access(file))
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- /* Don't try to swap out physical pages.. */
- vma->vm_flags |= VM_RESERVED;
-
- /*
- * Don't dump addresses that are not real memory to a core file.
- */
- if (uncached)
- vma->vm_flags |= VM_IO;
-
- if (direct_remap_pfn_range(vma->vm_mm, vma->vm_start, vma->vm_pgoff,
+ if (direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot, DOMID_IO))
return -EAGAIN;
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Thu Sep 22 17:42:01 2005
@@ -182,14 +182,14 @@
}
int
-gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
+gnttab_grant_foreign_transfer(domid_t domid)
{
int ref;
if ( unlikely((ref = get_free_entry()) == -1) )
return -ENOSPC;
- shared[ref].frame = pfn;
+ shared[ref].frame = 0;
shared[ref].domid = domid;
wmb();
shared[ref].flags = GTF_accept_transfer;
@@ -198,10 +198,9 @@
}
void
-gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
- unsigned long pfn)
-{
- shared[ref].frame = pfn;
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid)
+{
+ shared[ref].frame = 0;
shared[ref].domid = domid;
wmb();
shared[ref].flags = GTF_accept_transfer;
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Thu Sep 22 17:42:01 2005
@@ -334,7 +334,7 @@
return;
}
- xenbus_write("control", "shutdown", "", O_CREAT);
+ xenbus_write("control", "shutdown", "");
err = xenbus_transaction_end(0);
if (err == -ETIMEDOUT) {
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Sep 22 17:42:01 2005
@@ -1,4 +1,5 @@
+obj-y += util.o
obj-y += console/
obj-y += evtchn/
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Sep 22
17:42:01 2005
@@ -362,7 +362,10 @@
return;
}
- set_new_target(new_target >> PAGE_SHIFT);
+ /* The given memory/target value is in KiB, so it needs converting to
+ pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+ */
+ set_new_target(new_target >> (PAGE_SHIFT - 10));
}
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu Sep 22
17:42:01 2005
@@ -28,12 +28,12 @@
#define BATCH_PER_DOMAIN 16
static unsigned long mmap_vstart;
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
/*
* Each outstanding request that we've passed to the lower device layers has a
@@ -42,12 +42,12 @@
* response queued for it, with the saved 'id' passed back.
*/
typedef struct {
- blkif_t *blkif;
- unsigned long id;
- int nr_pages;
- atomic_t pendcnt;
- unsigned short operation;
- int status;
+ blkif_t *blkif;
+ unsigned long id;
+ int nr_pages;
+ atomic_t pendcnt;
+ unsigned short operation;
+ int status;
} pending_req_t;
/*
@@ -68,14 +68,13 @@
static request_queue_t *plugged_queue;
static inline void flush_plugged_queue(void)
{
- request_queue_t *q = plugged_queue;
- if ( q != NULL )
- {
- if ( q->unplug_fn != NULL )
- q->unplug_fn(q);
- blk_put_queue(q);
- plugged_queue = NULL;
- }
+ request_queue_t *q = plugged_queue;
+ if (q != NULL) {
+ if ( q->unplug_fn != NULL )
+ q->unplug_fn(q);
+ blk_put_queue(q);
+ plugged_queue = NULL;
+ }
}
/* When using grant tables to map a frame for device access then the
@@ -106,24 +105,23 @@
static void fast_flush_area(int idx, int nr_pages)
{
- struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- unsigned int i, invcount = 0;
- u16 handle;
-
- for ( i = 0; i < nr_pages; i++ )
- {
- if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
- {
- unmap[i].host_addr = MMAP_VADDR(idx, i);
- unmap[i].dev_bus_addr = 0;
- unmap[i].handle = handle;
- pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
- invcount++;
- }
- }
- if ( unlikely(HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, unmap, invcount)))
- BUG();
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int i, invcount = 0;
+ u16 handle;
+
+ for (i = 0; i < nr_pages; i++) {
+ handle = pending_handle(idx, i);
+ if (handle == BLKBACK_INVALID_HANDLE)
+ continue;
+ unmap[i].host_addr = MMAP_VADDR(idx, i);
+ unmap[i].dev_bus_addr = 0;
+ unmap[i].handle = handle;
+ pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+ invcount++;
+ }
+
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, invcount));
}
@@ -136,34 +134,38 @@
static int __on_blkdev_list(blkif_t *blkif)
{
- return blkif->blkdev_list.next != NULL;
+ return blkif->blkdev_list.next != NULL;
}
static void remove_from_blkdev_list(blkif_t *blkif)
{
- unsigned long flags;
- if ( !__on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if ( __on_blkdev_list(blkif) )
- {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = NULL;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+ unsigned long flags;
+
+ if (!__on_blkdev_list(blkif))
+ return;
+
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if (__on_blkdev_list(blkif)) {
+ list_del(&blkif->blkdev_list);
+ blkif->blkdev_list.next = NULL;
+ blkif_put(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}
static void add_to_blkdev_list_tail(blkif_t *blkif)
{
- unsigned long flags;
- if ( __on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
- {
- list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
- blkif_get(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+ unsigned long flags;
+
+ if (__on_blkdev_list(blkif))
+ return;
+
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
+ list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ blkif_get(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}
@@ -175,54 +177,53 @@
static int blkio_schedule(void *arg)
{
- DECLARE_WAITQUEUE(wq, current);
-
- blkif_t *blkif;
- struct list_head *ent;
-
- daemonize("xenblkd");
-
- for ( ; ; )
- {
- /* Wait for work to do. */
- add_wait_queue(&blkio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&blkio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&blkio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&blkio_schedule_list) )
- {
- ent = blkio_schedule_list.next;
- blkif = list_entry(ent, blkif_t, blkdev_list);
- blkif_get(blkif);
- remove_from_blkdev_list(blkif);
- if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
- add_to_blkdev_list_tail(blkif);
- blkif_put(blkif);
- }
-
- /* Push the batch through to disc. */
- flush_plugged_queue();
- }
+ DECLARE_WAITQUEUE(wq, current);
+
+ blkif_t *blkif;
+ struct list_head *ent;
+
+ daemonize("xenblkd");
+
+ for (;;) {
+ /* Wait for work to do. */
+ add_wait_queue(&blkio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
+ list_empty(&blkio_schedule_list) )
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&blkio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ !list_empty(&blkio_schedule_list)) {
+ ent = blkio_schedule_list.next;
+ blkif = list_entry(ent, blkif_t, blkdev_list);
+ blkif_get(blkif);
+ remove_from_blkdev_list(blkif);
+ if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
+ add_to_blkdev_list_tail(blkif);
+ blkif_put(blkif);
+ }
+
+ /* Push the batch through to disc. */
+ flush_plugged_queue();
+ }
}
static void maybe_trigger_blkio_schedule(void)
{
- /*
- * Needed so that two processes, who together make the following predicate
- * true, don't both read stale values and evaluate the predicate
- * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
- */
- smp_mb();
-
- if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
- !list_empty(&blkio_schedule_list) )
- wake_up(&blkio_schedule_wait);
+ /*
+ * Needed so that two processes, which together make the following
+ * predicate true, don't both read stale values and evaluate the
+ * predicate incorrectly. Incredibly unlikely to stall the scheduler
+ * on x86, but...
+ */
+ smp_mb();
+
+ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&blkio_schedule_list))
+ wake_up(&blkio_schedule_wait);
}
@@ -233,36 +234,34 @@
static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
- unsigned long flags;
-
- /* An error fails the entire request. */
- if ( !uptodate )
- {
- DPRINTK("Buffer not up-to-date at end of operation\n");
- pending_req->status = BLKIF_RSP_ERROR;
- }
-
- if ( atomic_dec_and_test(&pending_req->pendcnt) )
- {
- int pending_idx = pending_req - pending_reqs;
- fast_flush_area(pending_idx, pending_req->nr_pages);
- make_response(pending_req->blkif, pending_req->id,
- pending_req->operation, pending_req->status);
- blkif_put(pending_req->blkif);
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
- maybe_trigger_blkio_schedule();
- }
+ unsigned long flags;
+
+ /* An error fails the entire request. */
+ if (!uptodate) {
+ DPRINTK("Buffer not up-to-date at end of operation\n");
+ pending_req->status = BLKIF_RSP_ERROR;
+ }
+
+ if (atomic_dec_and_test(&pending_req->pendcnt)) {
+ int pending_idx = pending_req - pending_reqs;
+ fast_flush_area(pending_idx, pending_req->nr_pages);
+ make_response(pending_req->blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ blkif_put(pending_req->blkif);
+ spin_lock_irqsave(&pend_prod_lock, flags);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ spin_unlock_irqrestore(&pend_prod_lock, flags);
+ maybe_trigger_blkio_schedule();
+ }
}
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
- if ( bio->bi_size != 0 )
- return 1;
- __end_block_io_op(bio->bi_private, !error);
- bio_put(bio);
- return error;
+ if (bio->bi_size != 0)
+ return 1;
+ __end_block_io_op(bio->bi_private, !error);
+ bio_put(bio);
+ return error;
}
@@ -272,10 +271,10 @@
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
- blkif_t *blkif = dev_id;
- add_to_blkdev_list_tail(blkif);
- maybe_trigger_blkio_schedule();
- return IRQ_HANDLED;
+ blkif_t *blkif = dev_id;
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blkio_schedule();
+ return IRQ_HANDLED;
}
@@ -286,183 +285,174 @@
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
- blkif_back_ring_t *blk_ring = &blkif->blk_ring;
- blkif_request_t *req;
- RING_IDX i, rp;
- int more_to_do = 0;
-
- rp = blk_ring->sring->req_prod;
- rmb(); /* Ensure we see queued requests up to 'rp'. */
-
- for ( i = blk_ring->req_cons;
- (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
- i++ )
- {
- if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
- {
- more_to_do = 1;
- break;
- }
+ blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+ blkif_request_t *req;
+ RING_IDX i, rp;
+ int more_to_do = 0;
+
+ rp = blk_ring->sring->req_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ for (i = blk_ring->req_cons;
+ (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
+ i++) {
+ if ((max_to_do-- == 0) ||
+ (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+ more_to_do = 1;
+ break;
+ }
- req = RING_GET_REQUEST(blk_ring, i);
- switch ( req->operation )
- {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- dispatch_rw_block_io(blkif, req);
- break;
-
- default:
- DPRINTK("error: unknown block io operation [%d]\n",
- req->operation);
- make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
- break;
- }
- }
-
- blk_ring->req_cons = i;
- return more_to_do;
+ req = RING_GET_REQUEST(blk_ring, i);
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ dispatch_rw_block_io(blkif, req);
+ break;
+
+ default:
+ DPRINTK("error: unknown block io operation [%d]\n",
+ req->operation);
+ make_response(blkif, req->id, req->operation,
+ BLKIF_RSP_ERROR);
+ break;
+ }
+ }
+
+ blk_ring->req_cons = i;
+ return more_to_do;
}
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
- extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
- int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
- unsigned long fas = 0;
- int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
- struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- struct phys_req preq;
- struct {
- unsigned long buf; unsigned int nsec;
- } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- unsigned int nseg;
- struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int nbio = 0;
- request_queue_t *q;
-
- /* Check that number of segments is sane. */
- nseg = req->nr_segments;
- if ( unlikely(nseg == 0) ||
- unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
- {
- DPRINTK("Bad number of segments in request (%d)\n", nseg);
- goto bad_descriptor;
- }
-
- preq.dev = req->handle;
- preq.sector_number = req->sector_number;
- preq.nr_sects = 0;
-
- for ( i = 0; i < nseg; i++ )
- {
- fas = req->frame_and_sects[i];
- seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
-
- if ( seg[i].nsec <= 0 )
- goto bad_descriptor;
- preq.nr_sects += seg[i].nsec;
-
- map[i].host_addr = MMAP_VADDR(pending_idx, i);
- map[i].dom = blkif->domid;
- map[i].ref = blkif_gref_from_fas(fas);
- map[i].flags = GNTMAP_host_map;
- if ( operation == WRITE )
- map[i].flags |= GNTMAP_readonly;
- }
-
- if ( unlikely(HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, map, nseg)))
- BUG();
-
- for ( i = 0; i < nseg; i++ )
- {
- if ( unlikely(map[i].handle < 0) )
- {
- DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
- }
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
-
- pending_handle(pending_idx, i) = map[i].handle;
- }
-
- for ( i = 0; i < nseg; i++ )
- {
- fas = req->frame_and_sects[i];
- seg[i].buf = map[i].dev_bus_addr | (blkif_first_sect(fas) << 9);
- }
-
- if ( vbd_translate(&preq, blkif, operation) != 0 )
- {
- DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
- operation == READ ? "read" : "write", preq.sector_number,
- preq.sector_number + preq.nr_sects, preq.dev);
- goto bad_descriptor;
- }
-
- pending_req = &pending_reqs[pending_idx];
- pending_req->blkif = blkif;
- pending_req->id = req->id;
- pending_req->operation = operation;
- pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
-
- for ( i = 0; i < nseg; i++ )
- {
- if ( ((int)preq.sector_number|(int)seg[i].nsec) &
- ((bdev_hardsect_size(preq.bdev) >> 9) - 1) )
- {
- DPRINTK("Misaligned I/O request from domain %d", blkif->domid);
- goto cleanup_and_fail;
- }
-
- while ( (bio == NULL) ||
- (bio_add_page(bio,
- virt_to_page(MMAP_VADDR(pending_idx, i)),
- seg[i].nsec << 9,
- seg[i].buf & ~PAGE_MASK) == 0) )
- {
- bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
- if ( unlikely(bio == NULL) )
- {
- cleanup_and_fail:
- for ( i = 0; i < (nbio-1); i++ )
- bio_put(biolist[i]);
- fast_flush_area(pending_idx, nseg);
- goto bad_descriptor;
- }
+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
+ int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+ unsigned long fas = 0;
+ int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ pending_req_t *pending_req;
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct phys_req preq;
+ struct {
+ unsigned long buf; unsigned int nsec;
+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int nseg;
+ struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int nbio = 0;
+ request_queue_t *q;
+
+ /* Check that number of segments is sane. */
+ nseg = req->nr_segments;
+ if (unlikely(nseg == 0) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
+ goto bad_descriptor;
+ }
+
+ preq.dev = req->handle;
+ preq.sector_number = req->sector_number;
+ preq.nr_sects = 0;
+
+ for (i = 0; i < nseg; i++) {
+ fas = req->frame_and_sects[i];
+ seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+
+ if (seg[i].nsec <= 0)
+ goto bad_descriptor;
+ preq.nr_sects += seg[i].nsec;
+
+ map[i].host_addr = MMAP_VADDR(pending_idx, i);
+ map[i].dom = blkif->domid;
+ map[i].ref = blkif_gref_from_fas(fas);
+ map[i].flags = GNTMAP_host_map;
+ if ( operation == WRITE )
+ map[i].flags |= GNTMAP_readonly;
+ }
+
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, map, nseg));
+
+ for (i = 0; i < nseg; i++) {
+ if (unlikely(map[i].handle < 0)) {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ fast_flush_area(pending_idx, nseg);
+ goto bad_descriptor;
+ }
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(
+ pending_idx, i)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+ pending_handle(pending_idx, i) = map[i].handle;
+ }
+
+ for (i = 0; i < nseg; i++) {
+ fas = req->frame_and_sects[i];
+ seg[i].buf = map[i].dev_bus_addr |
+ (blkif_first_sect(fas) << 9);
+ }
+
+ if (vbd_translate(&preq, blkif, operation) != 0) {
+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
+ operation == READ ? "read" : "write",
+ preq.sector_number,
+ preq.sector_number + preq.nr_sects, preq.dev);
+ goto bad_descriptor;
+ }
+
+ pending_req = &pending_reqs[pending_idx];
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+
+ for (i = 0; i < nseg; i++) {
+ if (((int)preq.sector_number|(int)seg[i].nsec) &
+ ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
+ DPRINTK("Misaligned I/O request from domain %d",
+ blkif->domid);
+ goto cleanup_and_fail;
+ }
+
+ while ((bio == NULL) ||
+ (bio_add_page(bio,
+ virt_to_page(MMAP_VADDR(pending_idx, i)),
+ seg[i].nsec << 9,
+ seg[i].buf & ~PAGE_MASK) == 0)) {
+ bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
+ if (unlikely(bio == NULL)) {
+ cleanup_and_fail:
+ for (i = 0; i < (nbio-1); i++)
+ bio_put(biolist[i]);
+ fast_flush_area(pending_idx, nseg);
+ goto bad_descriptor;
+ }
- bio->bi_bdev = preq.bdev;
- bio->bi_private = pending_req;
- bio->bi_end_io = end_block_io_op;
- bio->bi_sector = preq.sector_number;
- }
-
- preq.sector_number += seg[i].nsec;
- }
-
- if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
- {
- flush_plugged_queue();
- blk_get_queue(q);
- plugged_queue = q;
- }
-
- atomic_set(&pending_req->pendcnt, nbio);
- pending_cons++;
- blkif_get(blkif);
-
- for ( i = 0; i < nbio; i++ )
- submit_bio(operation, biolist[i]);
-
- return;
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = preq.sector_number;
+ }
+
+ preq.sector_number += seg[i].nsec;
+ }
+
+ if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
+ flush_plugged_queue();
+ blk_get_queue(q);
+ plugged_queue = q;
+ }
+
+ atomic_set(&pending_req->pendcnt, nbio);
+ pending_cons++;
+ blkif_get(blkif);
+
+ for (i = 0; i < nbio; i++)
+ submit_bio(operation, biolist[i]);
+
+ return;
bad_descriptor:
- make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
}
@@ -475,66 +465,71 @@
static void make_response(blkif_t *blkif, unsigned long id,
unsigned short op, int st)
{
- blkif_response_t *resp;
- unsigned long flags;
- blkif_back_ring_t *blk_ring = &blkif->blk_ring;
-
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&blkif->blk_ring_lock, flags);
- resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
- resp->id = id;
- resp->operation = op;
- resp->status = st;
- wmb(); /* Ensure other side can see the response fields. */
- blk_ring->rsp_prod_pvt++;
- RING_PUSH_RESPONSES(blk_ring);
- spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
-
- /* Kick the relevant domain. */
- notify_via_evtchn(blkif->evtchn);
+ blkif_response_t *resp;
+ unsigned long flags;
+ blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+ wmb(); /* Ensure other side can see the response fields. */
+ blk_ring->rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(blk_ring);
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ notify_via_evtchn(blkif->evtchn);
}
void blkif_deschedule(blkif_t *blkif)
{
- remove_from_blkdev_list(blkif);
+ remove_from_blkdev_list(blkif);
}
static int __init blkif_init(void)
{
- int i;
- struct page *page;
-
- if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
- !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
- return 0;
-
- blkif_interface_init();
-
- page = balloon_alloc_empty_page_range(MMAP_PAGES);
- BUG_ON(page == NULL);
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
- memset(pending_reqs, 0, sizeof(pending_reqs));
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- pending_ring[i] = i;
+ int i;
+ struct page *page;
+
+ if (!(xen_start_info->flags & SIF_INITDOMAIN) &&
+ !(xen_start_info->flags & SIF_BLK_BE_DOMAIN))
+ return 0;
+
+ blkif_interface_init();
+
+ page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ memset(pending_reqs, 0, sizeof(pending_reqs));
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ pending_ring[i] = i;
- spin_lock_init(&blkio_schedule_list_lock);
- INIT_LIST_HEAD(&blkio_schedule_list);
-
- if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
- BUG();
-
- blkif_xenbus_init();
-
- memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES );
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
-#endif
-
- return 0;
+ spin_lock_init(&blkio_schedule_list_lock);
+ INIT_LIST_HEAD(&blkio_schedule_list);
+
+ BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0);
+
+ blkif_xenbus_init();
+
+ memset(pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES);
+
+ return 0;
}
__initcall(blkif_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Sep 22 17:42:01 2005
@@ -17,6 +17,7 @@
#include <asm-xen/xen-public/io/blkif.h>
#include <asm-xen/xen-public/io/ring.h>
#include <asm-xen/gnttab.h>
+#include <asm-xen/driver_util.h>
#if 0
#define ASSERT(_p) \
@@ -30,39 +31,39 @@
#endif
struct vbd {
- blkif_vdev_t handle; /* what the domain refers to this vbd as */
- unsigned char readonly; /* Non-zero -> read-only */
- unsigned char type; /* VDISK_xxx */
- u32 pdevice; /* phys device that this vbd maps to */
- struct block_device *bdev;
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_xxx */
+ u32 pdevice; /* phys device that this vbd maps to */
+ struct block_device *bdev;
};
typedef struct blkif_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- unsigned int remote_evtchn;
- /* Comms information. */
- blkif_back_ring_t blk_ring;
- /* VBDs attached to this interface. */
- struct vbd vbd;
- /* Private fields. */
- enum { DISCONNECTED, CONNECTED } status;
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned int evtchn;
+ unsigned int remote_evtchn;
+ /* Comms information. */
+ blkif_back_ring_t blk_ring;
+ struct vm_struct *blk_ring_area;
+ /* VBDs attached to this interface. */
+ struct vbd vbd;
+ /* Private fields. */
+ enum { DISCONNECTED, CONNECTED } status;
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- /* Is this a blktap frontend */
- unsigned int is_blktap;
+ /* Is this a blktap frontend */
+ unsigned int is_blktap;
#endif
- struct list_head blkdev_list;
- spinlock_t blk_ring_lock;
- atomic_t refcnt;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
- struct work_struct free_work;
- u16 shmem_handle;
- unsigned long shmem_vaddr;
- grant_ref_t shmem_ref;
+ struct work_struct free_work;
+
+ u16 shmem_handle;
+ grant_ref_t shmem_ref;
} blkif_t;
blkif_t *alloc_blkif(domid_t domid);
@@ -70,11 +71,11 @@
int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- free_blkif_callback(_b); \
- } while (0)
+#define blkif_put(_b) \
+ do { \
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
+ free_blkif_callback(_b); \
+ } while (0)
/* Create a vbd. */
int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, u32 pdevice,
@@ -86,10 +87,10 @@
unsigned long vbd_secsize(struct vbd *vbd);
struct phys_req {
- unsigned short dev;
- unsigned short nr_sects;
- struct block_device *bdev;
- blkif_sector_t sector_number;
+ unsigned short dev;
+ unsigned short nr_sects;
+ struct block_device *bdev;
+ blkif_sector_t sector_number;
};
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
@@ -103,3 +104,13 @@
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
#endif /* __BLKIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Thu Sep 22
17:42:01 2005
@@ -13,131 +13,144 @@
blkif_t *alloc_blkif(domid_t domid)
{
- blkif_t *blkif;
+ blkif_t *blkif;
- blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
- if (!blkif)
- return ERR_PTR(-ENOMEM);
+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+ if (!blkif)
+ return ERR_PTR(-ENOMEM);
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->status = DISCONNECTED;
- spin_lock_init(&blkif->blk_ring_lock);
- atomic_set(&blkif->refcnt, 1);
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 1);
- return blkif;
+ return blkif;
}
-static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
- unsigned long shared_page)
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
{
- struct gnttab_map_grant_ref op;
- op.host_addr = localaddr;
- op.flags = GNTMAP_host_map;
- op.ref = shared_page;
- op.dom = blkif->domid;
+ struct gnttab_map_grant_ref op;
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+ op.host_addr = (unsigned long)blkif->blk_ring_area->addr;
+ op.flags = GNTMAP_host_map;
+ op.ref = shared_page;
+ op.dom = blkif->domid;
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure !\n");
- return op.handle;
- }
+ lock_vm_area(blkif->blk_ring_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+ unlock_vm_area(blkif->blk_ring_area);
- blkif->shmem_ref = shared_page;
- blkif->shmem_handle = op.handle;
- blkif->shmem_vaddr = localaddr;
- return 0;
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
+ }
+
+ blkif->shmem_ref = shared_page;
+ blkif->shmem_handle = op.handle;
+
+ return 0;
}
static void unmap_frontend_page(blkif_t *blkif)
{
- struct gnttab_unmap_grant_ref op;
+ struct gnttab_unmap_grant_ref op;
- op.host_addr = blkif->shmem_vaddr;
- op.handle = blkif->shmem_handle;
- op.dev_bus_addr = 0;
- BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ op.host_addr = (unsigned long)blkif->blk_ring_area->addr;
+ op.handle = blkif->shmem_handle;
+ op.dev_bus_addr = 0;
+
+ lock_vm_area(blkif->blk_ring_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ unlock_vm_area(blkif->blk_ring_area);
}
int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
{
- struct vm_struct *vma;
- blkif_sring_t *sring;
- evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
- int err;
+ blkif_sring_t *sring;
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
- BUG_ON(blkif->remote_evtchn);
+ BUG_ON(blkif->remote_evtchn);
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- return -ENOMEM;
+ if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
+ return -ENOMEM;
- err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
- if (err) {
- vfree(vma->addr);
- return err;
- }
+ err = map_frontend_page(blkif, shared_page);
+ if (err) {
+ free_vm_area(blkif->blk_ring_area);
+ return err;
+ }
- op.u.bind_interdomain.dom1 = DOMID_SELF;
- op.u.bind_interdomain.dom2 = blkif->domid;
- op.u.bind_interdomain.port1 = 0;
- op.u.bind_interdomain.port2 = evtchn;
- err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_frontend_page(blkif);
- vfree(vma->addr);
- return err;
- }
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = blkif->domid;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ return err;
+ }
- blkif->evtchn = op.u.bind_interdomain.port1;
- blkif->remote_evtchn = evtchn;
+ blkif->evtchn = op.u.bind_interdomain.port1;
+ blkif->remote_evtchn = evtchn;
- sring = (blkif_sring_t *)vma->addr;
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+ sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
- bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
- blkif);
- blkif->status = CONNECTED;
- blkif->shmem_frame = shared_page;
+ bind_evtchn_to_irqhandler(
+ blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+ blkif->status = CONNECTED;
- return 0;
+ return 0;
}
static void free_blkif(void *arg)
{
- evtchn_op_t op = { .cmd = EVTCHNOP_close };
- blkif_t *blkif = (blkif_t *)arg;
+ evtchn_op_t op = { .cmd = EVTCHNOP_close };
+ blkif_t *blkif = (blkif_t *)arg;
- op.u.close.port = blkif->evtchn;
- op.u.close.dom = DOMID_SELF;
- HYPERVISOR_event_channel_op(&op);
- op.u.close.port = blkif->remote_evtchn;
- op.u.close.dom = blkif->domid;
- HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = blkif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = blkif->remote_evtchn;
+ op.u.close.dom = blkif->domid;
+ HYPERVISOR_event_channel_op(&op);
- vbd_free(&blkif->vbd);
+ vbd_free(&blkif->vbd);
- if (blkif->evtchn)
- unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+ if (blkif->evtchn)
+ unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
- if (blkif->blk_ring.sring) {
- unmap_frontend_page(blkif);
- vfree(blkif->blk_ring.sring);
- blkif->blk_ring.sring = NULL;
- }
+ if (blkif->blk_ring.sring) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_ring.sring = NULL;
+ }
- kmem_cache_free(blkif_cachep, blkif);
+ kmem_cache_free(blkif_cachep, blkif);
}
void free_blkif_callback(blkif_t *blkif)
{
- INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
- schedule_work(&blkif->free_work);
+ INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+ schedule_work(&blkif->free_work);
}
void __init blkif_interface_init(void)
{
- blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
- 0, 0, NULL, NULL);
+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+ 0, 0, NULL, NULL);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Thu Sep 22 17:42:01 2005
@@ -11,10 +11,10 @@
static inline dev_t vbd_map_devnum(u32 cookie)
{
- return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
+ return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
}
-#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
- (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
+ (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
#define bdev_put(_b) blkdev_put(_b)
unsigned long vbd_size(struct vbd *vbd)
@@ -35,63 +35,73 @@
int vbd_create(blkif_t *blkif, blkif_vdev_t handle,
u32 pdevice, int readonly)
{
- struct vbd *vbd;
+ struct vbd *vbd;
- vbd = &blkif->vbd;
- vbd->handle = handle;
- vbd->readonly = readonly;
- vbd->type = 0;
+ vbd = &blkif->vbd;
+ vbd->handle = handle;
+ vbd->readonly = readonly;
+ vbd->type = 0;
- vbd->pdevice = pdevice;
+ vbd->pdevice = pdevice;
- vbd->bdev = open_by_devnum(
- vbd_map_devnum(vbd->pdevice),
- vbd->readonly ? FMODE_READ : FMODE_WRITE);
- if ( IS_ERR(vbd->bdev) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- return -ENOENT;
- }
+ vbd->bdev = open_by_devnum(
+ vbd_map_devnum(vbd->pdevice),
+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ if (IS_ERR(vbd->bdev)) {
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n",
+ vbd->pdevice);
+ return -ENOENT;
+ }
- if ( (vbd->bdev->bd_disk == NULL) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- vbd_free(vbd);
- return -ENOENT;
- }
+ if (vbd->bdev->bd_disk == NULL) {
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n",
+ vbd->pdevice);
+ vbd_free(vbd);
+ return -ENOENT;
+ }
- if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
- vbd->type |= VDISK_CDROM;
- if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
- vbd->type |= VDISK_REMOVABLE;
+ if (vbd->bdev->bd_disk->flags & GENHD_FL_CD)
+ vbd->type |= VDISK_CDROM;
+ if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+ vbd->type |= VDISK_REMOVABLE;
- DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
- handle, blkif->domid);
- return 0;
+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+ handle, blkif->domid);
+ return 0;
}
void vbd_free(struct vbd *vbd)
{
- if (vbd->bdev)
- bdev_put(vbd->bdev);
- vbd->bdev = NULL;
+ if (vbd->bdev)
+ bdev_put(vbd->bdev);
+ vbd->bdev = NULL;
}
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
{
- struct vbd *vbd = &blkif->vbd;
- int rc = -EACCES;
+ struct vbd *vbd = &blkif->vbd;
+ int rc = -EACCES;
- if ((operation == WRITE) && vbd->readonly)
- goto out;
+ if ((operation == WRITE) && vbd->readonly)
+ goto out;
- if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
- goto out;
+ if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
+ goto out;
- req->dev = vbd->pdevice;
- req->bdev = vbd->bdev;
- rc = 0;
+ req->dev = vbd->pdevice;
+ req->bdev = vbd->bdev;
+ rc = 0;
out:
- return rc;
+ return rc;
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Sep 22 17:42:01 2005
@@ -124,7 +124,7 @@
return;
-abort:
+ abort:
xenbus_transaction_end(1);
}
@@ -228,6 +228,7 @@
be->dev = dev;
be->backend_watch.node = dev->nodename;
be->backend_watch.callback = backend_changed;
+ /* Will implicitly call backend_changed once. */
err = register_xenbus_watch(&be->backend_watch);
if (err) {
be->backend_watch.node = NULL;
@@ -249,8 +250,6 @@
}
dev->data = be;
-
- backend_changed(&be->backend_watch, dev->nodename);
return 0;
free_be:
@@ -279,3 +278,13 @@
{
xenbus_register_backend(&blkback);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Sep 22 17:42:01 2005
@@ -146,4 +146,15 @@
int xlvbd_add(blkif_sector_t capacity, int device,
u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
void xlvbd_del(struct blkfront_info *info);
+
#endif /* __XEN_DRIVERS_BLOCK_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu Sep 22 17:42:01 2005
@@ -65,7 +65,7 @@
};
static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
- NUM_VBD_MAJORS];
+ NUM_VBD_MAJORS];
#define XLBD_MAJOR_IDE_START 0
#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
@@ -309,3 +309,13 @@
bdput(bd);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Sep 22 17:42:01 2005
@@ -4,7 +4,6 @@
* This is a modified version of the block backend driver that remaps requests
* to a user-space memory region. It is intended to be used to write
* application-level servers that provide block interfaces to client VMs.
- *
*/
#include <linux/kernel.h>
@@ -67,20 +66,19 @@
static inline int BLKTAP_MODE_VALID(unsigned long arg)
{
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) );
+ return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
+ (arg == BLKTAP_MODE_INTERCEPT_FE) ||
+ (arg == BLKTAP_MODE_INTERPOSE ));
/*
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
- );
+ return (
+ ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+ ( arg == BLKTAP_MODE_INTERPOSE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+ );
*/
}
@@ -110,14 +108,12 @@
unsigned long rings_vstart; /* start of mmaped vma */
unsigned long user_vstart; /* start of user mappings */
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_start, _req,_seg) \
- (_start + \
- ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-
-
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_start, _req,_seg) \
+ (_start + \
+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
/*
* Each outstanding request that we've passed to the lower device layers has a
@@ -126,12 +122,12 @@
* response queued for it, with the saved 'id' passed back.
*/
typedef struct {
- blkif_t *blkif;
- unsigned long id;
- int nr_pages;
- atomic_t pendcnt;
- unsigned short operation;
- int status;
+ blkif_t *blkif;
+ unsigned long id;
+ int nr_pages;
+ atomic_t pendcnt;
+ unsigned short operation;
+ int status;
} pending_req_t;
/*
@@ -156,17 +152,17 @@
static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
{
- return ( (fe_dom << 16) | MASK_PEND_IDX(idx) );
+ return ((fe_dom << 16) | MASK_PEND_IDX(idx));
}
extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
{
- return (PEND_RING_IDX)( id & 0x0000ffff );
+ return (PEND_RING_IDX)(id & 0x0000ffff);
}
extern inline domid_t ID_TO_DOM(unsigned long id)
{
- return (domid_t)(id >> 16);
+ return (domid_t)(id >> 16);
}
@@ -181,8 +177,8 @@
*/
struct grant_handle_pair
{
- u16 kernel;
- u16 user;
+ u16 kernel;
+ u16 user;
};
static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
#define pending_handle(_idx, _i) \
@@ -199,21 +195,20 @@
*/
static struct page *blktap_nopage(struct vm_area_struct *vma,
- unsigned long address,
- int *type)
-{
- /*
- * if the page has not been mapped in by the driver then generate
- * a SIGBUS to the domain.
- */
-
- force_sig(SIGBUS, current);
-
- return 0;
+ unsigned long address,
+ int *type)
+{
+ /*
+ * if the page has not been mapped in by the driver then generate
+ * a SIGBUS to the domain.
+ */
+ force_sig(SIGBUS, current);
+
+ return 0;
}
struct vm_operations_struct blktap_vm_ops = {
- nopage: blktap_nopage,
+ nopage: blktap_nopage,
};
/******************************************************************
@@ -222,44 +217,45 @@
static int blktap_open(struct inode *inode, struct file *filp)
{
- blkif_sring_t *sring;
+ blkif_sring_t *sring;
+
+ if (test_and_set_bit(0, &blktap_dev_inuse))
+ return -EBUSY;
- if ( test_and_set_bit(0, &blktap_dev_inuse) )
- return -EBUSY;
+ /* Allocate the fe ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_nomem;
+
+ SetPageReserved(virt_to_page(sring));
- /* Allocate the fe ring. */
- sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
- if (sring == NULL)
- goto fail_nomem;
-
- SetPageReserved(virt_to_page(sring));
-
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
-
- return 0;
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
+
+ return 0;
fail_nomem:
- return -ENOMEM;
+ return -ENOMEM;
}
static int blktap_release(struct inode *inode, struct file *filp)
{
- blktap_dev_inuse = 0;
- blktap_ring_ok = 0;
-
- /* Free the ring page. */
- ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
- free_page((unsigned long) blktap_ufe_ring.sring);
-
- /* Clear any active mappings and free foreign map table */
- if (blktap_vma != NULL) {
- zap_page_range(blktap_vma, blktap_vma->vm_start,
- blktap_vma->vm_end - blktap_vma->vm_start, NULL);
- blktap_vma = NULL;
- }
-
- return 0;
+ blktap_dev_inuse = 0;
+ blktap_ring_ok = 0;
+
+ /* Free the ring page. */
+ ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ free_page((unsigned long) blktap_ufe_ring.sring);
+
+ /* Clear any active mappings and free foreign map table */
+ if (blktap_vma != NULL) {
+ zap_page_range(
+ blktap_vma, blktap_vma->vm_start,
+ blktap_vma->vm_end - blktap_vma->vm_start, NULL);
+ blktap_vma = NULL;
+ }
+
+ return 0;
}
@@ -283,128 +279,124 @@
*/
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
{
- int size;
- struct page **map;
- int i;
-
- DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
- vma->vm_start, vma->vm_end);
-
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &blktap_vm_ops;
-
- size = vma->vm_end - vma->vm_start;
- if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
- printk(KERN_INFO
- "blktap: you _must_ map exactly %d pages!\n",
- MMAP_PAGES + RING_PAGES);
- return -EAGAIN;
- }
-
- size >>= PAGE_SHIFT;
- DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+ int size;
+ struct page **map;
+ int i;
+
+ DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+ vma->vm_start, vma->vm_end);
+
+ vma->vm_flags |= VM_RESERVED;
+ vma->vm_ops = &blktap_vm_ops;
+
+ size = vma->vm_end - vma->vm_start;
+ if (size != ((MMAP_PAGES + RING_PAGES) << PAGE_SHIFT)) {
+ printk(KERN_INFO
+ "blktap: you _must_ map exactly %d pages!\n",
+ MMAP_PAGES + RING_PAGES);
+ return -EAGAIN;
+ }
+
+ size >>= PAGE_SHIFT;
+ DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
- rings_vstart = vma->vm_start;
- user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+ rings_vstart = vma->vm_start;
+ user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
- /* Map the ring pages to the start of the region and reserve it. */
-
- /* not sure if I really need to do this... */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (remap_pfn_range(vma, vma->vm_start,
- __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot))
- {
- WPRINTK("Mapping user ring failed!\n");
- goto fail;
- }
-
- /* Mark this VM as containing foreign pages, and set up mappings. */
- map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
- * sizeof(struct page_struct*),
- GFP_KERNEL);
- if (map == NULL)
- {
- WPRINTK("Couldn't alloc VM_FOREIGH map.\n");
- goto fail;
- }
-
- for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
- map[i] = NULL;
+ /* Map the ring pages to the start of the region and reserve it. */
+
+ /* not sure if I really need to do this... */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("Mapping user ring failed!\n");
+ goto fail;
+ }
+
+ /* Mark this VM as containing foreign pages, and set up mappings. */
+ map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+ * sizeof(struct page_struct*),
+ GFP_KERNEL);
+ if (map == NULL) {
+ WPRINTK("Couldn't alloc VM_FOREIGH map.\n");
+ goto fail;
+ }
+
+ for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
+ map[i] = NULL;
- vma->vm_private_data = map;
- vma->vm_flags |= VM_FOREIGN;
-
- blktap_vma = vma;
- blktap_ring_ok = 1;
-
- return 0;
+ vma->vm_private_data = map;
+ vma->vm_flags |= VM_FOREIGN;
+
+ blktap_vma = vma;
+ blktap_ring_ok = 1;
+
+ return 0;
fail:
- /* Clear any active mappings. */
- zap_page_range(vma, vma->vm_start,
- vma->vm_end - vma->vm_start, NULL);
-
- return -ENOMEM;
+ /* Clear any active mappings. */
+ zap_page_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start, NULL);
+
+ return -ENOMEM;
}
static int blktap_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- switch(cmd) {
- case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
- return blktap_read_ufe_ring();
-
- case BLKTAP_IOCTL_SETMODE:
- if (BLKTAP_MODE_VALID(arg)) {
- blktap_mode = arg;
- /* XXX: may need to flush rings here. */
- printk(KERN_INFO "blktap: set mode to %lx\n", arg);
- return 0;
- }
- case BLKTAP_IOCTL_PRINT_IDXS:
+ switch(cmd) {
+ case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+ return blktap_read_ufe_ring();
+
+ case BLKTAP_IOCTL_SETMODE:
+ if (BLKTAP_MODE_VALID(arg)) {
+ blktap_mode = arg;
+ /* XXX: may need to flush rings here. */
+ printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+ return 0;
+ }
+ case BLKTAP_IOCTL_PRINT_IDXS:
{
- //print_fe_ring_idxs();
- WPRINTK("User Rings: \n-----------\n");
- WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
- "| req_prod: %2d, rsp_prod: %2d\n",
- blktap_ufe_ring.rsp_cons,
- blktap_ufe_ring.req_prod_pvt,
- blktap_ufe_ring.sring->req_prod,
- blktap_ufe_ring.sring->rsp_prod);
+ //print_fe_ring_idxs();
+ WPRINTK("User Rings: \n-----------\n");
+ WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ufe_ring.rsp_cons,
+ blktap_ufe_ring.req_prod_pvt,
+ blktap_ufe_ring.sring->req_prod,
+ blktap_ufe_ring.sring->rsp_prod);
}
- }
- return -ENOIOCTLCMD;
+ }
+ return -ENOIOCTLCMD;
}
static unsigned int blktap_poll(struct file *file, poll_table *wait)
{
- poll_wait(file, &blktap_wait, wait);
- if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) )
- {
- flush_tlb_all();
-
- RING_PUSH_REQUESTS(&blktap_ufe_ring);
- return POLLIN | POLLRDNORM;
- }
-
- return 0;
+ poll_wait(file, &blktap_wait, wait);
+ if (RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)) {
+ flush_tlb_all();
+ RING_PUSH_REQUESTS(&blktap_ufe_ring);
+ return POLLIN | POLLRDNORM;
+ }
+
+ return 0;
}
void blktap_kick_user(void)
{
- /* blktap_ring->req_prod = blktap_req_prod; */
- wake_up_interruptible(&blktap_wait);
+ /* blktap_ring->req_prod = blktap_req_prod; */
+ wake_up_interruptible(&blktap_wait);
}
static struct file_operations blktap_fops = {
- owner: THIS_MODULE,
- poll: blktap_poll,
- ioctl: blktap_ioctl,
- open: blktap_open,
- release: blktap_release,
- mmap: blktap_mmap,
+ owner: THIS_MODULE,
+ poll: blktap_poll,
+ ioctl: blktap_ioctl,
+ open: blktap_open,
+ release: blktap_release,
+ mmap: blktap_mmap,
};
@@ -417,44 +409,44 @@
static void fast_flush_area(int idx, int nr_pages)
{
- struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
- unsigned int i, op = 0;
- struct grant_handle_pair *handle;
- unsigned long ptep;
-
- for (i=0; i<nr_pages; i++)
- {
- handle = &pending_handle(idx, i);
- if (!BLKTAP_INVALID_HANDLE(handle))
- {
-
- unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
- unmap[op].dev_bus_addr = 0;
- unmap[op].handle = handle->kernel;
- op++;
-
- if (create_lookup_pte_addr(blktap_vma->vm_mm,
- MMAP_VADDR(user_vstart, idx, i),
- &ptep) !=0) {
- DPRINTK("Couldn't get a pte addr!\n");
- return;
- }
- unmap[op].host_addr = ptep;
- unmap[op].dev_bus_addr = 0;
- unmap[op].handle = handle->user;
- op++;
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+ unsigned int i, op = 0;
+ struct grant_handle_pair *handle;
+ unsigned long ptep;
+
+ for ( i = 0; i < nr_pages; i++)
+ {
+ handle = &pending_handle(idx, i);
+ if (BLKTAP_INVALID_HANDLE(handle))
+ continue;
+
+ unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
+ unmap[op].dev_bus_addr = 0;
+ unmap[op].handle = handle->kernel;
+ op++;
+
+ if (create_lookup_pte_addr(
+ blktap_vma->vm_mm,
+ MMAP_VADDR(user_vstart, idx, i),
+ &ptep) !=0) {
+ DPRINTK("Couldn't get a pte addr!\n");
+ return;
+ }
+ unmap[op].host_addr = ptep;
+ unmap[op].dev_bus_addr = 0;
+ unmap[op].handle = handle->user;
+ op++;
- BLKTAP_INVALIDATE_HANDLE(handle);
- }
- }
- if ( unlikely(HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, unmap, op)))
- BUG();
-
- if (blktap_vma != NULL)
- zap_page_range(blktap_vma,
- MMAP_VADDR(user_vstart, idx, 0),
- nr_pages << PAGE_SHIFT, NULL);
+ BLKTAP_INVALIDATE_HANDLE(handle);
+ }
+
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, op));
+
+ if (blktap_vma != NULL)
+ zap_page_range(blktap_vma,
+ MMAP_VADDR(user_vstart, idx, 0),
+ nr_pages << PAGE_SHIFT, NULL);
}
/******************************************************************
@@ -466,34 +458,38 @@
static int __on_blkdev_list(blkif_t *blkif)
{
- return blkif->blkdev_list.next != NULL;
+ return blkif->blkdev_list.next != NULL;
}
static void remove_from_blkdev_list(blkif_t *blkif)
{
- unsigned long flags;
- if ( !__on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if ( __on_blkdev_list(blkif) )
- {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = NULL;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+ unsigned long flags;
+
+ if (!__on_blkdev_list(blkif))
+ return;
+
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if (__on_blkdev_list(blkif)) {
+ list_del(&blkif->blkdev_list);
+ blkif->blkdev_list.next = NULL;
+ blkif_put(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}
static void add_to_blkdev_list_tail(blkif_t *blkif)
{
- unsigned long flags;
- if ( __on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
- {
- list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
- blkif_get(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+ unsigned long flags;
+
+ if (__on_blkdev_list(blkif))
+ return;
+
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
+ list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ blkif_get(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}
@@ -505,51 +501,50 @@
static int blkio_schedule(void *arg)
{
- DECLARE_WAITQUEUE(wq, current);
-
- blkif_t *blkif;
- struct list_head *ent;
-
- daemonize("xenblkd");
-
- for ( ; ; )
- {
- /* Wait for work to do. */
- add_wait_queue(&blkio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&blkio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&blkio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&blkio_schedule_list) )
- {
- ent = blkio_schedule_list.next;
- blkif = list_entry(ent, blkif_t, blkdev_list);
- blkif_get(blkif);
- remove_from_blkdev_list(blkif);
- if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
- add_to_blkdev_list_tail(blkif);
- blkif_put(blkif);
- }
- }
+ DECLARE_WAITQUEUE(wq, current);
+
+ blkif_t *blkif;
+ struct list_head *ent;
+
+ daemonize("xenblkd");
+
+ for (;;) {
+ /* Wait for work to do. */
+ add_wait_queue(&blkio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ((NR_PENDING_REQS == MAX_PENDING_REQS) ||
+ list_empty(&blkio_schedule_list))
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&blkio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ !list_empty(&blkio_schedule_list)) {
+ ent = blkio_schedule_list.next;
+ blkif = list_entry(ent, blkif_t, blkdev_list);
+ blkif_get(blkif);
+ remove_from_blkdev_list(blkif);
+ if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
+ add_to_blkdev_list_tail(blkif);
+ blkif_put(blkif);
+ }
+ }
}
static void maybe_trigger_blkio_schedule(void)
{
- /*
- * Needed so that two processes, who together make the following predicate
- * true, don't both read stale values and evaluate the predicate
- * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
- */
- smp_mb();
-
- if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
- !list_empty(&blkio_schedule_list) )
- wake_up(&blkio_schedule_wait);
+ /*
+ * Needed so that two processes, who together make the following
+ * predicate true, don't both read stale values and evaluate the
+ * predicate incorrectly. Incredibly unlikely to stall the scheduler
+ * on the x86, but...
+ */
+ smp_mb();
+
+ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&blkio_schedule_list))
+ wake_up(&blkio_schedule_wait);
}
@@ -561,54 +556,53 @@
static int blktap_read_ufe_ring(void)
{
- /* This is called to read responses from the UFE ring. */
-
- RING_IDX i, j, rp;
- blkif_response_t *resp;
- blkif_t *blkif;
- int pending_idx;
- pending_req_t *pending_req;
- unsigned long flags;
-
- /* if we are forwarding from UFERring to FERing */
- if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
-
- /* for each outstanding message on the UFEring */
- rp = blktap_ufe_ring.sring->rsp_prod;
- rmb();
+ /* This is called to read responses from the UFE ring. */
+
+ RING_IDX i, j, rp;
+ blkif_response_t *resp;
+ blkif_t *blkif;
+ int pending_idx;
+ pending_req_t *pending_req;
+ unsigned long flags;
+
+ /* if we are forwarding from UFERring to FERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+ /* for each outstanding message on the UFEring */
+ rp = blktap_ufe_ring.sring->rsp_prod;
+ rmb();
- for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
- {
- resp = RING_GET_RESPONSE(&blktap_ufe_ring, i);
- pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id));
- pending_req = &pending_reqs[pending_idx];
+ for (i = blktap_ufe_ring.rsp_cons; i != rp; i++) {
+ resp = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+ pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id));
+ pending_req = &pending_reqs[pending_idx];
- blkif = pending_req->blkif;
- for (j = 0; j < pending_req->nr_pages; j++) {
- unsigned long vaddr;
- struct page **map = blktap_vma->vm_private_data;
- int offset;
-
- vaddr = MMAP_VADDR(user_vstart, pending_idx, j);
- offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
-
- //ClearPageReserved(virt_to_page(vaddr));
- ClearPageReserved((struct page *)map[offset]);
- map[offset] = NULL;
- }
-
- fast_flush_area(pending_idx, pending_req->nr_pages);
- make_response(blkif, pending_req->id, resp->operation,
- resp->status);
- blkif_put(pending_req->blkif);
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
- }
- blktap_ufe_ring.rsp_cons = i;
- maybe_trigger_blkio_schedule();
- }
- return 0;
+ blkif = pending_req->blkif;
+ for (j = 0; j < pending_req->nr_pages; j++) {
+ unsigned long vaddr;
+ struct page **map = blktap_vma->vm_private_data;
+ int offset;
+
+ vaddr = MMAP_VADDR(user_vstart, pending_idx,
j);
+ offset = (vaddr - blktap_vma->vm_start) >>
PAGE_SHIFT;
+
+ //ClearPageReserved(virt_to_page(vaddr));
+ ClearPageReserved((struct page *)map[offset]);
+ map[offset] = NULL;
+ }
+
+ fast_flush_area(pending_idx, pending_req->nr_pages);
+ make_response(blkif, pending_req->id, resp->operation,
+ resp->status);
+ blkif_put(pending_req->blkif);
+ spin_lock_irqsave(&pend_prod_lock, flags);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] =
pending_idx;
+ spin_unlock_irqrestore(&pend_prod_lock, flags);
+ }
+ blktap_ufe_ring.rsp_cons = i;
+ maybe_trigger_blkio_schedule();
+ }
+ return 0;
}
@@ -618,10 +612,10 @@
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
- blkif_t *blkif = dev_id;
- add_to_blkdev_list_tail(blkif);
- maybe_trigger_blkio_schedule();
- return IRQ_HANDLED;
+ blkif_t *blkif = dev_id;
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blkio_schedule();
+ return IRQ_HANDLED;
}
@@ -632,199 +626,194 @@
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
- blkif_back_ring_t *blk_ring = &blkif->blk_ring;
- blkif_request_t *req;
- RING_IDX i, rp;
- int more_to_do = 0;
+ blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+ blkif_request_t *req;
+ RING_IDX i, rp;
+ int more_to_do = 0;
- rp = blk_ring->sring->req_prod;
- rmb(); /* Ensure we see queued requests up to 'rp'. */
-
- for ( i = blk_ring->req_cons;
- (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
- i++ )
- {
- if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
- {
- more_to_do = 1;
- break;
- }
+ rp = blk_ring->sring->req_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ for (i = blk_ring->req_cons;
+ (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
+ i++ ) {
+ if ((max_to_do-- == 0) ||
+ (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+ more_to_do = 1;
+ break;
+ }
- req = RING_GET_REQUEST(blk_ring, i);
- switch ( req->operation )
- {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- dispatch_rw_block_io(blkif, req);
- break;
-
- default:
- DPRINTK("error: unknown block io operation [%d]\n",
- req->operation);
- make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
- break;
- }
- }
-
- blk_ring->req_cons = i;
- blktap_kick_user();
-
- return more_to_do;
+ req = RING_GET_REQUEST(blk_ring, i);
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ dispatch_rw_block_io(blkif, req);
+ break;
+
+ default:
+ DPRINTK("error: unknown block io operation [%d]\n",
+ req->operation);
+ make_response(blkif, req->id, req->operation,
+ BLKIF_RSP_ERROR);
+ break;
+ }
+ }
+
+ blk_ring->req_cons = i;
+ blktap_kick_user();
+
+ return more_to_do;
}
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
- blkif_request_t *target;
- int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
- struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
- int op, ret;
- unsigned int nseg;
-
- /* Check that number of segments is sane. */
- nseg = req->nr_segments;
- if ( unlikely(nseg == 0) ||
- unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
- {
- DPRINTK("Bad number of segments in request (%d)\n", nseg);
- goto bad_descriptor;
- }
-
- /* Make sure userspace is ready. */
- if (!blktap_ring_ok) {
- DPRINTK("blktap: ring not ready for requests!\n");
- goto bad_descriptor;
- }
+ blkif_request_t *target;
+ int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ pending_req_t *pending_req;
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+ int op, ret;
+ unsigned int nseg;
+
+ /* Check that number of segments is sane. */
+ nseg = req->nr_segments;
+ if (unlikely(nseg == 0) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
+ goto bad_descriptor;
+ }
+
+ /* Make sure userspace is ready. */
+ if (!blktap_ring_ok) {
+ DPRINTK("blktap: ring not ready for requests!\n");
+ goto bad_descriptor;
+ }
- if ( RING_FULL(&blktap_ufe_ring) ) {
- WPRINTK("blktap: fe_ring is full, can't add (very broken!).\n");
- goto bad_descriptor;
- }
-
- flush_cache_all(); /* a noop on intel... */
-
- /* Map the foreign pages directly in to the application */
- op = 0;
- for (i=0; i<req->nr_segments; i++) {
-
- unsigned long uvaddr;
- unsigned long kvaddr;
- unsigned long ptep;
-
- uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
- kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
-
- /* Map the remote page to kernel. */
- map[op].host_addr = kvaddr;
- map[op].dom = blkif->domid;
- map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]);
- map[op].flags = GNTMAP_host_map;
- /* This needs a bit more thought in terms of interposition:
- * If we want to be able to modify pages during write using
- * grant table mappings, the guest will either need to allow
- * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */
- if (req->operation == BLKIF_OP_WRITE)
- map[op].flags |= GNTMAP_readonly;
- op++;
-
- /* Now map it to user. */
- ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
- if (ret)
- {
- DPRINTK("Couldn't get a pte addr!\n");
- fast_flush_area(pending_idx, req->nr_segments);
- goto bad_descriptor;
- }
-
- map[op].host_addr = ptep;
- map[op].dom = blkif->domid;
- map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]);
- map[op].flags = GNTMAP_host_map | GNTMAP_application_map
- | GNTMAP_contains_pte;
- /* Above interposition comment applies here as well. */
- if (req->operation == BLKIF_OP_WRITE)
- map[op].flags |= GNTMAP_readonly;
- op++;
- }
-
- if ( unlikely(HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, map, op)))
- BUG();
-
- op = 0;
- for (i=0; i<(req->nr_segments*2); i+=2) {
- unsigned long uvaddr;
- unsigned long kvaddr;
- unsigned long offset;
- int cancel = 0;
-
- uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2);
- kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2);
-
- if ( unlikely(map[i].handle < 0) )
- {
- DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
- ret = map[i].handle;
- cancel = 1;
- }
-
- if ( unlikely(map[i+1].handle < 0) )
- {
- DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
- ret = map[i+1].handle;
- cancel = 1;
- }
-
- if (cancel)
- {
- fast_flush_area(pending_idx, req->nr_segments);
- goto bad_descriptor;
- }
-
- /* Set the necessary mappings in p2m and in the VM_FOREIGN
- * vm_area_struct to allow user vaddr -> struct page lookups
- * to work. This is needed for direct IO to foreign pages. */
- phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
- FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
-
- offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
- ((struct page **)blktap_vma->vm_private_data)[offset] =
- pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
-
- /* Save handles for unmapping later. */
- pending_handle(pending_idx, i/2).kernel = map[i].handle;
- pending_handle(pending_idx, i/2).user = map[i+1].handle;
- }
-
- /* Mark mapped pages as reserved: */
- for ( i = 0; i < req->nr_segments; i++ )
- {
- unsigned long kvaddr;
-
- kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
- SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
- }
-
- pending_req = &pending_reqs[pending_idx];
- pending_req->blkif = blkif;
- pending_req->id = req->id;
- pending_req->operation = req->operation;
- pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
- req->id = MAKE_ID(blkif->domid, pending_idx);
- //atomic_set(&pending_req->pendcnt, nbio);
- pending_cons++;
- blkif_get(blkif);
-
- /* Finally, write the request message to the user ring. */
- target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
- memcpy(target, req, sizeof(*req));
- blktap_ufe_ring.req_prod_pvt++;
- return;
+ if (RING_FULL(&blktap_ufe_ring)) {
+ WPRINTK("blktap: fe_ring is full, can't add "
+ "(very broken!).\n");
+ goto bad_descriptor;
+ }
+
+ flush_cache_all(); /* a noop on intel... */
+
+ /* Map the foreign pages directly in to the application */
+ op = 0;
+ for (i = 0; i < req->nr_segments; i++) {
+
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long ptep;
+
+ uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
+ kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
+
+ /* Map the remote page to kernel. */
+ map[op].host_addr = kvaddr;
+ map[op].dom = blkif->domid;
+ map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]);
+ map[op].flags = GNTMAP_host_map;
+ /* This needs a bit more thought in terms of interposition:
+ * If we want to be able to modify pages during write using
+ * grant table mappings, the guest will either need to allow
+ * it, or we'll need to incur a copy. Bit of an fbufs moment.
;) */
+ if (req->operation == BLKIF_OP_WRITE)
+ map[op].flags |= GNTMAP_readonly;
+ op++;
+
+ /* Now map it to user. */
+ ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+ if (ret) {
+ DPRINTK("Couldn't get a pte addr!\n");
+ fast_flush_area(pending_idx, req->nr_segments);
+ goto bad_descriptor;
+ }
+
+ map[op].host_addr = ptep;
+ map[op].dom = blkif->domid;
+ map[op].ref =
blkif_gref_from_fas(req->frame_and_sects[i]);
+ map[op].flags = GNTMAP_host_map | GNTMAP_application_map
+ | GNTMAP_contains_pte;
+ /* Above interposition comment applies here as well. */
+ if (req->operation == BLKIF_OP_WRITE)
+ map[op].flags |= GNTMAP_readonly;
+ op++;
+ }
+
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, map, op));
+
+ op = 0;
+ for (i = 0; i < (req->nr_segments*2); i += 2) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ int cancel = 0;
+
+ uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2);
+ kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2);
+
+ if (unlikely(map[i].handle < 0)) {
+ DPRINTK("Error on kernel grant mapping (%d)\n",
+ map[i].handle);
+ ret = map[i].handle;
+ cancel = 1;
+ }
+
+ if (unlikely(map[i+1].handle < 0)) {
+ DPRINTK("Error on user grant mapping (%d)\n",
+ map[i+1].handle);
+ ret = map[i+1].handle;
+ cancel = 1;
+ }
+
+ if (cancel) {
+ fast_flush_area(pending_idx, req->nr_segments);
+ goto bad_descriptor;
+ }
+
+ /* Set the necessary mappings in p2m and in the VM_FOREIGN
+ * vm_area_struct to allow user vaddr -> struct page lookups
+ * to work. This is needed for direct IO to foreign pages. */
+ phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+ offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+ ((struct page **)blktap_vma->vm_private_data)[offset] =
+ pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+
+ /* Save handles for unmapping later. */
+ pending_handle(pending_idx, i/2).kernel = map[i].handle;
+ pending_handle(pending_idx, i/2).user = map[i+1].handle;
+ }
+
+ /* Mark mapped pages as reserved: */
+ for (i = 0; i < req->nr_segments; i++) {
+ unsigned long kvaddr;
+ kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
+ SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+ }
+
+ pending_req = &pending_reqs[pending_idx];
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = req->operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+ req->id = MAKE_ID(blkif->domid, pending_idx);
+ //atomic_set(&pending_req->pendcnt, nbio);
+ pending_cons++;
+ blkif_get(blkif);
+
+ /* Finally, write the request message to the user ring. */
+ target = RING_GET_REQUEST(&blktap_ufe_ring,
+ blktap_ufe_ring.req_prod_pvt);
+ memcpy(target, req, sizeof(*req));
+ blktap_ufe_ring.req_prod_pvt++;
+ return;
bad_descriptor:
- make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
}
@@ -837,80 +826,89 @@
static void make_response(blkif_t *blkif, unsigned long id,
unsigned short op, int st)
{
- blkif_response_t *resp;
- unsigned long flags;
- blkif_back_ring_t *blk_ring = &blkif->blk_ring;
-
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&blkif->blk_ring_lock, flags);
- resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
- resp->id = id;
- resp->operation = op;
- resp->status = st;
- wmb(); /* Ensure other side can see the response fields. */
- blk_ring->rsp_prod_pvt++;
- RING_PUSH_RESPONSES(blk_ring);
- spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
-
- /* Kick the relevant domain. */
- notify_via_evtchn(blkif->evtchn);
+ blkif_response_t *resp;
+ unsigned long flags;
+ blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+ wmb(); /* Ensure other side can see the response fields. */
+ blk_ring->rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(blk_ring);
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ notify_via_evtchn(blkif->evtchn);
}
static struct miscdevice blktap_miscdev = {
- .minor = BLKTAP_MINOR,
- .name = "blktap",
- .fops = &blktap_fops,
- .devfs_name = "misc/blktap",
+ .minor = BLKTAP_MINOR,
+ .name = "blktap",
+ .fops = &blktap_fops,
+ .devfs_name = "misc/blktap",
};
void blkif_deschedule(blkif_t *blkif)
{
- remove_from_blkdev_list(blkif);
+ remove_from_blkdev_list(blkif);
}
static int __init blkif_init(void)
{
- int i, j, err;
- struct page *page;
+ int i, j, err;
+ struct page *page;
/*
- if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
- !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
- return 0;
+ if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
+ !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
+ return 0;
*/
- blkif_interface_init();
-
- page = balloon_alloc_empty_page_range(MMAP_PAGES);
- BUG_ON(page == NULL);
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
- memset(pending_reqs, 0, sizeof(pending_reqs));
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- pending_ring[i] = i;
+ blkif_interface_init();
+
+ page = balloon_alloc_empty_page_range(MMAP_PAGES);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ memset(pending_reqs, 0, sizeof(pending_reqs));
+ for ( i = 0; i < MAX_PENDING_REQS; i++ )
+ pending_ring[i] = i;
- spin_lock_init(&blkio_schedule_list_lock);
- INIT_LIST_HEAD(&blkio_schedule_list);
-
- if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
- BUG();
-
- blkif_xenbus_init();
-
- for (i=0; i<MAX_PENDING_REQS ; i++)
- for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
- BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
-
- err = misc_register(&blktap_miscdev);
- if ( err != 0 )
- {
- printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
- return err;
- }
-
- init_waitqueue_head(&blktap_wait);
-
- return 0;
+ spin_lock_init(&blkio_schedule_list_lock);
+ INIT_LIST_HEAD(&blkio_schedule_list);
+
+ BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0);
+
+ blkif_xenbus_init();
+
+ for (i = 0; i < MAX_PENDING_REQS ; i++)
+ for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+ BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
+
+ err = misc_register(&blktap_miscdev);
+ if (err != 0) {
+ printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n",
+ err);
+ return err;
+ }
+
+ init_waitqueue_head(&blktap_wait);
+
+ return 0;
}
__initcall(blkif_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Thu Sep 22 17:42:01 2005
@@ -17,6 +17,7 @@
#include <asm-xen/xen-public/io/blkif.h>
#include <asm-xen/xen-public/io/ring.h>
#include <asm-xen/gnttab.h>
+#include <asm-xen/driver_util.h>
#if 0
#define ASSERT(_p) \
@@ -32,39 +33,39 @@
#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
struct vbd {
- blkif_vdev_t handle; /* what the domain refers to this vbd as */
- unsigned char readonly; /* Non-zero -> read-only */
- unsigned char type; /* VDISK_xxx */
- u32 pdevice; /* phys device that this vbd maps to */
- struct block_device *bdev;
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_xxx */
+ u32 pdevice; /* phys device that this vbd maps to */
+ struct block_device *bdev;
};
typedef struct blkif_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- unsigned int remote_evtchn;
- /* Comms information. */
- blkif_back_ring_t blk_ring;
- /* VBDs attached to this interface. */
- struct vbd vbd;
- /* Private fields. */
- enum { DISCONNECTED, CONNECTED } status;
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned int evtchn;
+ unsigned int remote_evtchn;
+ /* Comms information. */
+ blkif_back_ring_t blk_ring;
+ struct vm_struct *blk_ring_area;
+ /* VBDs attached to this interface. */
+ struct vbd vbd;
+ /* Private fields. */
+ enum { DISCONNECTED, CONNECTED } status;
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- /* Is this a blktap frontend */
- unsigned int is_blktap;
+ /* Is this a blktap frontend */
+ unsigned int is_blktap;
#endif
- struct list_head blkdev_list;
- spinlock_t blk_ring_lock;
- atomic_t refcnt;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
- struct work_struct free_work;
- u16 shmem_handle;
- unsigned long shmem_vaddr;
- grant_ref_t shmem_ref;
+ struct work_struct free_work;
+
+ u16 shmem_handle;
+ grant_ref_t shmem_ref;
} blkif_t;
blkif_t *alloc_blkif(domid_t domid);
@@ -88,10 +89,10 @@
unsigned long vbd_secsize(struct vbd *vbd);
struct phys_req {
- unsigned short dev;
- unsigned short nr_sects;
- struct block_device *bdev;
- blkif_sector_t sector_number;
+ unsigned short dev;
+ unsigned short nr_sects;
+ struct block_device *bdev;
+ blkif_sector_t sector_number;
};
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
@@ -105,3 +106,13 @@
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
#endif /* __BLKIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Thu Sep 22 17:42:01 2005
@@ -13,129 +13,143 @@
blkif_t *alloc_blkif(domid_t domid)
{
- blkif_t *blkif;
+ blkif_t *blkif;
- blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
- if (!blkif)
- return ERR_PTR(-ENOMEM);
+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+ if (!blkif)
+ return ERR_PTR(-ENOMEM);
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->status = DISCONNECTED;
- spin_lock_init(&blkif->blk_ring_lock);
- atomic_set(&blkif->refcnt, 1);
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 1);
- return blkif;
+ return blkif;
}
-static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
- unsigned long shared_page)
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
{
- struct gnttab_map_grant_ref op;
- op.host_addr = localaddr;
- op.flags = GNTMAP_host_map;
- op.ref = shared_page;
- op.dom = blkif->domid;
+ struct gnttab_map_grant_ref op;
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+ op.host_addr = (unsigned long)blkif->blk_ring_area->addr;
+ op.flags = GNTMAP_host_map;
+ op.ref = shared_page;
+ op.dom = blkif->domid;
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure !\n");
- return op.handle;
- }
+ lock_vm_area(blkif->blk_ring_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+ unlock_vm_area(blkif->blk_ring_area);
- blkif->shmem_ref = shared_page;
- blkif->shmem_handle = op.handle;
- blkif->shmem_vaddr = localaddr;
- return 0;
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
+ }
+
+ blkif->shmem_ref = shared_page;
+ blkif->shmem_handle = op.handle;
+
+ return 0;
}
static void unmap_frontend_page(blkif_t *blkif)
{
- struct gnttab_unmap_grant_ref op;
+ struct gnttab_unmap_grant_ref op;
- op.host_addr = blkif->shmem_vaddr;
- op.handle = blkif->shmem_handle;
- op.dev_bus_addr = 0;
- BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ op.host_addr = (unsigned long)blkif->blk_ring_area->addr;
+ op.handle = blkif->shmem_handle;
+ op.dev_bus_addr = 0;
+
+ lock_vm_area(blkif->blk_ring_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ unlock_vm_area(blkif->blk_ring_area);
}
int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
{
- struct vm_struct *vma;
- blkif_sring_t *sring;
- evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
- int err;
+ blkif_sring_t *sring;
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
- BUG_ON(blkif->remote_evtchn);
+ BUG_ON(blkif->remote_evtchn);
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- return -ENOMEM;
+ if ((blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL)
+ return -ENOMEM;
- err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
- if (err) {
- vfree(vma->addr);
- return err;
- }
+ err = map_frontend_page(blkif, shared_page);
+ if (err) {
+ free_vm_area(blkif->blk_ring_area);
+ return err;
+ }
- op.u.bind_interdomain.dom1 = DOMID_SELF;
- op.u.bind_interdomain.dom2 = blkif->domid;
- op.u.bind_interdomain.port1 = 0;
- op.u.bind_interdomain.port2 = evtchn;
- err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_frontend_page(blkif);
- vfree(vma->addr);
- return err;
- }
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = blkif->domid;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ return err;
+ }
- blkif->evtchn = op.u.bind_interdomain.port1;
- blkif->remote_evtchn = evtchn;
- sring = (blkif_sring_t *)vma->addr;
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+ blkif->evtchn = op.u.bind_interdomain.port1;
+ blkif->remote_evtchn = evtchn;
- bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
- blkif);
- blkif->status = CONNECTED;
- blkif->shmem_frame = shared_page;
+ sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
- return 0;
+ bind_evtchn_to_irqhandler(
+ blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+ blkif->status = CONNECTED;
+
+ return 0;
}
static void free_blkif(void *arg)
{
- evtchn_op_t op = { .cmd = EVTCHNOP_close };
- blkif_t *blkif = (blkif_t *)arg;
+ evtchn_op_t op = { .cmd = EVTCHNOP_close };
+ blkif_t *blkif = (blkif_t *)arg;
- op.u.close.port = blkif->evtchn;
- op.u.close.dom = DOMID_SELF;
- HYPERVISOR_event_channel_op(&op);
- op.u.close.port = blkif->remote_evtchn;
- op.u.close.dom = blkif->domid;
- HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = blkif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = blkif->remote_evtchn;
+ op.u.close.dom = blkif->domid;
+ HYPERVISOR_event_channel_op(&op);
- if (blkif->evtchn)
- unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+ if (blkif->evtchn)
+ unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
- if (blkif->blk_ring.sring) {
- unmap_frontend_page(blkif);
- vfree(blkif->blk_ring.sring);
- blkif->blk_ring.sring = NULL;
- }
+ if (blkif->blk_ring.sring) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_ring.sring = NULL;
+ }
- kmem_cache_free(blkif_cachep, blkif);
+ kmem_cache_free(blkif_cachep, blkif);
}
void free_blkif_callback(blkif_t *blkif)
{
- INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
- schedule_work(&blkif->free_work);
+ INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+ schedule_work(&blkif->free_work);
}
void __init blkif_interface_init(void)
{
- blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
- 0, 0, NULL, NULL);
+ blkif_cachep = kmem_cache_create(
+ "blkif_cache", sizeof(blkif_t), 0, 0, NULL, NULL);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Sep 22 17:42:01 2005
@@ -172,6 +172,7 @@
be->dev = dev;
be->backend_watch.node = dev->nodename;
be->backend_watch.callback = backend_changed;
+ /* Registration implicitly fires backend_changed once */
err = register_xenbus_watch(&be->backend_watch);
if (err) {
be->backend_watch.node = NULL;
@@ -193,8 +194,6 @@
}
dev->data = be;
-
- backend_changed(&be->backend_watch, dev->nodename);
return 0;
free_be:
@@ -223,3 +222,13 @@
{
xenbus_register_backend(&blkback);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Sep 22 17:42:01 2005
@@ -75,31 +75,33 @@
static int __init xencons_setup(char *str)
{
- char *q;
- int n;
-
- if ( !strncmp(str, "ttyS", 4) )
- xc_mode = XC_SERIAL;
- else if ( !strncmp(str, "tty", 3) )
- xc_mode = XC_TTY;
- else if ( !strncmp(str, "off", 3) )
- xc_mode = XC_OFF;
-
- switch ( xc_mode )
- {
- case XC_SERIAL:
- n = simple_strtol( str+4, &q, 10 );
- if ( q > (str + 4) ) xc_num = n;
- break;
- case XC_TTY:
- n = simple_strtol( str+3, &q, 10 );
- if ( q > (str + 3) ) xc_num = n;
- break;
- default:
- break;
- }
-
- return 1;
+ char *q;
+ int n;
+
+ if (!strncmp(str, "ttyS", 4))
+ xc_mode = XC_SERIAL;
+ else if (!strncmp(str, "tty", 3))
+ xc_mode = XC_TTY;
+ else if (!strncmp(str, "off", 3))
+ xc_mode = XC_OFF;
+
+ switch ( xc_mode )
+ {
+ case XC_SERIAL:
+ n = simple_strtol(str+4, &q, 10);
+ if (q > (str + 4))
+ xc_num = n;
+ break;
+ case XC_TTY:
+ n = simple_strtol(str+3, &q, 10);
+ if (q > (str + 3))
+ xc_num = n;
+ break;
+ default:
+ break;
+ }
+
+ return 1;
}
__setup("xencons=", xencons_setup);
@@ -111,11 +113,11 @@
static int __init xencons_bufsz_setup(char *str)
{
- unsigned int goal;
- goal = simple_strtoul(str, NULL, 0);
- while ( wbuf_size < goal )
- wbuf_size <<= 1;
- return 1;
+ unsigned int goal;
+ goal = simple_strtoul(str, NULL, 0);
+ while (wbuf_size < goal)
+ wbuf_size <<= 1;
+ return 1;
}
__setup("xencons_bufsz=", xencons_bufsz_setup);
@@ -135,57 +137,55 @@
/******************** Kernel console driver ********************************/
static void kcons_write(
- struct console *c, const char *s, unsigned int count)
-{
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&xencons_lock, flags);
+ struct console *c, const char *s, unsigned int count)
+{
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xencons_lock, flags);
- for ( i = 0; i < count; i++ )
- {
- if ( (wp - wc) >= (wbuf_size - 1) )
- break;
- if ( (wbuf[WBUF_MASK(wp++)] = s[i]) == '\n' )
- wbuf[WBUF_MASK(wp++)] = '\r';
- }
-
- __xencons_tx_flush();
-
- spin_unlock_irqrestore(&xencons_lock, flags);
+ for (i = 0; i < count; i++) {
+ if ((wp - wc) >= (wbuf_size - 1))
+ break;
+ if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
+ wbuf[WBUF_MASK(wp++)] = '\r';
+ }
+
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
}
static void kcons_write_dom0(
- struct console *c, const char *s, unsigned int count)
-{
- int rc;
-
- while ( (count > 0) &&
- ((rc = HYPERVISOR_console_io(
- CONSOLEIO_write, count, (char *)s)) > 0) )
- {
- count -= rc;
- s += rc;
- }
+ struct console *c, const char *s, unsigned int count)
+{
+ int rc;
+
+ while ((count > 0) &&
+ ((rc = HYPERVISOR_console_io(
+ CONSOLEIO_write, count, (char *)s)) > 0)) {
+ count -= rc;
+ s += rc;
+ }
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static struct tty_driver *kcons_device(struct console *c, int *index)
{
- *index = c->index;
- return xencons_driver;
+ *index = c->index;
+ return xencons_driver;
}
#else
static kdev_t kcons_device(struct console *c)
{
- return MKDEV(TTY_MAJOR, (xc_mode == XC_SERIAL) ? 64 : 1);
+ return MKDEV(TTY_MAJOR, (xc_mode == XC_SERIAL) ? 64 : 1);
}
#endif
static struct console kcons_info = {
- .device = kcons_device,
- .flags = CON_PRINTBUFFER,
- .index = -1,
+ .device = kcons_device,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
};
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -196,44 +196,42 @@
void xen_console_init(void)
#endif
{
- if ( xen_start_info->flags & SIF_INITDOMAIN )
- {
- if ( xc_mode == XC_DEFAULT )
- xc_mode = XC_SERIAL;
- kcons_info.write = kcons_write_dom0;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- if ( xc_mode == XC_SERIAL )
- kcons_info.flags |= CON_ENABLED;
-#endif
- }
- else
- {
- if ( xc_mode == XC_DEFAULT )
- xc_mode = XC_TTY;
- kcons_info.write = kcons_write;
- }
-
- switch ( xc_mode )
- {
- case XC_SERIAL:
- strcpy(kcons_info.name, "ttyS");
- if ( xc_num == -1 ) xc_num = 0;
- break;
-
- case XC_TTY:
- strcpy(kcons_info.name, "tty");
- if ( xc_num == -1 ) xc_num = 1;
- break;
-
- default:
- return __RETCODE;
- }
-
- wbuf = alloc_bootmem(wbuf_size);
-
- register_console(&kcons_info);
-
- return __RETCODE;
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ if (xc_mode == XC_DEFAULT)
+ xc_mode = XC_SERIAL;
+ kcons_info.write = kcons_write_dom0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ if (xc_mode == XC_SERIAL)
+ kcons_info.flags |= CON_ENABLED;
+#endif
+ } else {
+ if (xc_mode == XC_DEFAULT)
+ xc_mode = XC_TTY;
+ kcons_info.write = kcons_write;
+ }
+
+ switch (xc_mode) {
+ case XC_SERIAL:
+ strcpy(kcons_info.name, "ttyS");
+ if (xc_num == -1)
+ xc_num = 0;
+ break;
+
+ case XC_TTY:
+ strcpy(kcons_info.name, "tty");
+ if (xc_num == -1)
+ xc_num = 1;
+ break;
+
+ default:
+ return __RETCODE;
+ }
+
+ wbuf = alloc_bootmem(wbuf_size);
+
+ register_console(&kcons_info);
+
+ return __RETCODE;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
console_initcall(xen_console_init);
@@ -246,41 +244,40 @@
asmlinkage int xprintk(const char *fmt, ...)
#endif
{
- va_list args;
- int printk_len;
- static char printk_buf[1024];
+ va_list args;
+ int printk_len;
+ static char printk_buf[1024];
- /* Emit the output into the temporary buffer */
- va_start(args, fmt);
- printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
- va_end(args);
-
- /* Send the processed output directly to Xen. */
- kcons_write_dom0(NULL, printk_buf, printk_len);
-
- return 0;
+ /* Emit the output into the temporary buffer */
+ va_start(args, fmt);
+ printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+ va_end(args);
+
+ /* Send the processed output directly to Xen. */
+ kcons_write_dom0(NULL, printk_buf, printk_len);
+
+ return 0;
}
/*** Forcibly flush console data before dying. ***/
void xencons_force_flush(void)
{
- int sz;
-
- /* Emergency console is synchronous, so there's nothing to flush. */
- if ( xen_start_info->flags & SIF_INITDOMAIN )
- return;
-
-
- /* Spin until console data is flushed through to the domain controller. */
- while ( (wc != wp) )
- {
- int sent = 0;
- if ( (sz = wp - wc) == 0 )
- continue;
- sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
- if (sent > 0)
- wc += sent;
- }
+ int sz;
+
+ /* Emergency console is synchronous, so there's nothing to flush. */
+ if (xen_start_info->flags & SIF_INITDOMAIN)
+ return;
+
+
+ /* Spin until console data is flushed through to the daemon. */
+ while (wc != wp) {
+ int sent = 0;
+ if ((sz = wp - wc) == 0)
+ continue;
+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+ if (sent > 0)
+ wc += sent;
+ }
}
@@ -305,362 +302,358 @@
/* Non-privileged receive callback. */
static void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
{
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&xencons_lock, flags);
- if ( xencons_tty != NULL )
- {
- for ( i = 0; i < len; i++ ) {
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ if (xencons_tty == NULL)
+ goto out;
+
+ for (i = 0; i < len; i++) {
#ifdef CONFIG_MAGIC_SYSRQ
- if (sysrq_enabled) {
- if (buf[i] == '\x0f') { /* ^O */
- sysrq_requested = jiffies;
- continue; /* don't print the sysrq key */
- } else if (sysrq_requested) {
- unsigned long sysrq_timeout = sysrq_requested + HZ*2;
- sysrq_requested = 0;
- /* if it's been less than a timeout, do the sysrq */
- if (time_before(jiffies, sysrq_timeout)) {
- spin_unlock_irqrestore(&xencons_lock, flags);
- handle_sysrq(buf[i], regs, xencons_tty);
- spin_lock_irqsave(&xencons_lock, flags);
- continue;
- }
- }
- }
-#endif
- tty_insert_flip_char(xencons_tty, buf[i], 0);
- }
- tty_flip_buffer_push(xencons_tty);
- }
- spin_unlock_irqrestore(&xencons_lock, flags);
-
+ if (sysrq_enabled) {
+ if (buf[i] == '\x0f') { /* ^O */
+ sysrq_requested = jiffies;
+ continue; /* don't print the sysrq key */
+ } else if (sysrq_requested) {
+ unsigned long sysrq_timeout =
+ sysrq_requested + HZ*2;
+ sysrq_requested = 0;
+ if (time_before(jiffies, sysrq_timeout)) {
+ spin_unlock_irqrestore(
+ &xencons_lock, flags);
+ handle_sysrq(
+ buf[i], regs, xencons_tty);
+ spin_lock_irqsave(
+ &xencons_lock, flags);
+ continue;
+ }
+ }
+ }
+#endif
+ tty_insert_flip_char(xencons_tty, buf[i], 0);
+ }
+ tty_flip_buffer_push(xencons_tty);
+
+ out:
+ spin_unlock_irqrestore(&xencons_lock, flags);
}
/* Privileged and non-privileged transmit worker. */
static void __xencons_tx_flush(void)
{
- int sz, work_done = 0;
-
- if ( xen_start_info->flags & SIF_INITDOMAIN )
- {
- if ( x_char )
- {
- kcons_write_dom0(NULL, &x_char, 1);
- x_char = 0;
- work_done = 1;
- }
-
- while ( wc != wp )
- {
- sz = wp - wc;
- if ( sz > (wbuf_size - WBUF_MASK(wc)) )
- sz = wbuf_size - WBUF_MASK(wc);
- kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
- wc += sz;
- work_done = 1;
- }
- }
- else
- {
- while ( x_char )
- {
- if (xencons_ring_send(&x_char, 1) == 1) {
- x_char = 0;
- work_done = 1;
- }
- }
-
- while ( wc != wp )
- {
- int sent;
- sz = wp - wc;
- if ( sz > (wbuf_size - WBUF_MASK(wc)) )
- sz = wbuf_size - WBUF_MASK(wc);
- sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
- if ( sent > 0 ) {
- wc += sent;
- work_done = 1;
- }
- }
- }
-
- if ( work_done && (xencons_tty != NULL) )
- {
- wake_up_interruptible(&xencons_tty->write_wait);
- if ( (xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
- (xencons_tty->ldisc.write_wakeup != NULL) )
- (xencons_tty->ldisc.write_wakeup)(xencons_tty);
- }
+ int sz, work_done = 0;
+
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ if (x_char) {
+ kcons_write_dom0(NULL, &x_char, 1);
+ x_char = 0;
+ work_done = 1;
+ }
+
+ while (wc != wp) {
+ sz = wp - wc;
+ if (sz > (wbuf_size - WBUF_MASK(wc)))
+ sz = wbuf_size - WBUF_MASK(wc);
+ kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
+ wc += sz;
+ work_done = 1;
+ }
+ } else {
+ while (x_char) {
+ if (xencons_ring_send(&x_char, 1) == 1) {
+ x_char = 0;
+ work_done = 1;
+ }
+ }
+
+ while (wc != wp) {
+ int sent;
+ sz = wp - wc;
+ if (sz > (wbuf_size - WBUF_MASK(wc)))
+ sz = wbuf_size - WBUF_MASK(wc);
+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+ if (sent > 0) {
+ wc += sent;
+ work_done = 1;
+ }
+ }
+ }
+
+ if (work_done && (xencons_tty != NULL))
+ {
+ wake_up_interruptible(&xencons_tty->write_wait);
+ if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+ (xencons_tty->ldisc.write_wakeup != NULL))
+ (xencons_tty->ldisc.write_wakeup)(xencons_tty);
+ }
}
/* Privileged receive callback and transmit kicker. */
static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
- static char rbuf[16];
- int i, l;
- unsigned long flags;
-
- spin_lock_irqsave(&xencons_lock, flags);
-
- if ( xencons_tty != NULL )
- {
- /* Receive work. */
- while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 )
- for ( i = 0; i < l; i++ )
- tty_insert_flip_char(xencons_tty, rbuf[i], 0);
- if ( xencons_tty->flip.count != 0 )
- tty_flip_buffer_push(xencons_tty);
- }
-
- /* Transmit work. */
- __xencons_tx_flush();
-
- spin_unlock_irqrestore(&xencons_lock, flags);
-
- return IRQ_HANDLED;
+ static char rbuf[16];
+ int i, l;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ if (xencons_tty != NULL)
+ {
+ /* Receive work. */
+ while ((l = HYPERVISOR_console_io(
+ CONSOLEIO_read, 16, rbuf)) > 0)
+ for (i = 0; i < l; i++)
+ tty_insert_flip_char(xencons_tty, rbuf[i], 0);
+ if (xencons_tty->flip.count != 0)
+ tty_flip_buffer_push(xencons_tty);
+ }
+
+ /* Transmit work. */
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return IRQ_HANDLED;
}
static int xencons_write_room(struct tty_struct *tty)
{
- return wbuf_size - (wp - wc);
+ return wbuf_size - (wp - wc);
}
static int xencons_chars_in_buffer(struct tty_struct *tty)
{
- return wp - wc;
+ return wp - wc;
}
static void xencons_send_xchar(struct tty_struct *tty, char ch)
{
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- spin_lock_irqsave(&xencons_lock, flags);
- x_char = ch;
- __xencons_tx_flush();
- spin_unlock_irqrestore(&xencons_lock, flags);
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ x_char = ch;
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
}
static void xencons_throttle(struct tty_struct *tty)
{
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- if ( I_IXOFF(tty) )
- xencons_send_xchar(tty, STOP_CHAR(tty));
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ if (I_IXOFF(tty))
+ xencons_send_xchar(tty, STOP_CHAR(tty));
}
static void xencons_unthrottle(struct tty_struct *tty)
{
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- if ( I_IXOFF(tty) )
- {
- if ( x_char != 0 )
- x_char = 0;
- else
- xencons_send_xchar(tty, START_CHAR(tty));
- }
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ if (I_IXOFF(tty)) {
+ if (x_char != 0)
+ x_char = 0;
+ else
+ xencons_send_xchar(tty, START_CHAR(tty));
+ }
}
static void xencons_flush_buffer(struct tty_struct *tty)
{
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- spin_lock_irqsave(&xencons_lock, flags);
- wc = wp = 0;
- spin_unlock_irqrestore(&xencons_lock, flags);
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ wc = wp = 0;
+ spin_unlock_irqrestore(&xencons_lock, flags);
}
static inline int __xencons_put_char(int ch)
{
- char _ch = (char)ch;
- if ( (wp - wc) == wbuf_size )
- return 0;
- wbuf[WBUF_MASK(wp++)] = _ch;
- return 1;
+ char _ch = (char)ch;
+ if ((wp - wc) == wbuf_size)
+ return 0;
+ wbuf[WBUF_MASK(wp++)] = _ch;
+ return 1;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static int xencons_write(
- struct tty_struct *tty,
- const unsigned char *buf,
- int count)
-{
- int i;
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return count;
-
- spin_lock_irqsave(&xencons_lock, flags);
-
- for ( i = 0; i < count; i++ )
- if ( !__xencons_put_char(buf[i]) )
- break;
-
- if ( i != 0 )
- __xencons_tx_flush();
-
- spin_unlock_irqrestore(&xencons_lock, flags);
-
- return i;
+ struct tty_struct *tty,
+ const unsigned char *buf,
+ int count)
+{
+ int i;
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return count;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ for (i = 0; i < count; i++)
+ if (!__xencons_put_char(buf[i]))
+ break;
+
+ if (i != 0)
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return i;
}
#else
static int xencons_write(
- struct tty_struct *tty,
- int from_user,
- const u_char *buf,
- int count)
-{
- int i;
- unsigned long flags;
-
- if ( from_user && verify_area(VERIFY_READ, buf, count) )
- return -EINVAL;
-
- if ( TTY_INDEX(tty) != 0 )
- return count;
-
- spin_lock_irqsave(&xencons_lock, flags);
-
- for ( i = 0; i < count; i++ )
- {
- char ch;
- if ( from_user )
- __get_user(ch, buf + i);
- else
- ch = buf[i];
- if ( !__xencons_put_char(ch) )
- break;
- }
-
- if ( i != 0 )
- __xencons_tx_flush();
-
- spin_unlock_irqrestore(&xencons_lock, flags);
-
- return i;
+ struct tty_struct *tty,
+ int from_user,
+ const u_char *buf,
+ int count)
+{
+ int i;
+ unsigned long flags;
+
+ if (from_user && verify_area(VERIFY_READ, buf, count))
+ return -EINVAL;
+
+ if (TTY_INDEX(tty) != 0)
+ return count;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ for (i = 0; i < count; i++) {
+ char ch;
+ if (from_user)
+ __get_user(ch, buf + i);
+ else
+ ch = buf[i];
+ if (!__xencons_put_char(ch))
+ break;
+ }
+
+ if (i != 0)
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return i;
}
#endif
static void xencons_put_char(struct tty_struct *tty, u_char ch)
{
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- spin_lock_irqsave(&xencons_lock, flags);
- (void)__xencons_put_char(ch);
- spin_unlock_irqrestore(&xencons_lock, flags);
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ (void)__xencons_put_char(ch);
+ spin_unlock_irqrestore(&xencons_lock, flags);
}
static void xencons_flush_chars(struct tty_struct *tty)
{
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- spin_lock_irqsave(&xencons_lock, flags);
- __xencons_tx_flush();
- spin_unlock_irqrestore(&xencons_lock, flags);
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
}
static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
{
- unsigned long orig_jiffies = jiffies;
-
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- while ( DRV(tty->driver)->chars_in_buffer(tty) )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- if ( signal_pending(current) )
- break;
- if ( (timeout != 0) && time_after(jiffies, orig_jiffies + timeout) )
- break;
- }
+ unsigned long orig_jiffies = jiffies;
+
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ while (DRV(tty->driver)->chars_in_buffer(tty))
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ if (signal_pending(current))
+ break;
+ if ( (timeout != 0) &&
+ time_after(jiffies, orig_jiffies + timeout) )
+ break;
+ }
- set_current_state(TASK_RUNNING);
+ set_current_state(TASK_RUNNING);
}
static int xencons_open(struct tty_struct *tty, struct file *filp)
{
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return 0;
-
- spin_lock_irqsave(&xencons_lock, flags);
- tty->driver_data = NULL;
- if ( xencons_tty == NULL )
- xencons_tty = tty;
- __xencons_tx_flush();
- spin_unlock_irqrestore(&xencons_lock, flags);
-
- return 0;
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return 0;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ tty->driver_data = NULL;
+ if (xencons_tty == NULL)
+ xencons_tty = tty;
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return 0;
}
static void xencons_close(struct tty_struct *tty, struct file *filp)
{
- unsigned long flags;
-
- if ( TTY_INDEX(tty) != 0 )
- return;
-
- if ( tty->count == 1 )
- {
- tty->closing = 1;
- tty_wait_until_sent(tty, 0);
- if ( DRV(tty->driver)->flush_buffer != NULL )
- DRV(tty->driver)->flush_buffer(tty);
- if ( tty->ldisc.flush_buffer != NULL )
- tty->ldisc.flush_buffer(tty);
- tty->closing = 0;
- spin_lock_irqsave(&xencons_lock, flags);
- xencons_tty = NULL;
- spin_unlock_irqrestore(&xencons_lock, flags);
- }
+ unsigned long flags;
+
+ if (TTY_INDEX(tty) != 0)
+ return;
+
+ if (tty->count == 1) {
+ tty->closing = 1;
+ tty_wait_until_sent(tty, 0);
+ if (DRV(tty->driver)->flush_buffer != NULL)
+ DRV(tty->driver)->flush_buffer(tty);
+ if (tty->ldisc.flush_buffer != NULL)
+ tty->ldisc.flush_buffer(tty);
+ tty->closing = 0;
+ spin_lock_irqsave(&xencons_lock, flags);
+ xencons_tty = NULL;
+ spin_unlock_irqrestore(&xencons_lock, flags);
+ }
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static struct tty_operations xencons_ops = {
- .open = xencons_open,
- .close = xencons_close,
- .write = xencons_write,
- .write_room = xencons_write_room,
- .put_char = xencons_put_char,
- .flush_chars = xencons_flush_chars,
- .chars_in_buffer = xencons_chars_in_buffer,
- .send_xchar = xencons_send_xchar,
- .flush_buffer = xencons_flush_buffer,
- .throttle = xencons_throttle,
- .unthrottle = xencons_unthrottle,
- .wait_until_sent = xencons_wait_until_sent,
+ .open = xencons_open,
+ .close = xencons_close,
+ .write = xencons_write,
+ .write_room = xencons_write_room,
+ .put_char = xencons_put_char,
+ .flush_chars = xencons_flush_chars,
+ .chars_in_buffer = xencons_chars_in_buffer,
+ .send_xchar = xencons_send_xchar,
+ .flush_buffer = xencons_flush_buffer,
+ .throttle = xencons_throttle,
+ .unthrottle = xencons_unthrottle,
+ .wait_until_sent = xencons_wait_until_sent,
};
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
static const char *xennullcon_startup(void)
{
- return NULL;
+ return NULL;
}
static int xennullcon_dummy(void)
{
- return 0;
+ return 0;
}
#define DUMMY (void *)xennullcon_dummy
@@ -672,122 +665,128 @@
*/
const struct consw xennull_con = {
- .owner = THIS_MODULE,
- .con_startup = xennullcon_startup,
- .con_init = DUMMY,
- .con_deinit = DUMMY,
- .con_clear = DUMMY,
- .con_putc = DUMMY,
- .con_putcs = DUMMY,
- .con_cursor = DUMMY,
- .con_scroll = DUMMY,
- .con_bmove = DUMMY,
- .con_switch = DUMMY,
- .con_blank = DUMMY,
- .con_font_set = DUMMY,
- .con_font_get = DUMMY,
- .con_font_default = DUMMY,
- .con_font_copy = DUMMY,
- .con_set_palette = DUMMY,
- .con_scrolldelta = DUMMY,
+ .owner = THIS_MODULE,
+ .con_startup = xennullcon_startup,
+ .con_init = DUMMY,
+ .con_deinit = DUMMY,
+ .con_clear = DUMMY,
+ .con_putc = DUMMY,
+ .con_putcs = DUMMY,
+ .con_cursor = DUMMY,
+ .con_scroll = DUMMY,
+ .con_bmove = DUMMY,
+ .con_switch = DUMMY,
+ .con_blank = DUMMY,
+ .con_font_set = DUMMY,
+ .con_font_get = DUMMY,
+ .con_font_default = DUMMY,
+ .con_font_copy = DUMMY,
+ .con_set_palette = DUMMY,
+ .con_scrolldelta = DUMMY,
};
#endif
#endif
static int __init xencons_init(void)
{
- int rc;
-
- if ( xc_mode == XC_OFF )
- return 0;
-
- xencons_ring_init();
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ?
- 1 : MAX_NR_CONSOLES);
- if ( xencons_driver == NULL )
- return -ENOMEM;
+ int rc;
+
+ if (xc_mode == XC_OFF)
+ return 0;
+
+ xencons_ring_init();
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ?
+ 1 : MAX_NR_CONSOLES);
+ if (xencons_driver == NULL)
+ return -ENOMEM;
#else
- memset(&xencons_driver, 0, sizeof(struct tty_driver));
- xencons_driver.magic = TTY_DRIVER_MAGIC;
- xencons_driver.refcount = &xencons_refcount;
- xencons_driver.table = xencons_table;
- xencons_driver.num = (xc_mode == XC_SERIAL) ? 1 : MAX_NR_CONSOLES;
-#endif
-
- DRV(xencons_driver)->major = TTY_MAJOR;
- DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL;
- DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL;
- DRV(xencons_driver)->init_termios = tty_std_termios;
- DRV(xencons_driver)->flags =
- TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS;
- DRV(xencons_driver)->termios = xencons_termios;
- DRV(xencons_driver)->termios_locked = xencons_termios_locked;
-
- if ( xc_mode == XC_SERIAL )
- {
- DRV(xencons_driver)->name = "ttyS";
- DRV(xencons_driver)->minor_start = 64 + xc_num;
- DRV(xencons_driver)->name_base = 0 + xc_num;
- }
- else
- {
- DRV(xencons_driver)->name = "tty";
- DRV(xencons_driver)->minor_start = xc_num;
- DRV(xencons_driver)->name_base = xc_num;
- }
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- tty_set_operations(xencons_driver, &xencons_ops);
+ memset(&xencons_driver, 0, sizeof(struct tty_driver));
+ xencons_driver.magic = TTY_DRIVER_MAGIC;
+ xencons_driver.refcount = &xencons_refcount;
+ xencons_driver.table = xencons_table;
+ xencons_driver.num =
+ (xc_mode == XC_SERIAL) ? 1 : MAX_NR_CONSOLES;
+#endif
+
+ DRV(xencons_driver)->major = TTY_MAJOR;
+ DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL;
+ DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL;
+ DRV(xencons_driver)->init_termios = tty_std_termios;
+ DRV(xencons_driver)->flags =
+ TTY_DRIVER_REAL_RAW |
+ TTY_DRIVER_RESET_TERMIOS |
+ TTY_DRIVER_NO_DEVFS;
+ DRV(xencons_driver)->termios = xencons_termios;
+ DRV(xencons_driver)->termios_locked = xencons_termios_locked;
+
+ if (xc_mode == XC_SERIAL)
+ {
+ DRV(xencons_driver)->name = "ttyS";
+ DRV(xencons_driver)->minor_start = 64 + xc_num;
+ DRV(xencons_driver)->name_base = 0 + xc_num;
+ } else {
+ DRV(xencons_driver)->name = "tty";
+ DRV(xencons_driver)->minor_start = xc_num;
+ DRV(xencons_driver)->name_base = xc_num;
+ }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ tty_set_operations(xencons_driver, &xencons_ops);
#else
- xencons_driver.open = xencons_open;
- xencons_driver.close = xencons_close;
- xencons_driver.write = xencons_write;
- xencons_driver.write_room = xencons_write_room;
- xencons_driver.put_char = xencons_put_char;
- xencons_driver.flush_chars = xencons_flush_chars;
- xencons_driver.chars_in_buffer = xencons_chars_in_buffer;
- xencons_driver.send_xchar = xencons_send_xchar;
- xencons_driver.flush_buffer = xencons_flush_buffer;
- xencons_driver.throttle = xencons_throttle;
- xencons_driver.unthrottle = xencons_unthrottle;
- xencons_driver.wait_until_sent = xencons_wait_until_sent;
-#endif
-
- if ( (rc = tty_register_driver(DRV(xencons_driver))) != 0 )
- {
- printk("WARNING: Failed to register Xen virtual "
- "console driver as '%s%d'\n",
- DRV(xencons_driver)->name, DRV(xencons_driver)->name_base);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- put_tty_driver(xencons_driver);
- xencons_driver = NULL;
-#endif
- return rc;
- }
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- tty_register_device(xencons_driver, 0, NULL);
-#endif
-
- if ( xen_start_info->flags & SIF_INITDOMAIN )
- {
- xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE);
- (void)request_irq(xencons_priv_irq,
- xencons_priv_interrupt, 0, "console", NULL);
- }
- else
- {
-
- xencons_ring_register_receiver(xencons_rx);
- }
-
- printk("Xen virtual console successfully installed as %s%d\n",
- DRV(xencons_driver)->name,
- DRV(xencons_driver)->name_base );
+ xencons_driver.open = xencons_open;
+ xencons_driver.close = xencons_close;
+ xencons_driver.write = xencons_write;
+ xencons_driver.write_room = xencons_write_room;
+ xencons_driver.put_char = xencons_put_char;
+ xencons_driver.flush_chars = xencons_flush_chars;
+ xencons_driver.chars_in_buffer = xencons_chars_in_buffer;
+ xencons_driver.send_xchar = xencons_send_xchar;
+ xencons_driver.flush_buffer = xencons_flush_buffer;
+ xencons_driver.throttle = xencons_throttle;
+ xencons_driver.unthrottle = xencons_unthrottle;
+ xencons_driver.wait_until_sent = xencons_wait_until_sent;
+#endif
+
+ if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
+ printk("WARNING: Failed to register Xen virtual "
+ "console driver as '%s%d'\n",
+ DRV(xencons_driver)->name,
+ DRV(xencons_driver)->name_base);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ put_tty_driver(xencons_driver);
+ xencons_driver = NULL;
+#endif
+ return rc;
+ }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ tty_register_device(xencons_driver, 0, NULL);
+#endif
+
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE);
+ (void)request_irq(xencons_priv_irq,
+ xencons_priv_interrupt, 0, "console", NULL);
+ } else {
+ xencons_ring_register_receiver(xencons_rx);
+ }
+
+ printk("Xen virtual console successfully installed as %s%d\n",
+ DRV(xencons_driver)->name,
+ DRV(xencons_driver)->name_base );
- return 0;
+ return 0;
}
module_init(xencons_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c	Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c	Thu Sep 22 17:42:01 2005
@@ -36,13 +36,12 @@
static inline struct ring_head *outring(void)
{
- return machine_to_virt(xen_start_info->console_mfn << PAGE_SHIFT);
+ return mfn_to_virt(xen_start_info->console_mfn);
}
static inline struct ring_head *inring(void)
{
- return machine_to_virt(xen_start_info->console_mfn << PAGE_SHIFT)
- + PAGE_SIZE/2;
+ return mfn_to_virt(xen_start_info->console_mfn) + PAGE_SIZE/2;
}
@@ -126,3 +125,13 @@
(void)xencons_ring_init();
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h
--- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h	Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h	Thu Sep 22 17:42:01 2005
@@ -3,12 +3,21 @@
asmlinkage int xprintk(const char *fmt, ...);
-
int xencons_ring_init(void);
int xencons_ring_send(const char *data, unsigned len);
-typedef void (xencons_receiver_func)(char *buf, unsigned len,
- struct pt_regs *regs);
+typedef void (xencons_receiver_func)(
+ char *buf, unsigned len, struct pt_regs *regs);
void xencons_ring_register_receiver(xencons_receiver_func *f);
#endif /* _XENCONS_RING_H */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Thu Sep 22 17:42:01 2005
@@ -1,9 +1,9 @@
/******************************************************************************
* evtchn.c
*
- * Xenolinux driver for receiving and demuxing event-channel signals.
- *
- * Copyright (c) 2004, K A Fraser
+ * Driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004-2005, K A Fraser
* Multi-process extensions Copyright (c) 2004, Steven Smith
*
* This file may be distributed separately from the Linux kernel, or
@@ -46,29 +46,18 @@
#include <linux/init.h>
#define XEN_EVTCHN_MASK_OPS
#include <asm-xen/evtchn.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#include <linux/devfs_fs_kernel.h>
-#define OLD_DEVFS
-#else
#include <linux/gfp.h>
-#endif
-
-#ifdef OLD_DEVFS
-/* NB. This must be shared amongst drivers if more things go in /dev/xen */
-static devfs_handle_t xen_dev_dir;
-#endif
struct per_user_data {
- /* Notification ring, accessed via /dev/xen/evtchn. */
-# define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
-# define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
- u16 *ring;
- unsigned int ring_cons, ring_prod, ring_overflow;
-
- /* Processes wait on this queue when ring is empty. */
- wait_queue_head_t evtchn_wait;
- struct fasync_struct *evtchn_async_queue;
+ /* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+ u16 *ring;
+ unsigned int ring_cons, ring_prod, ring_overflow;
+
+ /* Processes wait on this queue when ring is empty. */
+ wait_queue_head_t evtchn_wait;
+ struct fasync_struct *evtchn_async_queue;
};
/* Who's bound to each port? */
@@ -77,356 +66,310 @@
void evtchn_device_upcall(int port)
{
- struct per_user_data *u;
-
- spin_lock(&port_user_lock);
-
- mask_evtchn(port);
- clear_evtchn(port);
-
- if ( (u = port_user[port]) != NULL )
- {
- if ( (u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE )
- {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
- if ( u->ring_cons == u->ring_prod++ )
- {
- wake_up_interruptible(&u->evtchn_wait);
- kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN);
- }
- }
- else
- {
- u->ring_overflow = 1;
- }
- }
-
- spin_unlock(&port_user_lock);
+ struct per_user_data *u;
+
+ spin_lock(&port_user_lock);
+
+ mask_evtchn(port);
+ clear_evtchn(port);
+
+ if ((u = port_user[port]) != NULL) {
+ if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
+ if (u->ring_cons == u->ring_prod++) {
+ wake_up_interruptible(&u->evtchn_wait);
+ kill_fasync(&u->evtchn_async_queue, SIGIO,
+ POLL_IN);
+ }
+ } else {
+ u->ring_overflow = 1;
+ }
+ }
+
+ spin_unlock(&port_user_lock);
}
static ssize_t evtchn_read(struct file *file, char *buf,
size_t count, loff_t *ppos)
{
- int rc;
- unsigned int c, p, bytes1 = 0, bytes2 = 0;
- DECLARE_WAITQUEUE(wait, current);
- struct per_user_data *u = file->private_data;
-
- add_wait_queue(&u->evtchn_wait, &wait);
-
- count &= ~1; /* even number of bytes */
-
- if ( count == 0 )
- {
- rc = 0;
- goto out;
- }
-
- if ( count > PAGE_SIZE )
- count = PAGE_SIZE;
-
- for ( ; ; )
- {
- set_current_state(TASK_INTERRUPTIBLE);
-
- if ( (c = u->ring_cons) != (p = u->ring_prod) )
- break;
-
- if ( u->ring_overflow )
- {
- rc = -EFBIG;
- goto out;
- }
-
- if ( file->f_flags & O_NONBLOCK )
- {
- rc = -EAGAIN;
- goto out;
- }
-
- if ( signal_pending(current) )
- {
- rc = -ERESTARTSYS;
- goto out;
- }
-
- schedule();
- }
-
- /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
- if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 )
- {
- bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(u16);
- bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
- }
- else
- {
- bytes1 = (p - c) * sizeof(u16);
- bytes2 = 0;
- }
-
- /* Truncate chunks according to caller's maximum byte count. */
- if ( bytes1 > count )
- {
- bytes1 = count;
- bytes2 = 0;
- }
- else if ( (bytes1 + bytes2) > count )
- {
- bytes2 = count - bytes1;
- }
-
- if ( copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
- ((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2)) )
- {
- rc = -EFAULT;
- goto out;
- }
-
- u->ring_cons += (bytes1 + bytes2) / sizeof(u16);
-
- rc = bytes1 + bytes2;
+ int rc;
+ unsigned int c, p, bytes1 = 0, bytes2 = 0;
+ DECLARE_WAITQUEUE(wait, current);
+ struct per_user_data *u = file->private_data;
+
+ add_wait_queue(&u->evtchn_wait, &wait);
+
+ count &= ~1; /* even number of bytes */
+
+ if (count == 0) {
+ rc = 0;
+ goto out;
+ }
+
+ if (count > PAGE_SIZE)
+ count = PAGE_SIZE;
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if ((c = u->ring_cons) != (p = u->ring_prod))
+ break;
+
+ if (u->ring_overflow) {
+ rc = -EFBIG;
+ goto out;
+ }
+
+ if (file->f_flags & O_NONBLOCK) {
+ rc = -EAGAIN;
+ goto out;
+ }
+
+ if (signal_pending(current)) {
+ rc = -ERESTARTSYS;
+ goto out;
+ }
+
+ schedule();
+ }
+
+ /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+ if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+ bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+ sizeof(u16);
+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
+ } else {
+ bytes1 = (p - c) * sizeof(u16);
+ bytes2 = 0;
+ }
+
+ /* Truncate chunks according to caller's maximum byte count. */
+ if (bytes1 > count) {
+ bytes1 = count;
+ bytes2 = 0;
+ } else if ((bytes1 + bytes2) > count) {
+ bytes2 = count - bytes1;
+ }
+
+ if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+ ((bytes2 != 0) &&
+ copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ u->ring_cons += (bytes1 + bytes2) / sizeof(u16);
+
+ rc = bytes1 + bytes2;
out:
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&u->evtchn_wait, &wait);
- return rc;
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&u->evtchn_wait, &wait);
+ return rc;
}
static ssize_t evtchn_write(struct file *file, const char *buf,
size_t count, loff_t *ppos)
{
- int rc, i;
- u16 *kbuf = (u16 *)__get_free_page(GFP_KERNEL);
- struct per_user_data *u = file->private_data;
-
- if ( kbuf == NULL )
- return -ENOMEM;
-
- count &= ~1; /* even number of bytes */
-
- if ( count == 0 )
- {
- rc = 0;
- goto out;
- }
-
- if ( count > PAGE_SIZE )
- count = PAGE_SIZE;
-
- if ( copy_from_user(kbuf, buf, count) != 0 )
- {
- rc = -EFAULT;
- goto out;
- }
-
- spin_lock_irq(&port_user_lock);
- for ( i = 0; i < (count/2); i++ )
- if ( (kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u) )
- unmask_evtchn(kbuf[i]);
- spin_unlock_irq(&port_user_lock);
-
- rc = count;
+ int rc, i;
+ u16 *kbuf = (u16 *)__get_free_page(GFP_KERNEL);
+ struct per_user_data *u = file->private_data;
+
+ if (kbuf == NULL)
+ return -ENOMEM;
+
+ count &= ~1; /* even number of bytes */
+
+ if (count == 0) {
+ rc = 0;
+ goto out;
+ }
+
+ if (count > PAGE_SIZE)
+ count = PAGE_SIZE;
+
+ if (copy_from_user(kbuf, buf, count) != 0) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ spin_lock_irq(&port_user_lock);
+ for (i = 0; i < (count/2); i++)
+ if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+ unmask_evtchn(kbuf[i]);
+ spin_unlock_irq(&port_user_lock);
+
+ rc = count;
out:
- free_page((unsigned long)kbuf);
- return rc;
+ free_page((unsigned long)kbuf);
+ return rc;
}
static int evtchn_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
- int rc = 0;
- struct per_user_data *u = file->private_data;
-
- spin_lock_irq(&port_user_lock);
+ int rc = 0;
+ struct per_user_data *u = file->private_data;
+
+ spin_lock_irq(&port_user_lock);
- switch ( cmd )
- {
- case EVTCHN_RESET:
- /* Initialise the ring to empty. Clear errors. */
- u->ring_cons = u->ring_prod = u->ring_overflow = 0;
- break;
-
- case EVTCHN_BIND:
- if ( arg >= NR_EVENT_CHANNELS )
- {
- rc = -EINVAL;
- }
- else if ( port_user[arg] != NULL )
- {
- rc = -EISCONN;
- }
- else
- {
- port_user[arg] = u;
- unmask_evtchn(arg);
- }
- break;
-
- case EVTCHN_UNBIND:
- if ( arg >= NR_EVENT_CHANNELS )
- {
- rc = -EINVAL;
- }
- else if ( port_user[arg] != u )
- {
- rc = -ENOTCONN;
- }
- else
- {
- port_user[arg] = NULL;
- mask_evtchn(arg);
- }
- break;
-
- default:
- rc = -ENOSYS;
- break;
- }
-
- spin_unlock_irq(&port_user_lock);
-
- return rc;
+ switch (cmd) {
+ case EVTCHN_RESET:
+ /* Initialise the ring to empty. Clear errors. */
+ u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+ break;
+
+ case EVTCHN_BIND:
+ if (arg >= NR_EVENT_CHANNELS) {
+ rc = -EINVAL;
+ } else if (port_user[arg] != NULL) {
+ rc = -EISCONN;
+ } else {
+ port_user[arg] = u;
+ unmask_evtchn(arg);
+ }
+ break;
+
+ case EVTCHN_UNBIND:
+ if (arg >= NR_EVENT_CHANNELS) {
+ rc = -EINVAL;
+ } else if (port_user[arg] != u) {
+ rc = -ENOTCONN;
+ } else {
+ port_user[arg] = NULL;
+ mask_evtchn(arg);
+ }
+ break;
+
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ spin_unlock_irq(&port_user_lock);
+
+ return rc;
}
static unsigned int evtchn_poll(struct file *file, poll_table *wait)
{
- unsigned int mask = POLLOUT | POLLWRNORM;
- struct per_user_data *u = file->private_data;
-
- poll_wait(file, &u->evtchn_wait, wait);
- if ( u->ring_cons != u->ring_prod )
- mask |= POLLIN | POLLRDNORM;
- if ( u->ring_overflow )
- mask = POLLERR;
- return mask;
+ unsigned int mask = POLLOUT | POLLWRNORM;
+ struct per_user_data *u = file->private_data;
+
+ poll_wait(file, &u->evtchn_wait, wait);
+ if (u->ring_cons != u->ring_prod)
+ mask |= POLLIN | POLLRDNORM;
+ if (u->ring_overflow)
+ mask = POLLERR;
+ return mask;
}
static int evtchn_fasync(int fd, struct file *filp, int on)
{
- struct per_user_data *u = filp->private_data;
- return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+ struct per_user_data *u = filp->private_data;
+ return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}
static int evtchn_open(struct inode *inode, struct file *filp)
{
- struct per_user_data *u;
-
- if ( (u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL )
- return -ENOMEM;
-
- memset(u, 0, sizeof(*u));
- init_waitqueue_head(&u->evtchn_wait);
-
- if ( (u->ring = (u16 *)__get_free_page(GFP_KERNEL)) == NULL )
- {
- kfree(u);
- return -ENOMEM;
- }
-
- filp->private_data = u;
-
- return 0;
+ struct per_user_data *u;
+
+ if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL)
+ return -ENOMEM;
+
+ memset(u, 0, sizeof(*u));
+ init_waitqueue_head(&u->evtchn_wait);
+
+ if ((u->ring = (u16 *)__get_free_page(GFP_KERNEL)) == NULL)
+ {
+ kfree(u);
+ return -ENOMEM;
+ }
+
+ filp->private_data = u;
+
+ return 0;
}
static int evtchn_release(struct inode *inode, struct file *filp)
{
- int i;
- struct per_user_data *u = filp->private_data;
-
- spin_lock_irq(&port_user_lock);
-
- free_page((unsigned long)u->ring);
-
- for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
- {
- if ( port_user[i] == u )
- {
- port_user[i] = NULL;
- mask_evtchn(i);
- }
- }
-
- spin_unlock_irq(&port_user_lock);
-
- kfree(u);
-
- return 0;
+ int i;
+ struct per_user_data *u = filp->private_data;
+
+ spin_lock_irq(&port_user_lock);
+
+ free_page((unsigned long)u->ring);
+
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ {
+ if (port_user[i] == u)
+ {
+ port_user[i] = NULL;
+ mask_evtchn(i);
+ }
+ }
+
+ spin_unlock_irq(&port_user_lock);
+
+ kfree(u);
+
+ return 0;
}
static struct file_operations evtchn_fops = {
- .owner = THIS_MODULE,
- .read = evtchn_read,
- .write = evtchn_write,
- .ioctl = evtchn_ioctl,
- .poll = evtchn_poll,
- .fasync = evtchn_fasync,
- .open = evtchn_open,
- .release = evtchn_release,
+ .owner = THIS_MODULE,
+ .read = evtchn_read,
+ .write = evtchn_write,
+ .ioctl = evtchn_ioctl,
+ .poll = evtchn_poll,
+ .fasync = evtchn_fasync,
+ .open = evtchn_open,
+ .release = evtchn_release,
};
static struct miscdevice evtchn_miscdev = {
- .minor = EVTCHN_MINOR,
- .name = "evtchn",
- .fops = &evtchn_fops,
+ .minor = EVTCHN_MINOR,
+ .name = "evtchn",
+ .fops = &evtchn_fops,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- .devfs_name = "misc/evtchn",
+ .devfs_name = "misc/evtchn",
#endif
};
static int __init evtchn_init(void)
{
-#ifdef OLD_DEVFS
- devfs_handle_t symlink_handle;
- int pos;
- char link_dest[64];
-#endif
- int err;
-
- spin_lock_init(&port_user_lock);
- memset(port_user, 0, sizeof(port_user));
-
- /* (DEVFS) create '/dev/misc/evtchn'. */
- err = misc_register(&evtchn_miscdev);
- if ( err != 0 )
- {
- printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
- return err;
- }
-
-#ifdef OLD_DEVFS
- /* (DEVFS) create directory '/dev/xen'. */
- xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
-
- /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
- pos = devfs_generate_path(evtchn_miscdev.devfs_handle,
- &link_dest[3],
- sizeof(link_dest) - 3);
- if ( pos >= 0 )
- strncpy(&link_dest[pos], "../", 3);
-
- /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
- (void)devfs_mk_symlink(xen_dev_dir,
- "evtchn",
- DEVFS_FL_DEFAULT,
- &link_dest[pos],
- &symlink_handle,
- NULL);
-
- /* (DEVFS) automatically destroy the symlink with its destination. */
- devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
-#endif
-
- printk("Event-channel device installed.\n");
-
- return 0;
+ int err;
+
+ spin_lock_init(&port_user_lock);
+ memset(port_user, 0, sizeof(port_user));
+
+ /* (DEVFS) create '/dev/misc/evtchn'. */
+ err = misc_register(&evtchn_miscdev);
+ if (err != 0)
+ {
+ printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+ return err;
+ }
+
+ printk("Event-channel device installed.\n");
+
+ return 0;
}
static void evtchn_cleanup(void)
{
- misc_deregister(&evtchn_miscdev);
+ misc_deregister(&evtchn_miscdev);
}
module_init(evtchn_init);
module_exit(evtchn_cleanup);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Sep 22 17:42:01 2005
@@ -18,16 +18,11 @@
#include <asm-xen/xen-public/io/netif.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
+#include <asm-xen/driver_util.h>
#define GRANT_INVALID_REF (0xFFFF)
-
-#endif
-
-
#if 0
#define ASSERT(_p) \
@@ -44,74 +39,64 @@
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "xen_net: " fmt, ##args)
+typedef struct netif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
-typedef struct netif_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
+ u8 fe_dev_addr[6];
- u8 fe_dev_addr[6];
+ /* Physical parameters of the comms window. */
+ u16 tx_shmem_handle;
+ grant_ref_t tx_shmem_ref;
+ u16 rx_shmem_handle;
+ grant_ref_t rx_shmem_ref;
+ unsigned int evtchn;
+ unsigned int remote_evtchn;
- /* Physical parameters of the comms window. */
- unsigned long tx_shmem_frame;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- u16 tx_shmem_handle;
- unsigned long tx_shmem_vaddr;
- grant_ref_t tx_shmem_ref;
-#endif
- unsigned long rx_shmem_frame;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- u16 rx_shmem_handle;
- unsigned long rx_shmem_vaddr;
- grant_ref_t rx_shmem_ref;
-#endif
- unsigned int evtchn;
- unsigned int remote_evtchn;
+ /* The shared rings and indexes. */
+ netif_tx_interface_t *tx;
+ netif_rx_interface_t *rx;
+ struct vm_struct *comms_area;
- /* The shared rings and indexes. */
- netif_tx_interface_t *tx;
- netif_rx_interface_t *rx;
+ /* Private indexes into shared ring. */
+ NETIF_RING_IDX rx_req_cons;
+ NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+ NETIF_RING_IDX rx_resp_prod_copy;
+ NETIF_RING_IDX tx_req_cons;
+ NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
- /* Private indexes into shared ring. */
- NETIF_RING_IDX rx_req_cons;
- NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
-#ifdef CONFIG_XEN_NETDEV_GRANT
- NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
-#endif
- NETIF_RING_IDX tx_req_cons;
- NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+ unsigned long credit_bytes;
+ unsigned long credit_usec;
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
- /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
- unsigned long credit_bytes;
- unsigned long credit_usec;
- unsigned long remaining_credit;
- struct timer_list credit_timeout;
+ /* Miscellaneous private stuff. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ int active;
+ struct list_head list; /* scheduling list */
+ atomic_t refcnt;
+ struct net_device *dev;
+ struct net_device_stats stats;
- /* Miscellaneous private stuff. */
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- int active;
- struct list_head list; /* scheduling list */
- atomic_t refcnt;
- struct net_device *dev;
- struct net_device_stats stats;
-
- struct work_struct free_work;
+ struct work_struct free_work;
} netif_t;
void netif_creditlimit(netif_t *netif);
int netif_disconnect(netif_t *netif);
netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
-void free_netif_callback(netif_t *netif);
+void free_netif(netif_t *netif);
int netif_map(netif_t *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define netif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- free_netif_callback(_b); \
- } while (0)
+#define netif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ free_netif(_b); \
+ } while (0)
void netif_xenbus_init(void);
@@ -123,3 +108,13 @@
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
#endif /* __NETIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Thu Sep 22 17:42:01 2005
@@ -11,296 +11,293 @@
static void __netif_up(netif_t *netif)
{
- struct net_device *dev = netif->dev;
- spin_lock_bh(&dev->xmit_lock);
- netif->active = 1;
- spin_unlock_bh(&dev->xmit_lock);
- (void)bind_evtchn_to_irqhandler(
- netif->evtchn, netif_be_int, 0, dev->name, netif);
- netif_schedule_work(netif);
+ struct net_device *dev = netif->dev;
+ spin_lock_bh(&dev->xmit_lock);
+ netif->active = 1;
+ spin_unlock_bh(&dev->xmit_lock);
+ (void)bind_evtchn_to_irqhandler(
+ netif->evtchn, netif_be_int, 0, dev->name, netif);
+ netif_schedule_work(netif);
}
static void __netif_down(netif_t *netif)
{
- struct net_device *dev = netif->dev;
- spin_lock_bh(&dev->xmit_lock);
- netif->active = 0;
- spin_unlock_bh(&dev->xmit_lock);
- unbind_evtchn_from_irqhandler(netif->evtchn, netif);
- netif_deschedule_work(netif);
+ struct net_device *dev = netif->dev;
+ spin_lock_bh(&dev->xmit_lock);
+ netif->active = 0;
+ spin_unlock_bh(&dev->xmit_lock);
+ unbind_evtchn_from_irqhandler(netif->evtchn, netif);
+ netif_deschedule_work(netif);
}
static int net_open(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
- if (netif->status == CONNECTED)
- __netif_up(netif);
- netif_start_queue(dev);
- return 0;
+ netif_t *netif = netdev_priv(dev);
+ if (netif->status == CONNECTED)
+ __netif_up(netif);
+ netif_start_queue(dev);
+ return 0;
}
static int net_close(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
- netif_stop_queue(dev);
- if (netif->status == CONNECTED)
- __netif_down(netif);
- return 0;
+ netif_t *netif = netdev_priv(dev);
+ netif_stop_queue(dev);
+ if (netif->status == CONNECTED)
+ __netif_down(netif);
+ return 0;
}
netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
{
- int err = 0, i;
- struct net_device *dev;
- netif_t *netif;
- char name[IFNAMSIZ] = {};
-
- snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
- dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
- if (dev == NULL) {
- DPRINTK("Could not create netif: out of memory\n");
- return NULL;
- }
-
- netif = netdev_priv(dev);
- memset(netif, 0, sizeof(*netif));
- netif->domid = domid;
- netif->handle = handle;
- netif->status = DISCONNECTED;
- atomic_set(&netif->refcnt, 0);
- netif->dev = dev;
-
- netif->credit_bytes = netif->remaining_credit = ~0UL;
- netif->credit_usec = 0UL;
- init_timer(&netif->credit_timeout);
-
- dev->hard_start_xmit = netif_be_start_xmit;
- dev->get_stats = netif_be_get_stats;
- dev->open = net_open;
- dev->stop = net_close;
- dev->features = NETIF_F_NO_CSUM;
-
- /* Disable queuing. */
- dev->tx_queue_len = 0;
-
- for (i = 0; i < ETH_ALEN; i++)
- if (be_mac[i] != 0)
- break;
- if (i == ETH_ALEN) {
- /*
- * Initialise a dummy MAC address. We choose the numerically largest
- * non-broadcast address to prevent the address getting stolen by an
- * Ethernet bridge for STP purposes. (FE:FF:FF:FF:FF:FF)
- */
- memset(dev->dev_addr, 0xFF, ETH_ALEN);
- dev->dev_addr[0] &= ~0x01;
- } else
- memcpy(dev->dev_addr, be_mac, ETH_ALEN);
-
- rtnl_lock();
- err = register_netdevice(dev);
- rtnl_unlock();
- if (err) {
- DPRINTK("Could not register new net device %s: err=%d\n",
- dev->name, err);
- free_netdev(dev);
- return NULL;
- }
-
- DPRINTK("Successfully created netif\n");
- return netif;
-}
-
-static int map_frontend_pages(netif_t *netif, unsigned long localaddr,
- unsigned long tx_ring_ref,
- unsigned long rx_ring_ref)
-{
-#ifdef CONFIG_XEN_NETDEV_GRANT
- struct gnttab_map_grant_ref op;
-
- /* Map: Use the Grant table reference */
- op.host_addr = localaddr;
- op.flags = GNTMAP_host_map;
- op.ref = tx_ring_ref;
- op.dom = netif->domid;
+ int err = 0, i;
+ struct net_device *dev;
+ netif_t *netif;
+ char name[IFNAMSIZ] = {};
+
+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+ dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
+ if (dev == NULL) {
+ DPRINTK("Could not create netif: out of memory\n");
+ return NULL;
+ }
+
+ netif = netdev_priv(dev);
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
+ netif->handle = handle;
+ netif->status = DISCONNECTED;
+ atomic_set(&netif->refcnt, 0);
+ netif->dev = dev;
+
+ netif->credit_bytes = netif->remaining_credit = ~0UL;
+ netif->credit_usec = 0UL;
+ init_timer(&netif->credit_timeout);
+
+ dev->hard_start_xmit = netif_be_start_xmit;
+ dev->get_stats = netif_be_get_stats;
+ dev->open = net_open;
+ dev->stop = net_close;
+ dev->features = NETIF_F_NO_CSUM;
+
+ /* Disable queuing. */
+ dev->tx_queue_len = 0;
+
+ for (i = 0; i < ETH_ALEN; i++)
+ if (be_mac[i] != 0)
+ break;
+ if (i == ETH_ALEN) {
+ /*
+ * Initialise a dummy MAC address. We choose the numerically
+ * largest non-broadcast address to prevent the address getting
+ * stolen by an Ethernet bridge for STP purposes.
+ * (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
+ } else
+ memcpy(dev->dev_addr, be_mac, ETH_ALEN);
+
+ rtnl_lock();
+ err = register_netdevice(dev);
+ rtnl_unlock();
+ if (err) {
+ DPRINTK("Could not register new net device %s: err=%d\n",
+ dev->name, err);
+ free_netdev(dev);
+ return NULL;
+ }
+
+ DPRINTK("Successfully created netif\n");
+ return netif;
+}
+
+static int map_frontend_pages(
+ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+{
+ struct gnttab_map_grant_ref op;
+
+ op.host_addr = (unsigned long)netif->comms_area->addr;
+ op.flags = GNTMAP_host_map;
+ op.ref = tx_ring_ref;
+ op.dom = netif->domid;
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure mapping tx_ring_ref!\n");
- return op.handle;
- }
-
- netif->tx_shmem_ref = tx_ring_ref;
- netif->tx_shmem_handle = op.handle;
- netif->tx_shmem_vaddr = localaddr;
-
- /* Map: Use the Grant table reference */
- op.host_addr = localaddr + PAGE_SIZE;
- op.flags = GNTMAP_host_map;
- op.ref = rx_ring_ref;
- op.dom = netif->domid;
-
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure mapping rx_ring_ref!\n");
- return op.handle;
- }
-
- netif->rx_shmem_ref = rx_ring_ref;
- netif->rx_shmem_handle = op.handle;
- netif->rx_shmem_vaddr = localaddr + PAGE_SIZE;
-
-#else
- pgprot_t prot = __pgprot(_KERNPG_TABLE);
- int err;
-
- err = direct_remap_pfn_range(&init_mm, localaddr,
- tx_ring_ref, PAGE_SIZE,
- prot, netif->domid);
-
- err |= direct_remap_pfn_range(&init_mm, localaddr + PAGE_SIZE,
- rx_ring_ref, PAGE_SIZE,
- prot, netif->domid);
-
- if (err)
- return err;
-#endif
-
- return 0;
+ lock_vm_area(netif->comms_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+ unlock_vm_area(netif->comms_area);
+
+ if (op.handle < 0) {
+ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
+ return op.handle;
+ }
+
+ netif->tx_shmem_ref = tx_ring_ref;
+ netif->tx_shmem_handle = op.handle;
+
+ op.host_addr = (unsigned long)netif->comms_area->addr + PAGE_SIZE;
+ op.flags = GNTMAP_host_map;
+ op.ref = rx_ring_ref;
+ op.dom = netif->domid;
+
+ lock_vm_area(netif->comms_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+ unlock_vm_area(netif->comms_area);
+
+ if (op.handle < 0) {
+ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+ return op.handle;
+ }
+
+ netif->rx_shmem_ref = rx_ring_ref;
+ netif->rx_shmem_handle = op.handle;
+
+ return 0;
}
static void unmap_frontend_pages(netif_t *netif)
{
-#ifdef CONFIG_XEN_NETDEV_GRANT
- struct gnttab_unmap_grant_ref op;
-
- op.host_addr = netif->tx_shmem_vaddr;
- op.handle = netif->tx_shmem_handle;
- op.dev_bus_addr = 0;
- BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-
- op.host_addr = netif->rx_shmem_vaddr;
- op.handle = netif->rx_shmem_handle;
- op.dev_bus_addr = 0;
- BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-#endif
-
- return;
+ struct gnttab_unmap_grant_ref op;
+
+ op.host_addr = (unsigned long)netif->comms_area->addr;
+ op.handle = netif->tx_shmem_handle;
+ op.dev_bus_addr = 0;
+
+ lock_vm_area(netif->comms_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ unlock_vm_area(netif->comms_area);
+
+ op.host_addr = (unsigned long)netif->comms_area->addr + PAGE_SIZE;
+ op.handle = netif->rx_shmem_handle;
+ op.dev_bus_addr = 0;
+
+ lock_vm_area(netif->comms_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ unlock_vm_area(netif->comms_area);
}
int netif_map(netif_t *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn)
{
- struct vm_struct *vma;
- evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
- int err;
-
- vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP);
- if (vma == NULL)
- return -ENOMEM;
-
- err = map_frontend_pages(netif, (unsigned long)vma->addr, tx_ring_ref,
- rx_ring_ref);
- if (err) {
- vfree(vma->addr);
- return err;
- }
-
- op.u.bind_interdomain.dom1 = DOMID_SELF;
- op.u.bind_interdomain.dom2 = netif->domid;
- op.u.bind_interdomain.port1 = 0;
- op.u.bind_interdomain.port2 = evtchn;
- err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_frontend_pages(netif);
- vfree(vma->addr);
- return err;
- }
-
- netif->evtchn = op.u.bind_interdomain.port1;
- netif->remote_evtchn = evtchn;
-
- netif->tx = (netif_tx_interface_t *)vma->addr;
- netif->rx = (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE);
- netif->tx->resp_prod = netif->rx->resp_prod = 0;
- netif_get(netif);
- wmb(); /* Other CPUs see new state before interface is started. */
-
- rtnl_lock();
- netif->status = CONNECTED;
- wmb();
- if (netif_running(netif->dev))
- __netif_up(netif);
- rtnl_unlock();
-
- return 0;
-}
-
-static void free_netif(void *arg)
-{
- evtchn_op_t op = { .cmd = EVTCHNOP_close };
- netif_t *netif = (netif_t *)arg;
-
- /*
- * These can't be done in netif_disconnect() because at that point there
- * may be outstanding requests in the network stack whose asynchronous
- * responses must still be notified to the remote driver.
- */
-
- op.u.close.port = netif->evtchn;
- op.u.close.dom = DOMID_SELF;
- HYPERVISOR_event_channel_op(&op);
- op.u.close.port = netif->remote_evtchn;
- op.u.close.dom = netif->domid;
- HYPERVISOR_event_channel_op(&op);
-
- unregister_netdev(netif->dev);
-
- if (netif->tx) {
- unmap_frontend_pages(netif);
- vfree(netif->tx); /* Frees netif->rx as well. */
- }
-
- free_netdev(netif->dev);
-}
-
-void free_netif_callback(netif_t *netif)
-{
- INIT_WORK(&netif->free_work, free_netif, (void *)netif);
- schedule_work(&netif->free_work);
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
+
+ netif->comms_area = alloc_vm_area(2*PAGE_SIZE);
+ if (netif->comms_area == NULL)
+ return -ENOMEM;
+
+ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+ if (err) {
+ free_vm_area(netif->comms_area);
+ return err;
+ }
+
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = netif->domid;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_frontend_pages(netif);
+ free_vm_area(netif->comms_area);
+ return err;
+ }
+
+ netif->evtchn = op.u.bind_interdomain.port1;
+ netif->remote_evtchn = evtchn;
+
+ netif->tx = (netif_tx_interface_t *)netif->comms_area->addr;
+ netif->rx = (netif_rx_interface_t *)
+ ((char *)netif->comms_area->addr + PAGE_SIZE);
+ netif->tx->resp_prod = netif->rx->resp_prod = 0;
+ netif_get(netif);
+ wmb(); /* Other CPUs see new state before interface is started. */
+
+ rtnl_lock();
+ netif->status = CONNECTED;
+ wmb();
+ if (netif_running(netif->dev))
+ __netif_up(netif);
+ rtnl_unlock();
+
+ return 0;
+}
+
+static void free_netif_callback(void *arg)
+{
+ evtchn_op_t op = { .cmd = EVTCHNOP_close };
+ netif_t *netif = (netif_t *)arg;
+
+ /*
+ * These can't be done in netif_disconnect() because at that point
+ * there may be outstanding requests in the network stack whose
+ * asynchronous responses must still be notified to the remote driver.
+ */
+
+ op.u.close.port = netif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = netif->remote_evtchn;
+ op.u.close.dom = netif->domid;
+ HYPERVISOR_event_channel_op(&op);
+
+ unregister_netdev(netif->dev);
+
+ if (netif->tx) {
+ unmap_frontend_pages(netif);
+ free_vm_area(netif->comms_area);
+ }
+
+ free_netdev(netif->dev);
+}
+
+void free_netif(netif_t *netif)
+{
+ INIT_WORK(&netif->free_work, free_netif_callback, (void *)netif);
+ schedule_work(&netif->free_work);
}
void netif_creditlimit(netif_t *netif)
{
#if 0
- /* Set the credit limit (reset remaining credit to new limit). */
- netif->credit_bytes = netif->remaining_credit = creditlimit->credit_bytes;
- netif->credit_usec = creditlimit->period_usec;
-
- if (netif->status == CONNECTED) {
- /*
- * Schedule work so that any packets waiting under previous credit
- * limit are dealt with (acts like a replenishment point).
- */
- netif->credit_timeout.expires = jiffies;
- netif_schedule_work(netif);
- }
+ /* Set the credit limit (reset remaining credit to new limit). */
+ netif->credit_bytes = creditlimit->credit_bytes;
+ netif->remaining_credit = creditlimit->credit_bytes;
+ netif->credit_usec = creditlimit->period_usec;
+
+ if (netif->status == CONNECTED) {
+ /*
+ * Schedule work so that any packets waiting under previous
+ * credit limit are dealt with (acts as a replenishment point).
+ */
+ netif->credit_timeout.expires = jiffies;
+ netif_schedule_work(netif);
+ }
#endif
}
int netif_disconnect(netif_t *netif)
{
- if (netif->status == CONNECTED) {
- rtnl_lock();
- netif->status = DISCONNECTING;
- wmb();
- if (netif_running(netif->dev))
- __netif_down(netif);
- rtnl_unlock();
- netif_put(netif);
- return 0; /* Caller should not send response message. */
- }
-
- return 1;
-}
+ if (netif->status == CONNECTED) {
+ rtnl_lock();
+ netif->status = DISCONNECTING;
+ wmb();
+ if (netif_running(netif->dev))
+ __netif_down(netif);
+ rtnl_unlock();
+ netif_put(netif);
+ return 0; /* Caller should not send response message. */
+ }
+
+ return 1;
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Sep 22
17:42:01 2005
@@ -23,7 +23,7 @@
static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
- unsigned long addr,
+ u16 offset,
u16 size,
u16 csum_valid);
@@ -41,11 +41,7 @@
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
-#ifdef CONFIG_XEN_NETDEV_GRANT
-static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
-#else
-static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
-#endif
+static gnttab_transfer_t grant_rx_op[MAX_PENDING_REQS];
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -57,8 +53,8 @@
#define PKT_PROT_LEN 64
static struct {
- netif_tx_request_t req;
- netif_t *netif;
+ netif_tx_request_t req;
+ netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
@@ -72,14 +68,9 @@
static struct sk_buff_head tx_queue;
-#ifdef CONFIG_XEN_NETDEV_GRANT
static u16 grant_tx_ref[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-
-#else
-static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
-#endif
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -91,49 +82,49 @@
static unsigned long alloc_mfn(void)
{
- unsigned long mfn = 0, flags;
- struct xen_memory_reservation reservation = {
- .extent_start = mfn_list,
- .nr_extents = MAX_MFN_ALLOC,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- spin_lock_irqsave(&mfn_lock, flags);
- if ( unlikely(alloc_index == 0) )
- alloc_index = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
- if ( alloc_index != 0 )
- mfn = mfn_list[--alloc_index];
- spin_unlock_irqrestore(&mfn_lock, flags);
- return mfn;
-}
-
-#ifndef CONFIG_XEN_NETDEV_GRANT
+ unsigned long mfn = 0, flags;
+ struct xen_memory_reservation reservation = {
+ .extent_start = mfn_list,
+ .nr_extents = MAX_MFN_ALLOC,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ spin_lock_irqsave(&mfn_lock, flags);
+ if ( unlikely(alloc_index == 0) )
+ alloc_index = HYPERVISOR_memory_op(
+ XENMEM_increase_reservation, &reservation);
+ if ( alloc_index != 0 )
+ mfn = mfn_list[--alloc_index];
+ spin_unlock_irqrestore(&mfn_lock, flags);
+ return mfn;
+}
+
+#if 0
static void free_mfn(unsigned long mfn)
{
- unsigned long flags;
- struct xen_memory_reservation reservation = {
- .extent_start = &mfn,
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- spin_lock_irqsave(&mfn_lock, flags);
- if ( alloc_index != MAX_MFN_ALLOC )
- mfn_list[alloc_index++] = mfn;
- else if ( HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation)
- != 1 )
- BUG();
- spin_unlock_irqrestore(&mfn_lock, flags);
+ unsigned long flags;
+ struct xen_memory_reservation reservation = {
+ .extent_start = &mfn,
+ .nr_extents = 1,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ spin_lock_irqsave(&mfn_lock, flags);
+ if ( alloc_index != MAX_MFN_ALLOC )
+ mfn_list[alloc_index++] = mfn;
+ else
+ BUG_ON(HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation) != 1);
+ spin_unlock_irqrestore(&mfn_lock, flags);
}
#endif
static inline void maybe_schedule_tx_action(void)
{
- smp_mb();
- if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
- !list_empty(&net_schedule_list) )
- tasklet_schedule(&net_tx_tasklet);
+ smp_mb();
+ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&net_schedule_list))
+ tasklet_schedule(&net_tx_tasklet);
}
/*
@@ -142,77 +133,66 @@
*/
static inline int is_xen_skb(struct sk_buff *skb)
{
- extern kmem_cache_t *skbuff_cachep;
- kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
- return (cp == skbuff_cachep);
+ extern kmem_cache_t *skbuff_cachep;
+ kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
+ return (cp == skbuff_cachep);
}
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
-
- ASSERT(skb->dev == dev);
-
- /* Drop the packet if the target domain has no receive buffers. */
- if ( !netif->active ||
- (netif->rx_req_cons == netif->rx->req_prod) ||
- ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
- goto drop;
-
- /*
- * We do not copy the packet unless:
- * 1. The data is shared; or
- * 2. The data is not allocated from our special cache.
- * NB. We also couldn't cope with fragmented packets, but we won't get
- * any because we not advertise the NETIF_F_SG feature.
- */
- if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
- {
- int hlen = skb->data - skb->head;
- struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
- if ( unlikely(nskb == NULL) )
- goto drop;
- skb_reserve(nskb, hlen);
- __skb_put(nskb, skb->len);
- if (skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen))
- BUG();
- nskb->dev = skb->dev;
- nskb->proto_csum_valid = skb->proto_csum_valid;
- dev_kfree_skb(skb);
- skb = nskb;
- }
-#ifdef CONFIG_XEN_NETDEV_GRANT
-#ifdef DEBUG_GRANT
- printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x
gr=%04x\n",
- netif->rx->req_prod,
- netif->rx_req_cons,
- netif->rx->ring[
- MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
- netif->rx->ring[
- MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
-#endif
-#endif
- netif->rx_req_cons++;
- netif_get(netif);
-
- skb_queue_tail(&rx_queue, skb);
- tasklet_schedule(&net_rx_tasklet);
-
- return 0;
+ netif_t *netif = netdev_priv(dev);
+
+ ASSERT(skb->dev == dev);
+
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (!netif->active ||
+ (netif->rx_req_cons == netif->rx->req_prod) ||
+ ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE))
+ goto drop;
+
+ /*
+ * We do not copy the packet unless:
+ * 1. The data is shared; or
+ * 2. The data is not allocated from our special cache.
+ * NB. We also couldn't cope with fragmented packets, but we won't get
+ * any because we not advertise the NETIF_F_SG feature.
+ */
+ if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
+ int hlen = skb->data - skb->head;
+ struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
+ if ( unlikely(nskb == NULL) )
+ goto drop;
+ skb_reserve(nskb, hlen);
+ __skb_put(nskb, skb->len);
+ BUG_ON(skb_copy_bits(skb, -hlen, nskb->data - hlen,
+ skb->len + hlen));
+ nskb->dev = skb->dev;
+ nskb->proto_csum_valid = skb->proto_csum_valid;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
+ netif->rx_req_cons++;
+ netif_get(netif);
+
+ skb_queue_tail(&rx_queue, skb);
+ tasklet_schedule(&net_rx_tasklet);
+
+ return 0;
drop:
- netif->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return 0;
+ netif->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
}
#if 0
static void xen_network_done_notify(void)
{
- static struct net_device *eth0_dev = NULL;
- if ( unlikely(eth0_dev == NULL) )
- eth0_dev = __dev_get_by_name("eth0");
- netif_rx_schedule(eth0_dev);
+ static struct net_device *eth0_dev = NULL;
+ if (unlikely(eth0_dev == NULL))
+ eth0_dev = __dev_get_by_name("eth0");
+ netif_rx_schedule(eth0_dev);
}
/*
* Add following to poll() function in NAPI driver (Tigon3 is example):
@@ -221,776 +201,644 @@
*/
int xen_network_done(void)
{
- return skb_queue_empty(&rx_queue);
+ return skb_queue_empty(&rx_queue);
}
#endif
static void net_rx_action(unsigned long unused)
{
- netif_t *netif = NULL;
- s8 status;
- u16 size, id, evtchn;
- multicall_entry_t *mcl;
- mmu_update_t *mmu;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gnttab_donate_t *gop;
-#else
- struct mmuext_op *mmuext;
+ netif_t *netif = NULL;
+ s8 status;
+ u16 size, id, evtchn;
+ multicall_entry_t *mcl;
+ mmu_update_t *mmu;
+ gnttab_transfer_t *gop;
+ unsigned long vdata, old_mfn, new_mfn;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ u16 notify_list[NETIF_RX_RING_SIZE];
+ int notify_nr = 0;
+
+ skb_queue_head_init(&rxq);
+
+ mcl = rx_mcl;
+ mmu = rx_mmu;
+ gop = grant_rx_op;
+
+ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+ netif = netdev_priv(skb->dev);
+ vdata = (unsigned long)skb->data;
+ old_mfn = virt_to_mfn(vdata);
+
+ /* Memory squeeze? Back off for an arbitrary while. */
+ if ((new_mfn = alloc_mfn()) == 0) {
+ if ( net_ratelimit() )
+ WPRINTK("Memory squeeze in netback driver.\n");
+ mod_timer(&net_timer, jiffies + HZ);
+ skb_queue_head(&rx_queue, skb);
+ break;
+ }
+ /*
+ * Set the new P2M table entry before reassigning the old data
+ * page. Heed the comment in pgtable-2level.h:pte_page(). :-)
+ */
+ phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] =
+ new_mfn;
+
+ MULTI_update_va_mapping(mcl, vdata,
+ pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
+ mcl++;
+
+ gop->mfn = old_mfn;
+ gop->domid = netif->domid;
+ gop->ref = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+ netif->rx_resp_prod_copy++;
+ gop++;
+
+ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE;
+ mmu->val = __pa(vdata) >> PAGE_SHIFT;
+ mmu++;
+
+ __skb_queue_tail(&rxq, skb);
+
+ /* Filled the batch queue? */
+ if ((mcl - rx_mcl) == ARRAY_SIZE(rx_mcl))
+ break;
+ }
+
+ if (mcl == rx_mcl)
+ return;
+
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = mmu - rx_mmu;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ mcl++;
+
+ mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+ BUG_ON(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0);
+
+ mcl = rx_mcl;
+ if( HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op,
+ gop - grant_rx_op)) {
+ /*
+ * The other side has given us a bad grant ref, or has no
+ * headroom, or has gone away. Unfortunately the current grant
+ * table code doesn't inform us which is the case, so not much
+ * we can do.
+ */
+ DPRINTK("net_rx: transfer to DOM%u failed; dropping (up to) "
+ "%d packets.\n",
+ grant_rx_op[0].domid, gop - grant_rx_op);
+ }
+ gop = grant_rx_op;
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ netif = netdev_priv(skb->dev);
+ size = skb->tail - skb->data;
+
+ /* Rederive the machine addresses. */
+ new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+ old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ netif->stats.tx_bytes += size;
+ netif->stats.tx_packets++;
+
+ /* The update_va_mapping() must not fail. */
+ BUG_ON(mcl[0].result != 0);
+
+ /* Check the reassignment error code. */
+ status = NETIF_RSP_OKAY;
+ if(gop->status != 0) {
+ DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+ gop->status, netif->domid);
+ /* XXX SMH: should free 'old_mfn' here */
+ status = NETIF_RSP_ERROR;
+ }
+ evtchn = netif->evtchn;
+ id = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
+ if (make_rx_response(netif, id, status,
+ (unsigned long)skb->data & ~PAGE_MASK,
+ size, skb->proto_csum_valid) &&
+ (rx_notify[evtchn] == 0)) {
+ rx_notify[evtchn] = 1;
+ notify_list[notify_nr++] = evtchn;
+ }
+
+ netif_put(netif);
+ dev_kfree_skb(skb);
+ mcl++;
+ gop++;
+ }
+
+ while (notify_nr != 0) {
+ evtchn = notify_list[--notify_nr];
+ rx_notify[evtchn] = 0;
+ notify_via_evtchn(evtchn);
+ }
+
+ /* More work to do? */
+ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+ tasklet_schedule(&net_rx_tasklet);
+#if 0
+ else
+ xen_network_done_notify();
#endif
- unsigned long vdata, old_mfn, new_mfn;
- struct sk_buff_head rxq;
- struct sk_buff *skb;
- u16 notify_list[NETIF_RX_RING_SIZE];
- int notify_nr = 0;
-
- skb_queue_head_init(&rxq);
-
- mcl = rx_mcl;
- mmu = rx_mmu;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gop = grant_rx_op;
-#else
- mmuext = rx_mmuext;
-#endif
-
- while ( (skb = skb_dequeue(&rx_queue)) != NULL )
- {
- netif = netdev_priv(skb->dev);
- vdata = (unsigned long)skb->data;
- old_mfn = virt_to_mfn(vdata);
-
- /* Memory squeeze? Back off for an arbitrary while. */
- if ( (new_mfn = alloc_mfn()) == 0 )
- {
- if ( net_ratelimit() )
- WPRINTK("Memory squeeze in netback driver.\n");
- mod_timer(&net_timer, jiffies + HZ);
- skb_queue_head(&rx_queue, skb);
- break;
- }
- /*
- * Set the new P2M table entry before reassigning the old data page.
- * Heed the comment in pgtable-2level.h:pte_page(). :-)
- */
- phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
-
- MULTI_update_va_mapping(mcl, vdata,
- pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
- mcl++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gop->mfn = old_mfn;
- gop->domid = netif->domid;
- gop->handle = netif->rx->ring[
- MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
- netif->rx_resp_prod_copy++;
- gop++;
-#else
- mcl->op = __HYPERVISOR_mmuext_op;
- mcl->args[0] = (unsigned long)mmuext;
- mcl->args[1] = 1;
- mcl->args[2] = 0;
- mcl->args[3] = netif->domid;
- mcl++;
-
- mmuext->cmd = MMUEXT_REASSIGN_PAGE;
- mmuext->arg1.mfn = old_mfn;
- mmuext++;
-#endif
- mmu->ptr = ((unsigned long long)new_mfn << PAGE_SHIFT) |
MMU_MACHPHYS_UPDATE;
- mmu->val = __pa(vdata) >> PAGE_SHIFT;
- mmu++;
-
- __skb_queue_tail(&rxq, skb);
-
-#ifdef DEBUG_GRANT
- dump_packet('a', old_mfn, vdata);
-#endif
- /* Filled the batch queue? */
- if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
- break;
- }
-
- if ( mcl == rx_mcl )
- return;
-
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-#else
- mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-#endif
- if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
- BUG();
-
- mcl = rx_mcl;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- if(HYPERVISOR_grant_table_op(GNTTABOP_donate, grant_rx_op,
- gop - grant_rx_op)) {
- /*
- ** The other side has given us a bad grant ref, or has no headroom,
- ** or has gone away. Unfortunately the current grant table code
- ** doesn't inform us which is the case, so not much we can do.
- */
- DPRINTK("net_rx: donate to DOM%u failed; dropping (up to) %d "
- "packets.\n", grant_rx_op[0].domid, gop - grant_rx_op);
- }
- gop = grant_rx_op;
-#else
- mmuext = rx_mmuext;
-#endif
- while ( (skb = __skb_dequeue(&rxq)) != NULL )
- {
- netif = netdev_priv(skb->dev);
- size = skb->tail - skb->data;
-
- /* Rederive the machine addresses. */
- new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
-#else
- old_mfn = mmuext[0].arg1.mfn;
-#endif
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->frag_list = NULL;
-
- netif->stats.tx_bytes += size;
- netif->stats.tx_packets++;
-
- /* The update_va_mapping() must not fail. */
- BUG_ON(mcl[0].result != 0);
-
- /* Check the reassignment error code. */
- status = NETIF_RSP_OKAY;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- if(gop->status != 0) {
- DPRINTK("Bad status %d from grant donate to DOM%u\n",
- gop->status, netif->domid);
- /* XXX SMH: should free 'old_mfn' here */
- status = NETIF_RSP_ERROR;
- }
-#else
- if ( unlikely(mcl[1].result != 0) )
- {
- DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
- free_mfn(old_mfn);
- status = NETIF_RSP_ERROR;
- }
-#endif
- evtchn = netif->evtchn;
- id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
- if ( make_rx_response(netif, id, status,
- (old_mfn << PAGE_SHIFT) | /* XXX */
- ((unsigned long)skb->data & ~PAGE_MASK),
- size, skb->proto_csum_valid) &&
- (rx_notify[evtchn] == 0) )
- {
- rx_notify[evtchn] = 1;
- notify_list[notify_nr++] = evtchn;
- }
-
- netif_put(netif);
- dev_kfree_skb(skb);
-#ifdef CONFIG_XEN_NETDEV_GRANT
- mcl++;
- gop++;
-#else
- mcl += 2;
- mmuext += 1;
-#endif
- }
-
- while ( notify_nr != 0 )
- {
- evtchn = notify_list[--notify_nr];
- rx_notify[evtchn] = 0;
- notify_via_evtchn(evtchn);
- }
-
- out:
- /* More work to do? */
- if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
- tasklet_schedule(&net_rx_tasklet);
-#if 0
- else
- xen_network_done_notify();
-#endif
}
static void net_alarm(unsigned long unused)
{
- tasklet_schedule(&net_rx_tasklet);
+ tasklet_schedule(&net_rx_tasklet);
}
struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
- return &netif->stats;
+ netif_t *netif = netdev_priv(dev);
+ return &netif->stats;
}
static int __on_net_schedule_list(netif_t *netif)
{
- return netif->list.next != NULL;
+ return netif->list.next != NULL;
}
static void remove_from_net_schedule_list(netif_t *netif)
{
- spin_lock_irq(&net_schedule_list_lock);
- if ( likely(__on_net_schedule_list(netif)) )
- {
- list_del(&netif->list);
- netif->list.next = NULL;
- netif_put(netif);
- }
- spin_unlock_irq(&net_schedule_list_lock);
+ spin_lock_irq(&net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del(&netif->list);
+ netif->list.next = NULL;
+ netif_put(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
}
static void add_to_net_schedule_list_tail(netif_t *netif)
{
- if ( __on_net_schedule_list(netif) )
- return;
-
- spin_lock_irq(&net_schedule_list_lock);
- if ( !__on_net_schedule_list(netif) && netif->active )
- {
- list_add_tail(&netif->list, &net_schedule_list);
- netif_get(netif);
- }
- spin_unlock_irq(&net_schedule_list_lock);
+ if (__on_net_schedule_list(netif))
+ return;
+
+ spin_lock_irq(&net_schedule_list_lock);
+ if (!__on_net_schedule_list(netif) && netif->active) {
+ list_add_tail(&netif->list, &net_schedule_list);
+ netif_get(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
}
void netif_schedule_work(netif_t *netif)
{
- if ( (netif->tx_req_cons != netif->tx->req_prod) &&
- ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
- {
- add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action();
- }
+ if ((netif->tx_req_cons != netif->tx->req_prod) &&
+ ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE)) {
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+ }
}
void netif_deschedule_work(netif_t *netif)
{
- remove_from_net_schedule_list(netif);
+ remove_from_net_schedule_list(netif);
}
static void tx_credit_callback(unsigned long data)
{
- netif_t *netif = (netif_t *)data;
- netif->remaining_credit = netif->credit_bytes;
- netif_schedule_work(netif);
+ netif_t *netif = (netif_t *)data;
+ netif->remaining_credit = netif->credit_bytes;
+ netif_schedule_work(netif);
}
inline static void net_tx_action_dealloc(void)
{
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gnttab_unmap_grant_ref_t *gop;
-#else
- multicall_entry_t *mcl;
-#endif
- u16 pending_idx;
- PEND_RING_IDX dc, dp;
- netif_t *netif;
-
- dc = dealloc_cons;
- dp = dealloc_prod;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- /*
- * Free up any grants we have finished using
- */
- gop = tx_unmap_ops;
- while ( dc != dp )
- {
- pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- gop->host_addr = MMAP_VADDR(pending_idx);
- gop->dev_bus_addr = 0;
- gop->handle = grant_tx_ref[pending_idx];
- grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
- gop++;
- }
- BUG_ON(HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
-#else
- mcl = tx_mcl;
- while ( dc != dp )
- {
- pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- MULTI_update_va_mapping(mcl, MMAP_VADDR(pending_idx),
- __pte(0), 0);
- mcl++;
- }
-
- mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
- if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
- BUG();
-
- mcl = tx_mcl;
-#endif
- while ( dealloc_cons != dp )
- {
-#ifndef CONFIG_XEN_NETDEV_GRANT
- /* The update_va_mapping() must not fail. */
- BUG_ON(mcl[0].result != 0);
-#endif
-
- pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
-
- netif = pending_tx_info[pending_idx].netif;
-
- make_tx_response(netif, pending_tx_info[pending_idx].req.id,
- NETIF_RSP_OKAY);
+ gnttab_unmap_grant_ref_t *gop;
+ u16 pending_idx;
+ PEND_RING_IDX dc, dp;
+ netif_t *netif;
+
+ dc = dealloc_cons;
+ dp = dealloc_prod;
+
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = tx_unmap_ops;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->host_addr = MMAP_VADDR(pending_idx);
+ gop->dev_bus_addr = 0;
+ gop->handle = grant_tx_ref[pending_idx];
+ grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+ gop++;
+ }
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
+
+ while (dealloc_cons != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+ netif = pending_tx_info[pending_idx].netif;
+
+ make_tx_response(netif, pending_tx_info[pending_idx].req.id,
+ NETIF_RSP_OKAY);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-
- /*
- * Scheduling checks must happen after the above response is posted.
- * This avoids a possible race with a guest OS on another CPU if that
- * guest is testing against 'resp_prod' when deciding whether to notify
- * us when it queues additional packets.
- */
- mb();
- if ( (netif->tx_req_cons != netif->tx->req_prod) &&
- ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
- add_to_net_schedule_list_tail(netif);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+ /*
+ * Scheduling checks must happen after the above response is
+ * posted. This avoids a possible race with a guest OS on
+ * another CPU if that guest is testing against 'resp_prod'
+ * when deciding whether to notify us when it queues additional
+ * packets.
+ */
+ mb();
+ if ((netif->tx_req_cons != netif->tx->req_prod) &&
+ ((netif->tx_req_cons-netif->tx_resp_prod) !=
+ NETIF_TX_RING_SIZE))
+ add_to_net_schedule_list_tail(netif);
- netif_put(netif);
-
-#ifndef CONFIG_XEN_NETDEV_GRANT
- mcl++;
-#endif
- }
-
+ netif_put(netif);
+ }
}
/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
- struct list_head *ent;
- struct sk_buff *skb;
- netif_t *netif;
- netif_tx_request_t txreq;
- u16 pending_idx;
- NETIF_RING_IDX i;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gnttab_map_grant_ref_t *mop;
-#else
- multicall_entry_t *mcl;
-#endif
- unsigned int data_len;
-
- if ( dealloc_cons != dealloc_prod )
- net_tx_action_dealloc();
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- mop = tx_map_ops;
-#else
- mcl = tx_mcl;
-#endif
- while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&net_schedule_list) )
- {
- /* Get a netif from the list with work to do. */
- ent = net_schedule_list.next;
- netif = list_entry(ent, netif_t, list);
- netif_get(netif);
- remove_from_net_schedule_list(netif);
-
- /* Work to do? */
- i = netif->tx_req_cons;
- if ( (i == netif->tx->req_prod) ||
- ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
- {
- netif_put(netif);
- continue;
- }
-
- rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
- sizeof(txreq));
- /* Credit-based scheduling. */
- if ( txreq.size > netif->remaining_credit )
- {
- unsigned long now = jiffies;
- unsigned long next_credit =
- netif->credit_timeout.expires +
- msecs_to_jiffies(netif->credit_usec / 1000);
-
- /* Timer could already be pending in some rare cases. */
- if ( timer_pending(&netif->credit_timeout) )
- break;
-
- /* Already passed the point at which we can replenish credit? */
- if ( time_after_eq(now, next_credit) )
- {
- netif->credit_timeout.expires = now;
- netif->remaining_credit = netif->credit_bytes;
- }
-
- /* Still too big to send right now? Then set a timer callback. */
- if ( txreq.size > netif->remaining_credit )
- {
- netif->remaining_credit = 0;
- netif->credit_timeout.expires = next_credit;
- netif->credit_timeout.data = (unsigned long)netif;
- netif->credit_timeout.function = tx_credit_callback;
- add_timer_on(&netif->credit_timeout, smp_processor_id());
- break;
- }
- }
- netif->remaining_credit -= txreq.size;
-
- /*
- * Why the barrier? It ensures that the frontend sees updated req_cons
- * before we check for more work to schedule.
- */
- netif->tx->req_cons = ++netif->tx_req_cons;
- mb();
-
- netif_schedule_work(netif);
-
- if ( unlikely(txreq.size < ETH_HLEN) ||
- unlikely(txreq.size > ETH_FRAME_LEN) )
- {
- DPRINTK("Bad packet size: %d\n", txreq.size);
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- continue;
- }
-
- /* No crossing a page boundary as the payload mustn't fragment. */
- if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) )
- {
- DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
- txreq.addr, txreq.size,
- (txreq.addr &~PAGE_MASK) + txreq.size);
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- continue;
- }
-
- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
- data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
-
- if ( unlikely((skb = alloc_skb(data_len+16, GFP_ATOMIC)) == NULL) )
- {
- DPRINTK("Can't allocate a skb in start_xmit.\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- break;
- }
-
- /* Packets passed to netif_rx() must have some headroom. */
- skb_reserve(skb, 16);
-#ifdef CONFIG_XEN_NETDEV_GRANT
- mop->host_addr = MMAP_VADDR(pending_idx);
- mop->dom = netif->domid;
- mop->ref = txreq.addr >> PAGE_SHIFT;
- mop->flags = GNTMAP_host_map | GNTMAP_readonly;
- mop++;
-#else
- MULTI_update_va_mapping_otherdomain(
- mcl, MMAP_VADDR(pending_idx),
- pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
- 0, netif->domid);
-
- mcl++;
-#endif
-
- memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
- pending_tx_info[pending_idx].netif = netif;
- *((u16 *)skb->data) = pending_idx;
-
- __skb_queue_tail(&tx_queue, skb);
-
- pending_cons++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
- break;
-#else
- /* Filled the batch queue? */
- if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
- break;
-#endif
- }
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- if ( mop == tx_map_ops )
- return;
-
- BUG_ON(HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));
-
- mop = tx_map_ops;
-#else
- if ( mcl == tx_mcl )
- return;
-
- BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);
-
- mcl = tx_mcl;
-#endif
- while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
- {
- pending_idx = *((u16 *)skb->data);
- netif = pending_tx_info[pending_idx].netif;
- memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
-
- /* Check the remap error code. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
- /*
- XXX SMH: error returns from grant operations are pretty poorly
- specified/thought out, but the below at least conforms with
- what the rest of the code uses.
- */
- if ( unlikely(mop->handle < 0) )
- {
- printk(KERN_ALERT "#### netback grant fails\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- kfree_skb(skb);
- mop++;
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- continue;
- }
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
- grant_tx_ref[pending_idx] = mop->handle;
-#else
- if ( unlikely(mcl[0].result != 0) )
- {
- DPRINTK("Bad page frame\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
- kfree_skb(skb);
- mcl++;
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- continue;
- }
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
- FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
-#endif
-
- data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
-
- __skb_put(skb, data_len);
- memcpy(skb->data,
- (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
- data_len);
- if ( data_len < txreq.size )
- {
- /* Append the packet payload as a fragment. */
- skb_shinfo(skb)->frags[0].page =
- virt_to_page(MMAP_VADDR(pending_idx));
- skb_shinfo(skb)->frags[0].size = txreq.size - data_len;
- skb_shinfo(skb)->frags[0].page_offset =
- (txreq.addr + data_len) & ~PAGE_MASK;
- skb_shinfo(skb)->nr_frags = 1;
- }
- else
- {
- /* Schedule a response immediately. */
- netif_idx_release(pending_idx);
- }
-
- skb->data_len = txreq.size - data_len;
- skb->len += skb->data_len;
-
- skb->dev = netif->dev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- /* No checking needed on localhost, but remember the field is blank. */
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->proto_csum_valid = 1;
- skb->proto_csum_blank = txreq.csum_blank;
-
- netif->stats.rx_bytes += txreq.size;
- netif->stats.rx_packets++;
-
- netif_rx(skb);
- netif->dev->last_rx = jiffies;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- mop++;
-#else
- mcl++;
-#endif
- }
+ struct list_head *ent;
+ struct sk_buff *skb;
+ netif_t *netif;
+ netif_tx_request_t txreq;
+ u16 pending_idx;
+ NETIF_RING_IDX i;
+ gnttab_map_grant_ref_t *mop;
+ unsigned int data_len;
+
+ if (dealloc_cons != dealloc_prod)
+ net_tx_action_dealloc();
+
+ mop = tx_map_ops;
+ while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ !list_empty(&net_schedule_list)) {
+ /* Get a netif from the list with work to do. */
+ ent = net_schedule_list.next;
+ netif = list_entry(ent, netif_t, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
+ /* Work to do? */
+ i = netif->tx_req_cons;
+ if ((i == netif->tx->req_prod) ||
+ ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE)) {
+ netif_put(netif);
+ continue;
+ }
+
+ rmb(); /* Ensure that we see the request before we copy it. */
+ memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
+ sizeof(txreq));
+ /* Credit-based scheduling. */
+ if (txreq.size > netif->remaining_credit) {
+ unsigned long now = jiffies;
+ unsigned long next_credit =
+ netif->credit_timeout.expires +
+ msecs_to_jiffies(netif->credit_usec / 1000);
+
+ /* Timer could already be pending in rare cases. */
+ if (timer_pending(&netif->credit_timeout))
+ break;
+
+ /* Passed the point where we can replenish credit? */
+ if (time_after_eq(now, next_credit)) {
+ netif->credit_timeout.expires = now;
+ netif->remaining_credit = netif->credit_bytes;
+ }
+
+ /* Still too big to send right now? Set a callback. */
+ if (txreq.size > netif->remaining_credit) {
+ netif->remaining_credit = 0;
+ netif->credit_timeout.expires =
+ next_credit;
+ netif->credit_timeout.data =
+ (unsigned long)netif;
+ netif->credit_timeout.function =
+ tx_credit_callback;
+ add_timer_on(&netif->credit_timeout,
+ smp_processor_id());
+ break;
+ }
+ }
+ netif->remaining_credit -= txreq.size;
+
+ /*
+ * Why the barrier? It ensures that the frontend sees updated
+ * req_cons before we check for more work to schedule.
+ */
+ netif->tx->req_cons = ++netif->tx_req_cons;
+ mb();
+
+ netif_schedule_work(netif);
+
+ if (unlikely(txreq.size < ETH_HLEN) ||
+ unlikely(txreq.size > ETH_FRAME_LEN)) {
+ DPRINTK("Bad packet size: %d\n", txreq.size);
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ continue;
+ }
+
+ /* No crossing a page as the payload mustn't fragment. */
+ if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+ DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
+ txreq.addr, txreq.size,
+ (txreq.addr &~PAGE_MASK) + txreq.size);
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ continue;
+ }
+
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+ data_len = (txreq.size > PKT_PROT_LEN) ?
+ PKT_PROT_LEN : txreq.size;
+
+ skb = alloc_skb(data_len+16, GFP_ATOMIC);
+ if (unlikely(skb == NULL)) {
+ DPRINTK("Can't allocate a skb in start_xmit.\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ break;
+ }
+
+ /* Packets passed to netif_rx() must have some headroom. */
+ skb_reserve(skb, 16);
+
+ mop->host_addr = MMAP_VADDR(pending_idx);
+ mop->dom = netif->domid;
+ mop->ref = txreq.gref;
+ mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+ mop++;
+
+ memcpy(&pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+ pending_tx_info[pending_idx].netif = netif;
+ *((u16 *)skb->data) = pending_idx;
+
+ __skb_queue_tail(&tx_queue, skb);
+
+ pending_cons++;
+
+ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+ break;
+ }
+
+ if (mop == tx_map_ops)
+ return;
+
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));
+
+ mop = tx_map_ops;
+ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+ pending_idx = *((u16 *)skb->data);
+ netif = pending_tx_info[pending_idx].netif;
+ memcpy(&txreq, &pending_tx_info[pending_idx].req,
+ sizeof(txreq));
+
+ /* Check the remap error code. */
+ if (unlikely(mop->handle < 0)) {
+ printk(KERN_ALERT "#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ mop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] =
+ pending_idx;
+ continue;
+ }
+ phys_to_machine_mapping[
+ __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
+ grant_tx_ref[pending_idx] = mop->handle;
+
+ data_len = (txreq.size > PKT_PROT_LEN) ?
+ PKT_PROT_LEN : txreq.size;
+
+ __skb_put(skb, data_len);
+ memcpy(skb->data,
+ (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+ data_len);
+ if (data_len < txreq.size) {
+ /* Append the packet payload as a fragment. */
+ skb_shinfo(skb)->frags[0].page =
+ virt_to_page(MMAP_VADDR(pending_idx));
+ skb_shinfo(skb)->frags[0].size =
+ txreq.size - data_len;
+ skb_shinfo(skb)->frags[0].page_offset =
+ txreq.offset + data_len;
+ skb_shinfo(skb)->nr_frags = 1;
+ } else {
+ /* Schedule a response immediately. */
+ netif_idx_release(pending_idx);
+ }
+
+ skb->data_len = txreq.size - data_len;
+ skb->len += skb->data_len;
+
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ /*
+ * No checking needed on localhost, but remember the field is
+ * blank.
+ */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->proto_csum_valid = 1;
+ skb->proto_csum_blank = txreq.csum_blank;
+
+ netif->stats.rx_bytes += txreq.size;
+ netif->stats.rx_packets++;
+
+ netif_rx(skb);
+ netif->dev->last_rx = jiffies;
+
+ mop++;
+ }
}
static void netif_idx_release(u16 pending_idx)
{
- static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
- unsigned long flags;
-
- spin_lock_irqsave(&_lock, flags);
- dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
- spin_unlock_irqrestore(&_lock, flags);
-
- tasklet_schedule(&net_tx_tasklet);
+ static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+ unsigned long flags;
+
+ spin_lock_irqsave(&_lock, flags);
+ dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+ spin_unlock_irqrestore(&_lock, flags);
+
+ tasklet_schedule(&net_tx_tasklet);
}
static void netif_page_release(struct page *page)
{
- u16 pending_idx = page - virt_to_page(mmap_vstart);
-
- /* Ready for next use. */
- set_page_count(page, 1);
-
- netif_idx_release(pending_idx);
+ u16 pending_idx = page - virt_to_page(mmap_vstart);
+
+ /* Ready for next use. */
+ set_page_count(page, 1);
+
+ netif_idx_release(pending_idx);
}
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
- netif_t *netif = dev_id;
- if ( tx_work_exists(netif) )
- {
- add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action();
- }
- return IRQ_HANDLED;
+ netif_t *netif = dev_id;
+ if (tx_work_exists(netif)) {
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+ }
+ return IRQ_HANDLED;
}
static void make_tx_response(netif_t *netif,
u16 id,
s8 st)
{
- NETIF_RING_IDX i = netif->tx_resp_prod;
- netif_tx_response_t *resp;
-
- resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
- resp->id = id;
- resp->status = st;
- wmb();
- netif->tx->resp_prod = netif->tx_resp_prod = ++i;
-
- mb(); /* Update producer before checking event threshold. */
- if ( i == netif->tx->event )
- notify_via_evtchn(netif->evtchn);
+ NETIF_RING_IDX i = netif->tx_resp_prod;
+ netif_tx_response_t *resp;
+
+ resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
+ resp->id = id;
+ resp->status = st;
+ wmb();
+ netif->tx->resp_prod = netif->tx_resp_prod = ++i;
+
+ mb(); /* Update producer before checking event threshold. */
+ if (i == netif->tx->event)
+ notify_via_evtchn(netif->evtchn);
}
static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
- unsigned long addr,
+ u16 offset,
u16 size,
u16 csum_valid)
{
- NETIF_RING_IDX i = netif->rx_resp_prod;
- netif_rx_response_t *resp;
-
- resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
- resp->addr = addr;
- resp->csum_valid = csum_valid;
- resp->id = id;
- resp->status = (s16)size;
- if ( st < 0 )
- resp->status = (s16)st;
- wmb();
- netif->rx->resp_prod = netif->rx_resp_prod = ++i;
-
- mb(); /* Update producer before checking event threshold. */
- return (i == netif->rx->event);
+ NETIF_RING_IDX i = netif->rx_resp_prod;
+ netif_rx_response_t *resp;
+
+ resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+ resp->offset = offset;
+ resp->csum_valid = csum_valid;
+ resp->id = id;
+ resp->status = (s16)size;
+ if (st < 0)
+ resp->status = (s16)st;
+ wmb();
+ netif->rx->resp_prod = netif->rx_resp_prod = ++i;
+
+ mb(); /* Update producer before checking event threshold. */
+ return (i == netif->rx->event);
}
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
- struct list_head *ent;
- netif_t *netif;
- int i = 0;
-
- printk(KERN_ALERT "netif_schedule_list:\n");
- spin_lock_irq(&net_schedule_list_lock);
-
- list_for_each ( ent, &net_schedule_list )
- {
- netif = list_entry(ent, netif_t, list);
- printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
- i, netif->rx_req_cons, netif->rx_resp_prod);
- printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
- netif->tx_req_cons, netif->tx_resp_prod);
- printk(KERN_ALERT " shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
- netif->rx->req_prod, netif->rx->resp_prod);
- printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
- netif->rx->event, netif->tx->req_prod);
- printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
- netif->tx->resp_prod, netif->tx->event);
- i++;
- }
-
- spin_unlock_irq(&net_schedule_list_lock);
- printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-
- return IRQ_HANDLED;
+ struct list_head *ent;
+ netif_t *netif;
+ int i = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+ spin_lock_irq(&net_schedule_list_lock);
+
+ list_for_each (ent, &net_schedule_list) {
+ netif = list_entry(ent, netif_t, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ "rx_resp_prod=%08x\n",
+ i, netif->rx_req_cons, netif->rx_resp_prod);
+ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
+ netif->tx_req_cons, netif->tx_resp_prod);
+ printk(KERN_ALERT " shared(rx_req_prod=%08x "
+ "rx_resp_prod=%08x\n",
+ netif->rx->req_prod, netif->rx->resp_prod);
+ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
+ netif->rx->event, netif->tx->req_prod);
+ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
+ netif->tx->resp_prod, netif->tx->event);
+ i++;
+ }
+
+ spin_unlock_irq(&net_schedule_list_lock);
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+ return IRQ_HANDLED;
}
static int __init netback_init(void)
{
- int i;
- struct page *page;
-
- if ( !(xen_start_info->flags & SIF_NET_BE_DOMAIN) &&
- !(xen_start_info->flags & SIF_INITDOMAIN) )
- return 0;
-
- IPRINTK("Initialising Xen netif backend.\n");
-#ifdef CONFIG_XEN_NETDEV_GRANT
- IPRINTK("Using grant tables.\n");
-#endif
-
- /* We can increase reservation by this much in net_rx_action(). */
- balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
-
- skb_queue_head_init(&rx_queue);
- skb_queue_head_init(&tx_queue);
-
- init_timer(&net_timer);
- net_timer.data = 0;
- net_timer.function = net_alarm;
+ int i;
+ struct page *page;
+
+ if (!(xen_start_info->flags & SIF_NET_BE_DOMAIN) &&
+ !(xen_start_info->flags & SIF_INITDOMAIN))
+ return 0;
+
+ IPRINTK("Initialising Xen netif backend.\n");
+
+ /* We can increase reservation by this much in net_rx_action(). */
+ balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
+
+ skb_queue_head_init(&rx_queue);
+ skb_queue_head_init(&tx_queue);
+
+ init_timer(&net_timer);
+ net_timer.data = 0;
+ net_timer.function = net_alarm;
- page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
- BUG_ON(page == NULL);
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- {
- page = virt_to_page(MMAP_VADDR(i));
- set_page_count(page, 1);
- SetPageForeign(page, netif_page_release);
- }
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- pending_ring[i] = i;
-
- spin_lock_init(&net_schedule_list_lock);
- INIT_LIST_HEAD(&net_schedule_list);
-
- netif_xenbus_init();
-
- (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
- netif_be_dbg, SA_SHIRQ,
- "net-be-dbg", &netif_be_dbg);
-
- return 0;
+ page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
+ BUG_ON(page == NULL);
+ mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ page = virt_to_page(MMAP_VADDR(i));
+ set_page_count(page, 1);
+ SetPageForeign(page, netif_page_release);
+ }
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ pending_ring[i] = i;
+
+ spin_lock_init(&net_schedule_list_lock);
+ INIT_LIST_HEAD(&net_schedule_list);
+
+ netif_xenbus_init();
+
+ (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
+ netif_be_dbg, SA_SHIRQ,
+ "net-be-dbg", &netif_be_dbg);
+
+ return 0;
}
static void netback_cleanup(void)
{
- BUG();
+ BUG();
}
module_init(netback_init);
module_exit(netback_cleanup);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Sep 22 17:42:01 2005
@@ -242,6 +242,7 @@
be->dev = dev;
be->backend_watch.node = dev->nodename;
be->backend_watch.callback = backend_changed;
+ /* Registration implicitly calls backend_changed. */
err = register_xenbus_watch(&be->backend_watch);
if (err) {
be->backend_watch.node = NULL;
@@ -263,8 +264,6 @@
}
dev->data = be;
-
- backend_changed(&be->backend_watch, dev->nodename);
return 0;
free_be:
@@ -294,3 +293,13 @@
{
xenbus_register_backend(&netback);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Sep 22
17:42:01 2005
@@ -54,43 +54,10 @@
#include <asm-xen/balloon.h>
#include <asm/page.h>
#include <asm/uaccess.h>
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
-static grant_ref_t gref_tx_head;
-static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
-
-static grant_ref_t gref_rx_head;
-static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
-
#define GRANT_INVALID_REF (0xFFFF)
-
-#ifdef GRANT_DEBUG
-static void
-dump_packet(int tag, void *addr, u32 ap)
-{
- unsigned char *p = (unsigned char *)ap;
- int i;
-
- printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
- for (i = 0; i < 20; i++) {
- printk("%02x", p[i]);
- }
- printk("\n");
-}
-
-#define GDPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define dump_packet(x,y,z) ((void)0)
-#define GDPRINTK(_f, _a...) ((void)0)
-#endif
-
-#endif
-
-
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
@@ -124,7 +91,6 @@
#define NETIF_STATE_DISCONNECTED 0
#define NETIF_STATE_CONNECTED 1
-
static unsigned int netif_state = NETIF_STATE_DISCONNECTED;
static void network_tx_buf_gc(struct net_device *dev);
@@ -147,45 +113,50 @@
#define netfront_info net_private
struct net_private
{
- struct list_head list;
- struct net_device *netdev;
-
- struct net_device_stats stats;
- NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
- unsigned int tx_full;
+ struct list_head list;
+ struct net_device *netdev;
+
+ struct net_device_stats stats;
+ NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
+ unsigned int tx_full;
- netif_tx_interface_t *tx;
- netif_rx_interface_t *rx;
-
- spinlock_t tx_lock;
- spinlock_t rx_lock;
-
- unsigned int handle;
- unsigned int evtchn;
-
- /* What is the status of our connection to the remote backend? */
+ netif_tx_interface_t *tx;
+ netif_rx_interface_t *rx;
+
+ spinlock_t tx_lock;
+ spinlock_t rx_lock;
+
+ unsigned int handle;
+ unsigned int evtchn;
+
+ /* What is the status of our connection to the remote backend? */
#define BEST_CLOSED 0
#define BEST_DISCONNECTED 1
#define BEST_CONNECTED 2
- unsigned int backend_state;
-
- /* Is this interface open or closed (down or up)? */
+ unsigned int backend_state;
+
+ /* Is this interface open or closed (down or up)? */
#define UST_CLOSED 0
#define UST_OPEN 1
- unsigned int user_state;
-
- /* Receive-ring batched refills. */
+ unsigned int user_state;
+
+ /* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
#define RX_MAX_TARGET NETIF_RX_RING_SIZE
- int rx_min_target, rx_max_target, rx_target;
- struct sk_buff_head rx_batch;
-
- /*
- * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
- * array is an index into a chain of free entries.
- */
- struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
- struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+ int rx_min_target, rx_max_target, rx_target;
+ struct sk_buff_head rx_batch;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+ * array is an index into a chain of free entries.
+ */
+ struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+ struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+
+ grant_ref_t gref_tx_head;
+ grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+ grant_ref_t gref_rx_head;
+ grant_ref_t grant_rx_ref[NETIF_TX_RING_SIZE + 1];
struct xenbus_device *xbdev;
char *backend;
@@ -197,32 +168,32 @@
};
/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
-#define ADD_ID_TO_FREELIST(_list, _id) \
- (_list)[(_id)] = (_list)[0]; \
- (_list)[0] = (void *)(unsigned long)(_id);
-#define GET_ID_FROM_FREELIST(_list) \
- ({ unsigned long _id = (unsigned long)(_list)[0]; \
- (_list)[0] = (_list)[_id]; \
- (unsigned short)_id; })
+#define ADD_ID_TO_FREELIST(_list, _id) \
+ (_list)[(_id)] = (_list)[0]; \
+ (_list)[0] = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list) \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+ (_list)[0] = (_list)[_id]; \
+ (unsigned short)_id; })
#ifdef DEBUG
static char *be_state_name[] = {
- [BEST_CLOSED] = "closed",
- [BEST_DISCONNECTED] = "disconnected",
- [BEST_CONNECTED] = "connected",
+ [BEST_CLOSED] = "closed",
+ [BEST_DISCONNECTED] = "disconnected",
+ [BEST_CONNECTED] = "connected",
};
#endif
#ifdef DEBUG
#define DPRINTK(fmt, args...) \
- printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
+ printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__,
##args)
#else
#define DPRINTK(fmt, args...) ((void)0)
#endif
#define IPRINTK(fmt, args...) \
- printk(KERN_INFO "xen_net: " fmt, ##args)
+ printk(KERN_INFO "xen_net: " fmt, ##args)
#define WPRINTK(fmt, args...) \
- printk(KERN_WARNING "xen_net: " fmt, ##args)
+ printk(KERN_WARNING "xen_net: " fmt, ##args)
/** Send a packet on a net device to encourage switches to learn the
* MAC. We send a fake ARP request.
@@ -232,628 +203,582 @@
*/
static int send_fake_arp(struct net_device *dev)
{
- struct sk_buff *skb;
- u32 src_ip, dst_ip;
-
- dst_ip = INADDR_BROADCAST;
- src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
-
- /* No IP? Then nothing to do. */
- if (src_ip == 0)
- return 0;
-
- skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
- dst_ip, dev, src_ip,
- /*dst_hw*/ NULL, /*src_hw*/ NULL,
- /*target_hw*/ dev->dev_addr);
- if (skb == NULL)
- return -ENOMEM;
-
- return dev_queue_xmit(skb);
+ struct sk_buff *skb;
+ u32 src_ip, dst_ip;
+
+ dst_ip = INADDR_BROADCAST;
+ src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
+
+ /* No IP? Then nothing to do. */
+ if (src_ip == 0)
+ return 0;
+
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
+ dst_ip, dev, src_ip,
+ /*dst_hw*/ NULL, /*src_hw*/ NULL,
+ /*target_hw*/ dev->dev_addr);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ return dev_queue_xmit(skb);
}
static int network_open(struct net_device *dev)
{
- struct net_private *np = netdev_priv(dev);
-
- memset(&np->stats, 0, sizeof(np->stats));
-
- np->user_state = UST_OPEN;
-
- network_alloc_rx_buffers(dev);
- np->rx->event = np->rx_resp_cons + 1;
-
- netif_start_queue(dev);
-
- return 0;
+ struct net_private *np = netdev_priv(dev);
+
+ memset(&np->stats, 0, sizeof(np->stats));
+
+ np->user_state = UST_OPEN;
+
+ network_alloc_rx_buffers(dev);
+ np->rx->event = np->rx_resp_cons + 1;
+
+ netif_start_queue(dev);
+
+ return 0;
}
static void network_tx_buf_gc(struct net_device *dev)
{
- NETIF_RING_IDX i, prod;
- unsigned short id;
- struct net_private *np = netdev_priv(dev);
- struct sk_buff *skb;
-
- if (np->backend_state != BEST_CONNECTED)
- return;
-
- do {
- prod = np->tx->resp_prod;
- rmb(); /* Ensure we see responses up to 'rp'. */
-
- for (i = np->tx_resp_cons; i != prod; i++) {
- id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
- skb = np->tx_skbs[id];
-#ifdef CONFIG_XEN_NETDEV_GRANT
- if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
- /* other domain is still using this grant - shouldn't happen
- but if it does, we'll try to reclaim the grant later */
- printk(KERN_ALERT "network_tx_buf_gc: warning -- grant "
- "still in use by backend domain.\n");
- goto out;
- }
- gnttab_end_foreign_access_ref(grant_tx_ref[id], GNTMAP_readonly);
- gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
- grant_tx_ref[id] = GRANT_INVALID_REF;
-#endif
- ADD_ID_TO_FREELIST(np->tx_skbs, id);
- dev_kfree_skb_irq(skb);
- }
+ NETIF_RING_IDX i, prod;
+ unsigned short id;
+ struct net_private *np = netdev_priv(dev);
+ struct sk_buff *skb;
+
+ if (np->backend_state != BEST_CONNECTED)
+ return;
+
+ do {
+ prod = np->tx->resp_prod;
+		rmb(); /* Ensure we see responses up to 'prod'. */
+
+ for (i = np->tx_resp_cons; i != prod; i++) {
+ id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
+ skb = np->tx_skbs[id];
+ if (unlikely(gnttab_query_foreign_access(
+ np->grant_tx_ref[id]) != 0)) {
+ printk(KERN_ALERT "network_tx_buf_gc: warning "
+ "-- grant still in use by backend "
+ "domain.\n");
+ goto out;
+ }
+ gnttab_end_foreign_access_ref(
+ np->grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, np->grant_tx_ref[id]);
+ np->grant_tx_ref[id] = GRANT_INVALID_REF;
+ ADD_ID_TO_FREELIST(np->tx_skbs, id);
+ dev_kfree_skb_irq(skb);
+ }
- np->tx_resp_cons = prod;
+ np->tx_resp_cons = prod;
- /*
- * Set a new event, then check for race with update of tx_cons. Note
- * that it is essential to schedule a callback, no matter how few
- * buffers are pending. Even if there is space in the transmit ring,
- * higher layers may be blocked because too much data is outstanding:
- * in such cases notification from Xen is likely to be the only kick
- * that we'll get.
- */
- np->tx->event =
- prod + ((np->tx->req_prod - prod) >> 1) + 1;
- mb();
- } while (prod != np->tx->resp_prod);
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- out:
-#endif
-
- if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
- np->tx_full = 0;
- if (np->user_state == UST_OPEN)
- netif_wake_queue(dev);
- }
+ /*
+ * Set a new event, then check for race with update of tx_cons.
+ * Note that it is essential to schedule a callback, no matter
+ * how few buffers are pending. Even if there is space in the
+ * transmit ring, higher layers may be blocked because too much
+ * data is outstanding: in such cases notification from Xen is
+ * likely to be the only kick that we'll get.
+ */
+ np->tx->event = prod + ((np->tx->req_prod - prod) >> 1) + 1;
+ mb();
+ } while (prod != np->tx->resp_prod);
+
+ out:
+ if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
+ np->tx_full = 0;
+ if (np->user_state == UST_OPEN)
+ netif_wake_queue(dev);
+ }
}
static void network_alloc_rx_buffers(struct net_device *dev)
{
- unsigned short id;
- struct net_private *np = netdev_priv(dev);
- struct sk_buff *skb;
- int i, batch_target;
- NETIF_RING_IDX req_prod = np->rx->req_prod;
- struct xen_memory_reservation reservation;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- grant_ref_t ref;
-#endif
-
- if (unlikely(np->backend_state != BEST_CONNECTED))
- return;
-
- /*
- * Allocate skbuffs greedily, even though we batch updates to the
- * receive ring. This creates a less bursty demand on the memory allocator,
- * so should reduce the chance of failed allocation requests both for
- * ourself and for other kernel subsystems.
- */
- batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
- for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
- if (unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL))
- break;
- __skb_queue_tail(&np->rx_batch, skb);
- }
-
- /* Is the batch large enough to be worthwhile? */
- if (i < (np->rx_target/2))
- return;
-
- for (i = 0; ; i++) {
- if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
- break;
-
- skb->dev = dev;
-
- id = GET_ID_FROM_FREELIST(np->rx_skbs);
-
- np->rx_skbs[id] = skb;
+ unsigned short id;
+ struct net_private *np = netdev_priv(dev);
+ struct sk_buff *skb;
+ int i, batch_target;
+ NETIF_RING_IDX req_prod = np->rx->req_prod;
+ struct xen_memory_reservation reservation;
+ grant_ref_t ref;
+
+ if (unlikely(np->backend_state != BEST_CONNECTED))
+ return;
+
+ /*
+ * Allocate skbuffs greedily, even though we batch updates to the
+ * receive ring. This creates a less bursty demand on the memory
+ * allocator, so should reduce the chance of failed allocation requests
+ * both for ourself and for other kernel subsystems.
+ */
+ batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
+ for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
+ skb = alloc_xen_skb(dev->mtu + RX_HEADROOM);
+ if (skb == NULL)
+ break;
+ __skb_queue_tail(&np->rx_batch, skb);
+ }
+
+ /* Is the batch large enough to be worthwhile? */
+ if (i < (np->rx_target/2))
+ return;
+
+ for (i = 0; ; i++) {
+ if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
+ break;
+
+ skb->dev = dev;
+
+ id = GET_ID_FROM_FREELIST(np->rx_skbs);
+
+ np->rx_skbs[id] = skb;
- np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- ref = gnttab_claim_grant_reference(&gref_rx_head);
- if (unlikely((signed short)ref < 0)) {
- printk(KERN_ALERT "#### netfront can't claim rx reference\n");
- BUG();
- }
- grant_rx_ref[id] = ref;
- gnttab_grant_foreign_transfer_ref(ref, np->backend_id,
- virt_to_mfn(skb->head));
- np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
-#endif
- rx_pfn_array[i] = virt_to_mfn(skb->head);
-
- /* Remove this page from pseudo phys map before passing back to Xen. */
- phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
- = INVALID_P2M_ENTRY;
-
- MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
- __pte(0), 0);
- }
-
- /* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-
- /* Give away a batch of pages. */
- rx_mcl[i].op = __HYPERVISOR_memory_op;
- rx_mcl[i].args[0] = XENMEM_decrease_reservation;
- rx_mcl[i].args[1] = (unsigned long)&reservation;
-
- reservation.extent_start = rx_pfn_array;
- reservation.nr_extents = i;
- reservation.extent_order = 0;
- reservation.address_bits = 0;
- reservation.domid = DOMID_SELF;
-
- /* Tell the ballon driver what is going on. */
- balloon_update_driver_allowance(i);
-
- /* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(rx_mcl, i+1);
-
- /* Check return status of HYPERVISOR_memory_op(). */
- if (unlikely(rx_mcl[i].result != i))
- panic("Unable to reduce memory reservation\n");
-
- /* Above is a suitable barrier to ensure backend will see requests. */
- np->rx->req_prod = req_prod + i;
-
- /* Adjust our floating fill target if we risked running out of buffers. */
- if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
- ((np->rx_target *= 2) > np->rx_max_target))
- np->rx_target = np->rx_max_target;
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_rx_head);
+ BUG_ON((signed short)ref < 0);
+ np->grant_rx_ref[id] = ref;
+ gnttab_grant_foreign_transfer_ref(ref, np->backend_id);
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+ rx_pfn_array[i] = virt_to_mfn(skb->head);
+
+ /* Remove this page from map before passing back to Xen. */
+ phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
+ = INVALID_P2M_ENTRY;
+
+ MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
+ __pte(0), 0);
+ }
+
+ /* After all PTEs have been zapped we blow away stale TLB entries. */
+ rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+ /* Give away a batch of pages. */
+ rx_mcl[i].op = __HYPERVISOR_memory_op;
+ rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+ rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+ reservation.extent_start = rx_pfn_array;
+ reservation.nr_extents = i;
+ reservation.extent_order = 0;
+ reservation.address_bits = 0;
+ reservation.domid = DOMID_SELF;
+
+	/* Tell the balloon driver what is going on. */
+ balloon_update_driver_allowance(i);
+
+ /* Zap PTEs and give away pages in one big multicall. */
+ (void)HYPERVISOR_multicall(rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_memory_op(). */
+ if (unlikely(rx_mcl[i].result != i))
+ panic("Unable to reduce memory reservation\n");
+
+ /* Above is a suitable barrier to ensure backend will see requests. */
+ np->rx->req_prod = req_prod + i;
+
+ /* Adjust our fill target if we risked running out of buffers. */
+ if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
+ ((np->rx_target *= 2) > np->rx_max_target))
+ np->rx_target = np->rx_max_target;
}
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- unsigned short id;
- struct net_private *np = netdev_priv(dev);
- netif_tx_request_t *tx;
- NETIF_RING_IDX i;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- grant_ref_t ref;
- unsigned long mfn;
-#endif
-
- if (unlikely(np->tx_full)) {
- printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
- netif_stop_queue(dev);
- goto drop;
- }
-
- if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
- PAGE_SIZE)) {
- struct sk_buff *nskb;
- if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
- goto drop;
- skb_put(nskb, skb->len);
- memcpy(nskb->data, skb->data, skb->len);
- nskb->dev = skb->dev;
- dev_kfree_skb(skb);
- skb = nskb;
- }
+ unsigned short id;
+ struct net_private *np = netdev_priv(dev);
+ netif_tx_request_t *tx;
+ NETIF_RING_IDX i;
+ grant_ref_t ref;
+ unsigned long mfn;
+
+ if (unlikely(np->tx_full)) {
+ printk(KERN_ALERT "%s: full queue wasn't stopped!\n",
+ dev->name);
+ netif_stop_queue(dev);
+ goto drop;
+ }
+
+ if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+ PAGE_SIZE)) {
+ struct sk_buff *nskb;
+ if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
+ goto drop;
+ skb_put(nskb, skb->len);
+ memcpy(nskb->data, skb->data, skb->len);
+ nskb->dev = skb->dev;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
- spin_lock_irq(&np->tx_lock);
-
- if (np->backend_state != BEST_CONNECTED) {
- spin_unlock_irq(&np->tx_lock);
- goto drop;
- }
-
- i = np->tx->req_prod;
-
- id = GET_ID_FROM_FREELIST(np->tx_skbs);
- np->tx_skbs[id] = skb;
-
- tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
-
- tx->id = id;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- ref = gnttab_claim_grant_reference(&gref_tx_head);
- if (unlikely((signed short)ref < 0)) {
- printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
- BUG();
- }
- mfn = virt_to_mfn(skb->data);
- gnttab_grant_foreign_access_ref(ref, np->backend_id, mfn, GNTMAP_readonly);
- tx->addr = ref << PAGE_SHIFT;
- grant_tx_ref[id] = ref;
-#else
- tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
-#endif
- tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
- tx->size = skb->len;
- tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
-
- wmb(); /* Ensure that backend will see the request. */
- np->tx->req_prod = i + 1;
-
- network_tx_buf_gc(dev);
-
- if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
- np->tx_full = 1;
- netif_stop_queue(dev);
- }
-
- spin_unlock_irq(&np->tx_lock);
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
-
- /* Only notify Xen if we really have to. */
- mb();
- if (np->tx->TX_TEST_IDX == i)
- notify_via_evtchn(np->evtchn);
-
- return 0;
+ spin_lock_irq(&np->tx_lock);
+
+ if (np->backend_state != BEST_CONNECTED) {
+ spin_unlock_irq(&np->tx_lock);
+ goto drop;
+ }
+
+ i = np->tx->req_prod;
+
+ id = GET_ID_FROM_FREELIST(np->tx_skbs);
+ np->tx_skbs[id] = skb;
+
+ tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
+
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+ mfn = virt_to_mfn(skb->data);
+ gnttab_grant_foreign_access_ref(
+ ref, np->backend_id, mfn, GNTMAP_readonly);
+ tx->gref = np->grant_tx_ref[id] = ref;
+ tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
+ tx->size = skb->len;
+ tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
+
+ wmb(); /* Ensure that backend will see the request. */
+ np->tx->req_prod = i + 1;
+
+ network_tx_buf_gc(dev);
+
+ if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
+ np->tx_full = 1;
+ netif_stop_queue(dev);
+ }
+
+ spin_unlock_irq(&np->tx_lock);
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+
+ /* Only notify Xen if we really have to. */
+ mb();
+ if (np->tx->TX_TEST_IDX == i)
+ notify_via_evtchn(np->evtchn);
+
+ return 0;
drop:
- np->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return 0;
+ np->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
}
static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
- struct net_device *dev = dev_id;
- struct net_private *np = netdev_priv(dev);
- unsigned long flags;
-
- spin_lock_irqsave(&np->tx_lock, flags);
- network_tx_buf_gc(dev);
- spin_unlock_irqrestore(&np->tx_lock, flags);
-
- if((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN))
- netif_rx_schedule(dev);
-
- return IRQ_HANDLED;
+ struct net_device *dev = dev_id;
+ struct net_private *np = netdev_priv(dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&np->tx_lock, flags);
+ network_tx_buf_gc(dev);
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ if ((np->rx_resp_cons != np->rx->resp_prod) &&
+ (np->user_state == UST_OPEN))
+ netif_rx_schedule(dev);
+
+ return IRQ_HANDLED;
}
static int netif_poll(struct net_device *dev, int *pbudget)
{
- struct net_private *np = netdev_priv(dev);
- struct sk_buff *skb, *nskb;
- netif_rx_response_t *rx;
- NETIF_RING_IDX i, rp;
- mmu_update_t *mmu = rx_mmu;
- multicall_entry_t *mcl = rx_mcl;
- int work_done, budget, more_to_do = 1;
- struct sk_buff_head rxq;
- unsigned long flags;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- unsigned long mfn;
- grant_ref_t ref;
-#endif
-
- spin_lock(&np->rx_lock);
-
- if (np->backend_state != BEST_CONNECTED) {
- spin_unlock(&np->rx_lock);
- return 0;
- }
-
- skb_queue_head_init(&rxq);
-
- if ((budget = *pbudget) > dev->quota)
- budget = dev->quota;
- rp = np->rx->resp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for (i = np->rx_resp_cons, work_done = 0;
- (i != rp) && (work_done < budget);
- i++, work_done++) {
- rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
- /*
- * An error here is very odd. Usually indicates a backend bug,
- * low-memory condition, or that we didn't have reservation headroom.
- */
- if (unlikely(rx->status <= 0)) {
- if (net_ratelimit())
- printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
- np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
- wmb();
- np->rx->req_prod++;
- work_done--;
- continue;
- }
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- ref = grant_rx_ref[rx->id];
-
- if(ref == GRANT_INVALID_REF) {
- printk(KERN_WARNING "Bad rx grant reference %d from dom %d.\n",
- ref, np->backend_id);
- np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
- wmb();
- np->rx->req_prod++;
- work_done--;
- continue;
- }
-
- grant_rx_ref[rx->id] = GRANT_INVALID_REF;
- mfn = gnttab_end_foreign_transfer_ref(ref);
- gnttab_release_grant_reference(&gref_rx_head, ref);
-#endif
-
- skb = np->rx_skbs[rx->id];
- ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
-
- /* NB. We handle skb overflow later. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
- skb->data = skb->head + rx->addr;
-#else
- skb->data = skb->head + (rx->addr & ~PAGE_MASK);
-#endif
- skb->len = rx->status;
- skb->tail = skb->data + skb->len;
-
- if ( rx->csum_valid )
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- np->stats.rx_packets++;
- np->stats.rx_bytes += rx->status;
-
- /* Remap the page. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
- mmu->ptr = ((unsigned long long)mfn << PAGE_SHIFT) |
MMU_MACHPHYS_UPDATE;
-#else
- mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
-#endif
- mmu->val = __pa(skb->head) >> PAGE_SHIFT;
- mmu++;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
- pfn_pte_ma(mfn, PAGE_KERNEL), 0);
-#else
- MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
- pfn_pte_ma(rx->addr >> PAGE_SHIFT,
- PAGE_KERNEL), 0);
-#endif
- mcl++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
- GDPRINTK("#### rx_poll enqueue vdata=%p mfn=%lu ref=%x\n",
- skb->data, mfn, ref);
-#else
- phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
- rx->addr >> PAGE_SHIFT;
-#endif
-
-
- __skb_queue_tail(&rxq, skb);
- }
-
-
- /* Some pages are no longer absent... */
- balloon_update_driver_allowance(-work_done);
-
- /* Do all the remapping work, and M->P updates, in one big hypercall. */
- if (likely((mcl - rx_mcl) != 0)) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
- (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
- }
-
- while ((skb = __skb_dequeue(&rxq)) != NULL) {
-#ifdef CONFIG_XEN_NETDEV_GRANT
- GDPRINTK("#### rx_poll dequeue vdata=%p mfn=%lu\n",
- skb->data, virt_to_mfn(skb->data));
- dump_packet('d', skb->data, (unsigned long)skb->data);
-#endif
- /*
- * Enough room in skbuff for the data we were passed? Also, Linux
- * expects at least 16 bytes headroom in each receive buffer.
- */
- if (unlikely(skb->tail > skb->end) ||
- unlikely((skb->data - skb->head) < 16)) {
- nskb = NULL;
-
-
- /* Only copy the packet if it fits in the current MTU. */
- if (skb->len <= (dev->mtu + ETH_HLEN)) {
- if ((skb->tail > skb->end) && net_ratelimit())
- printk(KERN_INFO "Received packet needs %zd bytes more "
- "headroom.\n", skb->tail - skb->end);
-
- if ((nskb = alloc_xen_skb(skb->len + 2)) != NULL) {
- skb_reserve(nskb, 2);
- skb_put(nskb, skb->len);
- memcpy(nskb->data, skb->data, skb->len);
- nskb->dev = skb->dev;
- }
- }
- else if (net_ratelimit())
- printk(KERN_INFO "Received packet too big for MTU "
- "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
-
- /* Reinitialise and then destroy the old skbuff. */
- skb->len = 0;
- skb->tail = skb->data;
- init_skb_shinfo(skb);
- dev_kfree_skb(skb);
-
- /* Switch old for new, if we copied the buffer. */
- if ((skb = nskb) == NULL)
- continue;
- }
+ struct net_private *np = netdev_priv(dev);
+ struct sk_buff *skb, *nskb;
+ netif_rx_response_t *rx;
+ NETIF_RING_IDX i, rp;
+ mmu_update_t *mmu = rx_mmu;
+ multicall_entry_t *mcl = rx_mcl;
+ int work_done, budget, more_to_do = 1;
+ struct sk_buff_head rxq;
+ unsigned long flags;
+ unsigned long mfn;
+ grant_ref_t ref;
+
+ spin_lock(&np->rx_lock);
+
+ if (np->backend_state != BEST_CONNECTED) {
+ spin_unlock(&np->rx_lock);
+ return 0;
+ }
+
+ skb_queue_head_init(&rxq);
+
+ if ((budget = *pbudget) > dev->quota)
+ budget = dev->quota;
+ rp = np->rx->resp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = np->rx_resp_cons, work_done = 0;
+ (i != rp) && (work_done < budget);
+ i++, work_done++) {
+ rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+ /*
+ * An error here is very odd. Usually indicates a backend bug,
+ * low-mem condition, or we didn't have reservation headroom.
+ */
+ if (unlikely(rx->status <= 0)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "Bad rx buffer "
+ "(memory squeeze?).\n");
+ np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].
+ req.id = rx->id;
+ wmb();
+ np->rx->req_prod++;
+ work_done--;
+ continue;
+ }
+
+ ref = np->grant_rx_ref[rx->id];
+
+ if(ref == GRANT_INVALID_REF) {
+ printk(KERN_WARNING "Bad rx grant reference %d "
+ "from dom %d.\n",
+ ref, np->backend_id);
+ np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].
+ req.id = rx->id;
+ wmb();
+ np->rx->req_prod++;
+ work_done--;
+ continue;
+ }
+
+ np->grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+ mfn = gnttab_end_foreign_transfer_ref(ref);
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+ skb = np->rx_skbs[rx->id];
+ ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+ /* NB. We handle skb overflow later. */
+ skb->data = skb->head + rx->offset;
+ skb->len = rx->status;
+ skb->tail = skb->data + skb->len;
+
+ if ( rx->csum_valid )
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += rx->status;
+
+ /* Remap the page. */
+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ mmu->val = __pa(skb->head) >> PAGE_SHIFT;
+ mmu++;
+ MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
+ pfn_pte_ma(mfn, PAGE_KERNEL), 0);
+ mcl++;
+
+ phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
+
+ __skb_queue_tail(&rxq, skb);
+ }
+
+ /* Some pages are no longer absent... */
+ balloon_update_driver_allowance(-work_done);
+
+ /* Do all the remapping work, and M2P updates, in one big hypercall. */
+ if (likely((mcl - rx_mcl) != 0)) {
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = mmu - rx_mmu;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ mcl++;
+ (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+ }
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ /*
+ * Enough room in skbuff for the data we were passed? Also,
+ * Linux expects at least 16 bytes headroom in each rx buffer.
+ */
+ if (unlikely(skb->tail > skb->end) ||
+ unlikely((skb->data - skb->head) < 16)) {
+ nskb = NULL;
+
+ /* Only copy the packet if it fits in the MTU. */
+ if (skb->len <= (dev->mtu + ETH_HLEN)) {
+ if ((skb->tail > skb->end) && net_ratelimit())
+ printk(KERN_INFO "Received packet "
+ "needs %zd bytes more "
+ "headroom.\n",
+ skb->tail - skb->end);
+
+ nskb = alloc_xen_skb(skb->len + 2);
+ if (nskb != NULL) {
+ skb_reserve(nskb, 2);
+ skb_put(nskb, skb->len);
+ memcpy(nskb->data,
+ skb->data,
+ skb->len);
+ nskb->dev = skb->dev;
+ }
+ }
+ else if (net_ratelimit())
+ printk(KERN_INFO "Received packet too big for "
+ "MTU (%d > %d)\n",
+ skb->len - ETH_HLEN, dev->mtu);
+
+ /* Reinitialise and then destroy the old skbuff. */
+ skb->len = 0;
+ skb->tail = skb->data;
+ init_skb_shinfo(skb);
+ dev_kfree_skb(skb);
+
+ /* Switch old for new, if we copied the buffer. */
+ if ((skb = nskb) == NULL)
+ continue;
+ }
- /* Set the shared-info area, which is hidden behind the real data. */
- init_skb_shinfo(skb);
- /* Ethernet-specific work. Delayed to here as it peeks the header. */
- skb->protocol = eth_type_trans(skb, dev);
-
- /* Pass it up. */
- netif_receive_skb(skb);
- dev->last_rx = jiffies;
- }
-
- np->rx_resp_cons = i;
-
- /* If we get a callback with very few responses, reduce fill target. */
- /* NB. Note exponential increase, linear decrease. */
- if (((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
- (--np->rx_target < np->rx_min_target))
- np->rx_target = np->rx_min_target;
-
- network_alloc_rx_buffers(dev);
-
- *pbudget -= work_done;
- dev->quota -= work_done;
-
- if (work_done < budget) {
- local_irq_save(flags);
-
- np->rx->event = i + 1;
+ /* Set the shinfo area, which is hidden behind the data. */
+ init_skb_shinfo(skb);
+ /* Ethernet work: Delayed to here as it peeks the header. */
+ skb->protocol = eth_type_trans(skb, dev);
+
+ /* Pass it up. */
+ netif_receive_skb(skb);
+ dev->last_rx = jiffies;
+ }
+
+ np->rx_resp_cons = i;
+
+ /* If we get a callback with very few responses, reduce fill target. */
+ /* NB. Note exponential increase, linear decrease. */
+ if (((np->rx->req_prod - np->rx->resp_prod) >
+ ((3*np->rx_target) / 4)) &&
+ (--np->rx_target < np->rx_min_target))
+ np->rx_target = np->rx_min_target;
+
+ network_alloc_rx_buffers(dev);
+
+ *pbudget -= work_done;
+ dev->quota -= work_done;
+
+ if (work_done < budget) {
+ local_irq_save(flags);
+
+ np->rx->event = i + 1;
- /* Deal with hypervisor racing our resetting of rx_event. */
- mb();
- if (np->rx->resp_prod == i) {
- __netif_rx_complete(dev);
- more_to_do = 0;
- }
-
- local_irq_restore(flags);
- }
-
- spin_unlock(&np->rx_lock);
-
- return more_to_do;
+ /* Deal with hypervisor racing our resetting of rx_event. */
+ mb();
+ if (np->rx->resp_prod == i) {
+ __netif_rx_complete(dev);
+ more_to_do = 0;
+ }
+
+ local_irq_restore(flags);
+ }
+
+ spin_unlock(&np->rx_lock);
+
+ return more_to_do;
}
static int network_close(struct net_device *dev)
{
- struct net_private *np = netdev_priv(dev);
- np->user_state = UST_CLOSED;
- netif_stop_queue(np->netdev);
- return 0;
+ struct net_private *np = netdev_priv(dev);
+ np->user_state = UST_CLOSED;
+ netif_stop_queue(np->netdev);
+ return 0;
}
static struct net_device_stats *network_get_stats(struct net_device *dev)
{
- struct net_private *np = netdev_priv(dev);
- return &np->stats;
+ struct net_private *np = netdev_priv(dev);
+ return &np->stats;
}
static void network_connect(struct net_device *dev)
{
- struct net_private *np;
- int i, requeue_idx;
- netif_tx_request_t *tx;
-
- np = netdev_priv(dev);
- spin_lock_irq(&np->tx_lock);
- spin_lock(&np->rx_lock);
-
- /* Recovery procedure: */
-
- /* Step 1: Reinitialise variables. */
- np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
- np->rx->event = np->tx->event = 1;
-
- /* Step 2: Rebuild the RX and TX ring contents.
- * NB. We could just free the queued TX packets now but we hope
- * that sending them out might do some good. We have to rebuild
- * the RX ring because some of our pages are currently flipped out
- * so we can't just free the RX skbs.
- * NB2. Freelist index entries are always going to be less than
- * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
- * greater than __PAGE_OFFSET: we use this property to distinguish
- * them.
- */
-
- /* Rebuild the TX buffer freelist and the TX ring itself.
- * NB. This reorders packets. We could keep more private state
- * to avoid this but maybe it doesn't matter so much given the
- * interface has been down.
- */
- for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
- if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
- struct sk_buff *skb = np->tx_skbs[i];
-
- tx = &np->tx->ring[requeue_idx++].req;
-
- tx->id = i;
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gnttab_grant_foreign_access_ref(grant_tx_ref[i], np->backend_id,
- virt_to_mfn(np->tx_skbs[i]->data),
- GNTMAP_readonly);
- tx->addr = grant_tx_ref[i] << PAGE_SHIFT;
-#else
- tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
-#endif
- tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
- tx->size = skb->len;
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
- }
- }
- wmb();
- np->tx->req_prod = requeue_idx;
-
- /* Rebuild the RX buffer freelist and the RX ring itself. */
- for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) {
- if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) {
-#ifdef CONFIG_XEN_NETDEV_GRANT
- /* Reinstate the grant ref so backend can 'donate' mfn to us. */
- gnttab_grant_foreign_transfer_ref(grant_rx_ref[i], np->backend_id,
- virt_to_mfn(np->rx_skbs[i]->head)
- );
- np->rx->ring[requeue_idx].req.gref = grant_rx_ref[i];
-#endif
- np->rx->ring[requeue_idx].req.id = i;
- requeue_idx++;
- }
- }
-
- wmb();
- np->rx->req_prod = requeue_idx;
-
- /* Step 3: All public and private state should now be sane. Get
- * ready to start sending and receiving packets and give the driver
- * domain a kick because we've probably just requeued some
- * packets.
- */
- np->backend_state = BEST_CONNECTED;
- wmb();
- notify_via_evtchn(np->evtchn);
- network_tx_buf_gc(dev);
-
- if (np->user_state == UST_OPEN)
- netif_start_queue(dev);
-
- spin_unlock(&np->rx_lock);
- spin_unlock_irq(&np->tx_lock);
+ struct net_private *np;
+ int i, requeue_idx;
+ netif_tx_request_t *tx;
+
+ np = netdev_priv(dev);
+ spin_lock_irq(&np->tx_lock);
+ spin_lock(&np->rx_lock);
+
+ /* Recovery procedure: */
+
+ /* Step 1: Reinitialise variables. */
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+ np->rx->event = np->tx->event = 1;
+
+ /* Step 2: Rebuild the RX and TX ring contents.
+ * NB. We could just free the queued TX packets now but we hope
+ * that sending them out might do some good. We have to rebuild
+ * the RX ring because some of our pages are currently flipped out
+ * so we can't just free the RX skbs.
+ * NB2. Freelist index entries are always going to be less than
+ * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
+ * greater than __PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+
+ /* Rebuild the TX buffer freelist and the TX ring itself.
+ * NB. This reorders packets. We could keep more private state
+ * to avoid this but maybe it doesn't matter so much given the
+ * interface has been down.
+ */
+ for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
+ if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
+ struct sk_buff *skb = np->tx_skbs[i];
+
+ tx = &np->tx->ring[requeue_idx++].req;
+
+ tx->id = i;
+ gnttab_grant_foreign_access_ref(
+ np->grant_tx_ref[i], np->backend_id,
+ virt_to_mfn(np->tx_skbs[i]->data),
+ GNTMAP_readonly);
+ tx->gref = np->grant_tx_ref[i];
+ tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
+ tx->size = skb->len;
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+ }
+ }
+ wmb();
+ np->tx->req_prod = requeue_idx;
+
+ /* Rebuild the RX buffer freelist and the RX ring itself. */
+ for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) {
+ if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) {
+ gnttab_grant_foreign_transfer_ref(
+ np->grant_rx_ref[i], np->backend_id);
+ np->rx->ring[requeue_idx].req.gref =
+ np->grant_rx_ref[i];
+ np->rx->ring[requeue_idx].req.id = i;
+ requeue_idx++;
+ }
+ }
+
+ wmb();
+ np->rx->req_prod = requeue_idx;
+
+ /* Step 3: All public and private state should now be sane. Get
+ * ready to start sending and receiving packets and give the driver
+ * domain a kick because we've probably just requeued some
+ * packets.
+ */
+ np->backend_state = BEST_CONNECTED;
+ wmb();
+ notify_via_evtchn(np->evtchn);
+ network_tx_buf_gc(dev);
+
+ if (np->user_state == UST_OPEN)
+ netif_start_queue(dev);
+
+ spin_unlock(&np->rx_lock);
+ spin_unlock_irq(&np->tx_lock);
}
static void show_device(struct net_private *np)
@@ -890,6 +815,13 @@
show_device(np);
}
+static void netif_uninit(struct net_device *dev)
+{
+ struct net_private *np = netdev_priv(dev);
+ gnttab_free_grant_references(np->gref_tx_head);
+ gnttab_free_grant_references(np->gref_rx_head);
+}
+
static struct ethtool_ops network_ethtool_ops =
{
.get_tx_csum = ethtool_op_get_tx_csum,
@@ -904,84 +836,99 @@
static int create_netdev(int handle, struct xenbus_device *dev,
struct net_device **val)
{
- int i, err = 0;
- struct net_device *netdev = NULL;
- struct net_private *np = NULL;
-
- if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
- printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
- err = -ENOMEM;
- goto exit;
- }
-
- np = netdev_priv(netdev);
- np->backend_state = BEST_CLOSED;
- np->user_state = UST_CLOSED;
- np->handle = handle;
- np->xbdev = dev;
+ int i, err = 0;
+ struct net_device *netdev = NULL;
+ struct net_private *np = NULL;
+
+ if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
+ printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
+ __FUNCTION__);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ np = netdev_priv(netdev);
+ np->backend_state = BEST_CLOSED;
+ np->user_state = UST_CLOSED;
+ np->handle = handle;
+ np->xbdev = dev;
- spin_lock_init(&np->tx_lock);
- spin_lock_init(&np->rx_lock);
-
- skb_queue_head_init(&np->rx_batch);
- np->rx_target = RX_MIN_TARGET;
- np->rx_min_target = RX_MIN_TARGET;
- np->rx_max_target = RX_MAX_TARGET;
-
- /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
- np->tx_skbs[i] = (void *)((unsigned long) i+1);
-#ifdef CONFIG_XEN_NETDEV_GRANT
- grant_tx_ref[i] = GRANT_INVALID_REF;
-#endif
- }
-
- for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
- np->rx_skbs[i] = (void *)((unsigned long) i+1);
-#ifdef CONFIG_XEN_NETDEV_GRANT
- grant_rx_ref[i] = GRANT_INVALID_REF;
-#endif
- }
-
- netdev->open = network_open;
- netdev->hard_start_xmit = network_start_xmit;
- netdev->stop = network_close;
- netdev->get_stats = network_get_stats;
- netdev->poll = netif_poll;
- netdev->weight = 64;
- netdev->features = NETIF_F_IP_CSUM;
-
- SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
-
- if ((err = register_netdev(netdev)) != 0) {
- printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
- goto exit;
- }
-
- if ((err = xennet_proc_addif(netdev)) != 0) {
- unregister_netdev(netdev);
- goto exit;
- }
-
- np->netdev = netdev;
-
- exit:
- if ((err != 0) && (netdev != NULL))
- kfree(netdev);
- else if (val != NULL)
- *val = netdev;
- return err;
+ spin_lock_init(&np->tx_lock);
+ spin_lock_init(&np->rx_lock);
+
+ skb_queue_head_init(&np->rx_batch);
+ np->rx_target = RX_MIN_TARGET;
+ np->rx_min_target = RX_MIN_TARGET;
+ np->rx_max_target = RX_MAX_TARGET;
+
+ /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
+ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
+ np->tx_skbs[i] = (void *)((unsigned long) i+1);
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ }
+
+ for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
+ np->rx_skbs[i] = (void *)((unsigned long) i+1);
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ }
+
+ /* A grant for every tx ring slot */
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &np->gref_tx_head) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ goto exit;
+ }
+ /* A grant for every rx ring slot */
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &np->gref_rx_head) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ gnttab_free_grant_references(np->gref_tx_head);
+ goto exit;
+ }
+
+ netdev->open = network_open;
+ netdev->hard_start_xmit = network_start_xmit;
+ netdev->stop = network_close;
+ netdev->get_stats = network_get_stats;
+ netdev->poll = netif_poll;
+ netdev->uninit = netif_uninit;
+ netdev->weight = 64;
+ netdev->features = NETIF_F_IP_CSUM;
+
+ SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
+
+ if ((err = register_netdev(netdev)) != 0) {
+ printk(KERN_WARNING "%s> register_netdev err=%d\n",
+ __FUNCTION__, err);
+ goto exit_free_grefs;
+ }
+
+ if ((err = xennet_proc_addif(netdev)) != 0) {
+ unregister_netdev(netdev);
+ goto exit_free_grefs;
+ }
+
+ np->netdev = netdev;
+
+ exit:
+ if ((err != 0) && (netdev != NULL))
+ kfree(netdev);
+ else if (val != NULL)
+ *val = netdev;
+ return err;
+
+ exit_free_grefs:
+ gnttab_free_grant_references(np->gref_tx_head);
+ gnttab_free_grant_references(np->gref_rx_head);
+ goto exit;
}
static int destroy_netdev(struct net_device *netdev)
{
-
#ifdef CONFIG_PROC_FS
xennet_proc_delif(netdev);
#endif
-
unregister_netdev(netdev);
-
return 0;
}
@@ -992,20 +939,20 @@
static int
inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
-
- /* UP event and is it one of our devices? */
- if (event == NETDEV_UP && dev->open == network_open)
- (void)send_fake_arp(dev);
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+
+ /* UP event and is it one of our devices? */
+ if (event == NETDEV_UP && dev->open == network_open)
+ (void)send_fake_arp(dev);
- return NOTIFY_DONE;
+ return NOTIFY_DONE;
}
static struct notifier_block notifier_inetdev = {
- .notifier_call = inetdev_notify,
- .next = NULL,
- .priority = 0
+ .notifier_call = inetdev_notify,
+ .next = NULL,
+ .priority = 0
};
static struct xenbus_device_id netfront_ids[] = {
@@ -1022,10 +969,8 @@
evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
int err;
-#ifdef CONFIG_XEN_NETDEV_GRANT
info->tx_ring_ref = GRANT_INVALID_REF;
info->rx_ring_ref = GRANT_INVALID_REF;
-#endif
info->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
if (info->tx == 0) {
@@ -1043,7 +988,6 @@
memset(info->rx, 0, PAGE_SIZE);
info->backend_state = BEST_DISCONNECTED;
-#ifdef CONFIG_XEN_NETDEV_GRANT
err = gnttab_grant_foreign_access(info->backend_id,
virt_to_mfn(info->tx), 0);
if (err < 0) {
@@ -1059,11 +1003,6 @@
goto out;
}
info->rx_ring_ref = err;
-
-#else
- info->tx_ring_ref = virt_to_mfn(info->tx);
- info->rx_ring_ref = virt_to_mfn(info->rx);
-#endif
op.u.alloc_unbound.dom = info->backend_id;
err = HYPERVISOR_event_channel_op(&op);
@@ -1082,7 +1021,6 @@
free_page((unsigned long)info->rx);
info->rx = 0;
-#ifdef CONFIG_XEN_NETDEV_GRANT
if (info->tx_ring_ref != GRANT_INVALID_REF)
gnttab_end_foreign_access(info->tx_ring_ref, 0);
info->tx_ring_ref = GRANT_INVALID_REF;
@@ -1090,7 +1028,6 @@
if (info->rx_ring_ref != GRANT_INVALID_REF)
gnttab_end_foreign_access(info->rx_ring_ref, 0);
info->rx_ring_ref = GRANT_INVALID_REF;
-#endif
return err;
}
@@ -1104,7 +1041,6 @@
free_page((unsigned long)info->rx);
info->rx = 0;
-#ifdef CONFIG_XEN_NETDEV_GRANT
if (info->tx_ring_ref != GRANT_INVALID_REF)
gnttab_end_foreign_access(info->tx_ring_ref, 0);
info->tx_ring_ref = GRANT_INVALID_REF;
@@ -1112,7 +1048,6 @@
if (info->rx_ring_ref != GRANT_INVALID_REF)
gnttab_end_foreign_access(info->rx_ring_ref, 0);
info->rx_ring_ref = GRANT_INVALID_REF;
-#endif
unbind_evtchn_from_irqhandler(info->evtchn, info->netdev);
info->evtchn = 0;
@@ -1282,10 +1217,6 @@
return err;
}
-
- /* Call once in case entries already there. */
- watch_for_status(&info->watch, info->watch.node);
-
return 0;
}
@@ -1344,72 +1275,50 @@
static int wait_for_netif(void)
{
- int err = 0;
- int i;
-
- /*
- * We should figure out how many and which devices we need to
- * proceed and only wait for those. For now, continue once the
- * first device is around.
- */
- for ( i=0; netif_state != NETIF_STATE_CONNECTED && (i < 10*HZ); i++ )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- if (netif_state != NETIF_STATE_CONNECTED) {
- WPRINTK("Timeout connecting to device!\n");
- err = -ENOSYS;
- }
- return err;
+ int err = 0;
+ int i;
+
+ /*
+ * We should figure out how many and which devices we need to
+ * proceed and only wait for those. For now, continue once the
+ * first device is around.
+ */
+ for ( i=0; netif_state != NETIF_STATE_CONNECTED && (i < 10*HZ); i++ )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ if (netif_state != NETIF_STATE_CONNECTED) {
+ WPRINTK("Timeout connecting to device!\n");
+ err = -ENOSYS;
+ }
+ return err;
}
static int __init netif_init(void)
{
- int err = 0;
-
- if (xen_start_info->flags & SIF_INITDOMAIN)
- return 0;
-
- if ((err = xennet_proc_init()) != 0)
- return err;
-
- IPRINTK("Initialising virtual ethernet driver.\n");
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
- IPRINTK("Using grant tables.\n");
-
- /* A grant for every tx ring slot */
- if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
- &gref_tx_head) < 0) {
- printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
- return 1;
- }
- /* A grant for every rx ring slot */
- if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
- &gref_rx_head) < 0) {
- printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
- return 1;
- }
-#endif
-
-
- (void)register_inetaddr_notifier(¬ifier_inetdev);
-
- init_net_xenbus();
-
- wait_for_netif();
-
- return err;
+ int err = 0;
+
+ if (xen_start_info->flags & SIF_INITDOMAIN)
+ return 0;
+
+ if ((err = xennet_proc_init()) != 0)
+ return err;
+
+ IPRINTK("Initialising virtual ethernet driver.\n");
+
+ (void)register_inetaddr_notifier(¬ifier_inetdev);
+
+ init_net_xenbus();
+
+ wait_for_netif();
+
+ return err;
}
static void netif_exit(void)
{
-#ifdef CONFIG_XEN_NETDEV_GRANT
- gnttab_free_grant_references(gref_tx_head);
- gnttab_free_grant_references(gref_rx_head);
-#endif
}
#ifdef CONFIG_PROC_FS
@@ -1419,147 +1328,159 @@
#define TARGET_CUR 2UL
static int xennet_proc_read(
- char *page, char **start, off_t off, int count, int *eof, void *data)
-{
- struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
- struct net_private *np = netdev_priv(dev);
- int len = 0, which_target = (long)data & 3;
+ char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ struct net_device *dev =
+ (struct net_device *)((unsigned long)data & ~3UL);
+ struct net_private *np = netdev_priv(dev);
+ int len = 0, which_target = (long)data & 3;
- switch (which_target)
- {
- case TARGET_MIN:
- len = sprintf(page, "%d\n", np->rx_min_target);
- break;
- case TARGET_MAX:
- len = sprintf(page, "%d\n", np->rx_max_target);
- break;
- case TARGET_CUR:
- len = sprintf(page, "%d\n", np->rx_target);
- break;
- }
-
- *eof = 1;
- return len;
+ switch (which_target)
+ {
+ case TARGET_MIN:
+ len = sprintf(page, "%d\n", np->rx_min_target);
+ break;
+ case TARGET_MAX:
+ len = sprintf(page, "%d\n", np->rx_max_target);
+ break;
+ case TARGET_CUR:
+ len = sprintf(page, "%d\n", np->rx_target);
+ break;
+ }
+
+ *eof = 1;
+ return len;
}
static int xennet_proc_write(
- struct file *file, const char __user *buffer,
- unsigned long count, void *data)
-{
- struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
- struct net_private *np = netdev_priv(dev);
- int which_target = (long)data & 3;
- char string[64];
- long target;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (count <= 1)
- return -EBADMSG; /* runt */
- if (count > sizeof(string))
- return -EFBIG; /* too long */
-
- if (copy_from_user(string, buffer, count))
- return -EFAULT;
- string[sizeof(string)-1] = '\0';
-
- target = simple_strtol(string, NULL, 10);
- if (target < RX_MIN_TARGET)
- target = RX_MIN_TARGET;
- if (target > RX_MAX_TARGET)
- target = RX_MAX_TARGET;
-
- spin_lock(&np->rx_lock);
-
- switch (which_target)
- {
- case TARGET_MIN:
- if (target > np->rx_max_target)
- np->rx_max_target = target;
- np->rx_min_target = target;
- if (target > np->rx_target)
- np->rx_target = target;
- break;
- case TARGET_MAX:
- if (target < np->rx_min_target)
- np->rx_min_target = target;
- np->rx_max_target = target;
- if (target < np->rx_target)
- np->rx_target = target;
- break;
- case TARGET_CUR:
- break;
- }
-
- network_alloc_rx_buffers(dev);
-
- spin_unlock(&np->rx_lock);
-
- return count;
+ struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ struct net_device *dev =
+ (struct net_device *)((unsigned long)data & ~3UL);
+ struct net_private *np = netdev_priv(dev);
+ int which_target = (long)data & 3;
+ char string[64];
+ long target;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (count <= 1)
+ return -EBADMSG; /* runt */
+ if (count > sizeof(string))
+ return -EFBIG; /* too long */
+
+ if (copy_from_user(string, buffer, count))
+ return -EFAULT;
+ string[sizeof(string)-1] = '\0';
+
+ target = simple_strtol(string, NULL, 10);
+ if (target < RX_MIN_TARGET)
+ target = RX_MIN_TARGET;
+ if (target > RX_MAX_TARGET)
+ target = RX_MAX_TARGET;
+
+ spin_lock(&np->rx_lock);
+
+ switch (which_target)
+ {
+ case TARGET_MIN:
+ if (target > np->rx_max_target)
+ np->rx_max_target = target;
+ np->rx_min_target = target;
+ if (target > np->rx_target)
+ np->rx_target = target;
+ break;
+ case TARGET_MAX:
+ if (target < np->rx_min_target)
+ np->rx_min_target = target;
+ np->rx_max_target = target;
+ if (target < np->rx_target)
+ np->rx_target = target;
+ break;
+ case TARGET_CUR:
+ break;
+ }
+
+ network_alloc_rx_buffers(dev);
+
+ spin_unlock(&np->rx_lock);
+
+ return count;
}
static int xennet_proc_init(void)
{
- if (proc_mkdir("xen/net", NULL) == NULL)
- return -ENOMEM;
- return 0;
+ if (proc_mkdir("xen/net", NULL) == NULL)
+ return -ENOMEM;
+ return 0;
}
static int xennet_proc_addif(struct net_device *dev)
{
- struct proc_dir_entry *dir, *min, *max, *cur;
- char name[30];
-
- sprintf(name, "xen/net/%s", dev->name);
-
- dir = proc_mkdir(name, NULL);
- if (!dir)
- goto nomem;
-
- min = create_proc_entry("rxbuf_min", 0644, dir);
- max = create_proc_entry("rxbuf_max", 0644, dir);
- cur = create_proc_entry("rxbuf_cur", 0444, dir);
- if (!min || !max || !cur)
- goto nomem;
-
- min->read_proc = xennet_proc_read;
- min->write_proc = xennet_proc_write;
- min->data = (void *)((unsigned long)dev | TARGET_MIN);
-
- max->read_proc = xennet_proc_read;
- max->write_proc = xennet_proc_write;
- max->data = (void *)((unsigned long)dev | TARGET_MAX);
-
- cur->read_proc = xennet_proc_read;
- cur->write_proc = xennet_proc_write;
- cur->data = (void *)((unsigned long)dev | TARGET_CUR);
-
- return 0;
+ struct proc_dir_entry *dir, *min, *max, *cur;
+ char name[30];
+
+ sprintf(name, "xen/net/%s", dev->name);
+
+ dir = proc_mkdir(name, NULL);
+ if (!dir)
+ goto nomem;
+
+ min = create_proc_entry("rxbuf_min", 0644, dir);
+ max = create_proc_entry("rxbuf_max", 0644, dir);
+ cur = create_proc_entry("rxbuf_cur", 0444, dir);
+ if (!min || !max || !cur)
+ goto nomem;
+
+ min->read_proc = xennet_proc_read;
+ min->write_proc = xennet_proc_write;
+ min->data = (void *)((unsigned long)dev | TARGET_MIN);
+
+ max->read_proc = xennet_proc_read;
+ max->write_proc = xennet_proc_write;
+ max->data = (void *)((unsigned long)dev | TARGET_MAX);
+
+ cur->read_proc = xennet_proc_read;
+ cur->write_proc = xennet_proc_write;
+ cur->data = (void *)((unsigned long)dev | TARGET_CUR);
+
+ return 0;
nomem:
- xennet_proc_delif(dev);
- return -ENOMEM;
+ xennet_proc_delif(dev);
+ return -ENOMEM;
}
static void xennet_proc_delif(struct net_device *dev)
{
- char name[30];
-
- sprintf(name, "xen/net/%s/rxbuf_min", dev->name);
- remove_proc_entry(name, NULL);
-
- sprintf(name, "xen/net/%s/rxbuf_max", dev->name);
- remove_proc_entry(name, NULL);
-
- sprintf(name, "xen/net/%s/rxbuf_cur", dev->name);
- remove_proc_entry(name, NULL);
-
- sprintf(name, "xen/net/%s", dev->name);
- remove_proc_entry(name, NULL);
+ char name[30];
+
+ sprintf(name, "xen/net/%s/rxbuf_min", dev->name);
+ remove_proc_entry(name, NULL);
+
+ sprintf(name, "xen/net/%s/rxbuf_max", dev->name);
+ remove_proc_entry(name, NULL);
+
+ sprintf(name, "xen/net/%s/rxbuf_cur", dev->name);
+ remove_proc_entry(name, NULL);
+
+ sprintf(name, "xen/net/%s", dev->name);
+ remove_proc_entry(name, NULL);
}
#endif
module_init(netif_init);
module_exit(netif_exit);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu Sep 22
17:42:01 2005
@@ -41,232 +41,253 @@
static int privcmd_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long data)
{
- int ret = -ENOSYS;
-
- switch ( cmd )
- {
- case IOCTL_PRIVCMD_HYPERCALL:
- {
- privcmd_hypercall_t hypercall;
+ int ret = -ENOSYS;
+
+ switch (cmd) {
+ case IOCTL_PRIVCMD_HYPERCALL: {
+ privcmd_hypercall_t hypercall;
- if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
- return -EFAULT;
+ if (copy_from_user(&hypercall, (void *)data,
+ sizeof(hypercall)))
+ return -EFAULT;
#if defined(__i386__)
- __asm__ __volatile__ (
- "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
- "movl 4(%%eax),%%ebx ;"
- "movl 8(%%eax),%%ecx ;"
- "movl 12(%%eax),%%edx ;"
- "movl 16(%%eax),%%esi ;"
- "movl 20(%%eax),%%edi ;"
- "movl (%%eax),%%eax ;"
- TRAP_INSTR "; "
- "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
- : "=a" (ret) : "0" (&hypercall) : "memory" );
+ __asm__ __volatile__ (
+ "pushl %%ebx; pushl %%ecx; pushl %%edx; "
+ "pushl %%esi; pushl %%edi; "
+ "movl 4(%%eax),%%ebx ;"
+ "movl 8(%%eax),%%ecx ;"
+ "movl 12(%%eax),%%edx ;"
+ "movl 16(%%eax),%%esi ;"
+ "movl 20(%%eax),%%edi ;"
+ "movl (%%eax),%%eax ;"
+ TRAP_INSTR "; "
+ "popl %%edi; popl %%esi; popl %%edx; "
+ "popl %%ecx; popl %%ebx"
+ : "=a" (ret) : "0" (&hypercall) : "memory" );
#elif defined (__x86_64__)
- {
- long ign1, ign2, ign3;
- __asm__ __volatile__ (
- "movq %8,%%r10; movq %9,%%r8;" TRAP_INSTR
- : "=a" (ret), "=D" (ign1), "=S" (ign2), "=d" (ign3)
- : "0" ((unsigned long)hypercall.op),
- "1" ((unsigned long)hypercall.arg[0]),
- "2" ((unsigned long)hypercall.arg[1]),
- "3" ((unsigned long)hypercall.arg[2]),
- "g" ((unsigned long)hypercall.arg[3]),
- "g" ((unsigned long)hypercall.arg[4])
- : "r11","rcx","r8","r10","memory");
- }
+ {
+ long ign1, ign2, ign3;
+ __asm__ __volatile__ (
+ "movq %8,%%r10; movq %9,%%r8;" TRAP_INSTR
+ : "=a" (ret), "=D" (ign1),
+ "=S" (ign2), "=d" (ign3)
+ : "0" ((unsigned long)hypercall.op),
+ "1" ((unsigned long)hypercall.arg[0]),
+ "2" ((unsigned long)hypercall.arg[1]),
+ "3" ((unsigned long)hypercall.arg[2]),
+ "g" ((unsigned long)hypercall.arg[3]),
+ "g" ((unsigned long)hypercall.arg[4])
+ : "r11","rcx","r8","r10","memory");
+ }
#elif defined (__ia64__)
- __asm__ __volatile__ (
- ";; mov r14=%2; mov r15=%3; mov r16=%4; mov r17=%5; mov r18=%6; mov
-r2=%1; break 0x1000;; mov %0=r8 ;;"
- : "=r" (ret)
- : "r" (hypercall.op),
- "r" (hypercall.arg[0]),
- "r" (hypercall.arg[1]),
- "r" (hypercall.arg[2]),
- "r" (hypercall.arg[3]),
- "r" (hypercall.arg[4])
- : "r14","r15","r16","r17","r18","r2","r8","memory");
+ __asm__ __volatile__ (
+ ";; mov r14=%2; mov r15=%3; "
+ "mov r16=%4; mov r17=%5; mov r18=%6;"
+ "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
+ : "=r" (ret)
+ : "r" (hypercall.op),
+ "r" (hypercall.arg[0]),
+ "r" (hypercall.arg[1]),
+ "r" (hypercall.arg[2]),
+ "r" (hypercall.arg[3]),
+ "r" (hypercall.arg[4])
+ : "r14","r15","r16","r17","r18","r2","r8","memory");
#endif
- }
- break;
+ }
+ break;
#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
- case IOCTL_PRIVCMD_MMAP:
- {
+ case IOCTL_PRIVCMD_MMAP: {
#define PRIVCMD_MMAP_SZ 32
- privcmd_mmap_t mmapcmd;
- privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ], *p;
- int i, rc;
-
- if ( copy_from_user(&mmapcmd, (void *)data, sizeof(mmapcmd)) )
- return -EFAULT;
-
- p = mmapcmd.entry;
-
- for (i=0; i<mmapcmd.num; i+=PRIVCMD_MMAP_SZ, p+=PRIVCMD_MMAP_SZ)
- {
- int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
- PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
-
-
- if ( copy_from_user(&msg, p, n*sizeof(privcmd_mmap_entry_t)) )
- return -EFAULT;
+ privcmd_mmap_t mmapcmd;
+ privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ], *p;
+ int i, rc;
+
+ if (copy_from_user(&mmapcmd, (void *)data, sizeof(mmapcmd)))
+ return -EFAULT;
+
+ p = mmapcmd.entry;
+
+ for (i = 0; i < mmapcmd.num;
+ i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
+ int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
+ PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
+
+ if (copy_from_user(&msg, p,
+ n*sizeof(privcmd_mmap_entry_t)))
+ return -EFAULT;
- for ( j = 0; j < n; j++ )
- {
- struct vm_area_struct *vma =
- find_vma( current->mm, msg[j].va );
-
- if ( !vma )
- return -EINVAL;
-
- if ( msg[j].va > PAGE_OFFSET )
- return -EINVAL;
-
- if ( (msg[j].va + (msg[j].npages<<PAGE_SHIFT)) > vma->vm_end )
- return -EINVAL;
-
- if ( (rc = direct_remap_pfn_range(vma->vm_mm,
- msg[j].va&PAGE_MASK,
- msg[j].mfn,
- msg[j].npages<<PAGE_SHIFT,
- vma->vm_page_prot,
- mmapcmd.dom)) < 0 )
- return rc;
- }
- }
- ret = 0;
- }
- break;
-
- case IOCTL_PRIVCMD_MMAPBATCH:
- {
- mmu_update_t u;
- privcmd_mmapbatch_t m;
- struct vm_area_struct *vma = NULL;
- unsigned long *p, addr;
- unsigned long mfn, ptep;
- int i;
-
- if ( copy_from_user(&m, (void *)data, sizeof(m)) )
- { ret = -EFAULT; goto batch_err; }
-
- vma = find_vma( current->mm, m.addr );
-
- if ( !vma )
- { ret = -EINVAL; goto batch_err; }
-
- if ( m.addr > PAGE_OFFSET )
- { ret = -EFAULT; goto batch_err; }
-
- if ( (m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end )
- { ret = -EFAULT; goto batch_err; }
-
- p = m.arr;
- addr = m.addr;
- for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ )
- {
- if ( get_user(mfn, p) )
- return -EFAULT;
-
- ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
- if (ret)
- goto batch_err;
-
- u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
- u.ptr = ptep;
-
- if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
- put_user(0xF0000000 | mfn, p);
- }
-
- ret = 0;
- break;
-
- batch_err:
- printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n",
- ret, vma, m.addr, m.num, m.arr,
- vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
- break;
- }
- break;
+ for (j = 0; j < n; j++) {
+ struct vm_area_struct *vma =
+ find_vma( current->mm, msg[j].va );
+
+ if (!vma)
+ return -EINVAL;
+
+ if (msg[j].va > PAGE_OFFSET)
+ return -EINVAL;
+
+ if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
+ > vma->vm_end )
+ return -EINVAL;
+
+ if ((rc = direct_remap_pfn_range(
+ vma,
+ msg[j].va&PAGE_MASK,
+ msg[j].mfn,
+ msg[j].npages<<PAGE_SHIFT,
+ vma->vm_page_prot,
+ mmapcmd.dom)) < 0)
+ return rc;
+ }
+ }
+ ret = 0;
+ }
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH: {
+ mmu_update_t u;
+ privcmd_mmapbatch_t m;
+ struct vm_area_struct *vma = NULL;
+ unsigned long *p, addr;
+ unsigned long mfn, ptep;
+ int i;
+
+ if (copy_from_user(&m, (void *)data, sizeof(m))) {
+ ret = -EFAULT;
+ goto batch_err;
+ }
+
+ vma = find_vma( current->mm, m.addr );
+ if (!vma) {
+ ret = -EINVAL;
+ goto batch_err;
+ }
+
+ if (m.addr > PAGE_OFFSET) {
+ ret = -EFAULT;
+ goto batch_err;
+ }
+
+ if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
+ ret = -EFAULT;
+ goto batch_err;
+ }
+
+ p = m.arr;
+ addr = m.addr;
+ for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
+ if (get_user(mfn, p))
+ return -EFAULT;
+
+ ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
+ if (ret)
+ goto batch_err;
+
+ u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
+ u.ptr = ptep;
+
+ if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
+ put_user(0xF0000000 | mfn, p);
+ }
+
+ ret = 0;
+ break;
+
+ batch_err:
+ printk("batch_err ret=%d vma=%p addr=%lx "
+ "num=%d arr=%p %lx-%lx\n",
+ ret, vma, m.addr, m.num, m.arr,
+ vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
+ break;
+ }
+ break;
#endif
- case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN:
- {
- unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
- pgd_t *pgd = pgd_offset_k(m2pv);
- pud_t *pud = pud_offset(pgd, m2pv);
- pmd_t *pmd = pmd_offset(pud, m2pv);
- unsigned long m2p_start_mfn = (*(unsigned long *)pmd) >> PAGE_SHIFT;
- ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
- }
- break;
-
- case IOCTL_PRIVCMD_INITDOMAIN_STORE:
- {
- extern int do_xenbus_probe(void*);
- unsigned long page;
-
- if (xen_start_info->store_evtchn != 0) {
- ret = xen_start_info->store_mfn;
- break;
- }
-
- /* Allocate page. */
- page = get_zeroed_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
-
- /* We don't refcnt properly, so set reserved on page.
- * (this allocation is permanent) */
- SetPageReserved(virt_to_page(page));
-
- /* Initial connect. Setup channel and page. */
- xen_start_info->store_evtchn = data;
- xen_start_info->store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >>
- PAGE_SHIFT);
- ret = xen_start_info->store_mfn;
-
- /* We'll return then this will wait for daemon to answer */
- kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
- }
- break;
-
- default:
- ret = -EINVAL;
- break;
- }
- return ret;
+ case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: {
+ unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
+ pgd_t *pgd = pgd_offset_k(m2pv);
+ pud_t *pud = pud_offset(pgd, m2pv);
+ pmd_t *pmd = pmd_offset(pud, m2pv);
+ unsigned long m2p_start_mfn =
+ (*(unsigned long *)pmd) >> PAGE_SHIFT;
+ ret = put_user(m2p_start_mfn, (unsigned long *)data) ?
+ -EFAULT: 0;
+ }
+ break;
+
+ case IOCTL_PRIVCMD_INITDOMAIN_STORE: {
+ extern int do_xenbus_probe(void*);
+ unsigned long page;
+
+ if (xen_start_info->store_evtchn != 0) {
+ ret = xen_start_info->store_mfn;
+ break;
+ }
+
+ /* Allocate page. */
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ /* We don't refcnt properly, so set reserved on page.
+ * (this allocation is permanent) */
+ SetPageReserved(virt_to_page(page));
+
+ /* Initial connect. Setup channel and page. */
+ xen_start_info->store_evtchn = data;
+ xen_start_info->store_mfn =
+ pfn_to_mfn(virt_to_phys((void *)page) >>
+ PAGE_SHIFT);
+ ret = xen_start_info->store_mfn;
+
+ /* We'll return then this will wait for daemon to answer */
+ kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
+ }
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
}
static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
{
- /* DONTCOPY is essential for Xen as copy_page_range is broken. */
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
-
- return 0;
+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+
+ return 0;
}
static struct file_operations privcmd_file_ops = {
- .ioctl = privcmd_ioctl,
- .mmap = privcmd_mmap,
+ .ioctl = privcmd_ioctl,
+ .mmap = privcmd_mmap,
};
static int __init privcmd_init(void)
{
- privcmd_intf = create_xen_proc_entry("privcmd", 0400);
- if ( privcmd_intf != NULL )
- privcmd_intf->proc_fops = &privcmd_file_ops;
-
- return 0;
+ privcmd_intf = create_xen_proc_entry("privcmd", 0400);
+ if (privcmd_intf != NULL)
+ privcmd_intf->proc_fops = &privcmd_file_ops;
+
+ return 0;
}
__initcall(privcmd_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h Thu Sep 22 17:42:01 2005
@@ -11,10 +11,10 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm-xen/evtchn.h>
+#include <asm-xen/driver_util.h>
#include <asm-xen/xen-public/io/tpmif.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
-#include <asm-xen/xen-public/io/domain_controller.h>
#if 0
#define ASSERT(_p) \
@@ -34,12 +34,12 @@
unsigned int handle;
/* Physical parameters of the comms window. */
- unsigned long tx_shmem_frame;
unsigned int evtchn;
unsigned int remote_evtchn;
/* The shared rings and indexes. */
tpmif_tx_interface_t *tx;
+ struct vm_struct *tx_area;
/* Miscellaneous private stuff. */
enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
@@ -55,9 +55,7 @@
struct work_struct work;
u16 shmem_handle;
- unsigned long shmem_vaddr;
grant_ref_t shmem_ref;
-
} tpmif_t;
void tpmif_disconnect_complete(tpmif_t * tpmif);
@@ -86,3 +84,13 @@
#define MMAP_VADDR(t,_req) ((t)->mmap_vstart + ((_req) * PAGE_SIZE))
#endif /* __TPMIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Thu Sep 22
17:42:01 2005
@@ -1,4 +1,4 @@
-/******************************************************************************
+ /*****************************************************************************
* drivers/xen/tpmback/interface.c
*
* Vritual TPM interface management.
@@ -14,187 +14,192 @@
#include "common.h"
#include <asm-xen/balloon.h>
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-
#define TPMIF_HASHSZ (2 << 5)
#define TPMIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(TPMIF_HASHSZ-1))
static kmem_cache_t *tpmif_cachep;
int num_frontends = 0;
+
LIST_HEAD(tpmif_list);
-
-tpmif_t *alloc_tpmif(domid_t domid, long int instance)
-{
- struct page *page;
- tpmif_t *tpmif;
-
- tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
- if (!tpmif)
- return ERR_PTR(-ENOMEM);
-
- memset(tpmif, 0, sizeof(*tpmif));
- tpmif->domid = domid;
- tpmif->status = DISCONNECTED;
- tpmif->tpm_instance = instance;
- atomic_set(&tpmif->refcnt, 1);
-
- page = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE);
- BUG_ON(page == NULL);
- tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
- list_add(&tpmif->tpmif_list, &tpmif_list);
- num_frontends++;
-
- return tpmif;
-}
-
-
-void free_tpmif(tpmif_t *tpmif)
-{
- num_frontends--;
- list_del(&tpmif->tpmif_list);
- kmem_cache_free(tpmif_cachep, tpmif);
-}
-
-
-tpmif_t *tpmif_find(domid_t domid, long int instance)
-{
- tpmif_t *tpmif;
-
- list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
- if (tpmif->tpm_instance == instance) {
- if (tpmif->domid == domid) {
- tpmif_get(tpmif);
- return tpmif;
- } else {
- return NULL;
- }
- }
- }
-
- return alloc_tpmif(domid, instance);
-}
-
-
-static int map_frontend_page(tpmif_t *tpmif, unsigned long localaddr,
- unsigned long shared_page)
-{
- struct gnttab_map_grant_ref op = {
- .host_addr = localaddr,
- .flags = GNTMAP_host_map,
- .ref = shared_page,
- .dom = tpmif->domid,
- };
-
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure !\n");
- return op.handle;
- }
-
- tpmif->shmem_ref = shared_page;
- tpmif->shmem_handle = op.handle;
- tpmif->shmem_vaddr = localaddr;
- return 0;
-}
-
-
-static void unmap_frontend_page(tpmif_t *tpmif)
-{
- struct gnttab_unmap_grant_ref op;
-
- op.host_addr = tpmif->shmem_vaddr;
- op.handle = tpmif->shmem_handle;
- op.dev_bus_addr = 0;
-
- BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-}
-
-
-int tpmif_map(tpmif_t *tpmif,
- unsigned long shared_page, unsigned int evtchn)
-{
- struct vm_struct *vma;
- evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
- int err;
-
- BUG_ON(tpmif->remote_evtchn);
-
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- return -ENOMEM;
-
- err = map_frontend_page(tpmif,
- VMALLOC_VMADDR(vma->addr),
- shared_page);
- if (err) {
- vfree(vma->addr);
- return err;
- }
-
- op.u.bind_interdomain.dom1 = DOMID_SELF;
- op.u.bind_interdomain.dom2 = tpmif->domid;
- op.u.bind_interdomain.port1 = 0;
- op.u.bind_interdomain.port2 = evtchn;
- err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_frontend_page(tpmif);
- vfree(vma->addr);
- return err;
- }
-
- tpmif->evtchn = op.u.bind_interdomain.port1;
- tpmif->remote_evtchn = evtchn;
-
- tpmif->tx = (tpmif_tx_interface_t *) vma->addr;
-
- bind_evtchn_to_irqhandler(tpmif->evtchn,
- tpmif_be_int,
- 0,
- "tpmif-backend",
- tpmif);
- tpmif->status = CONNECTED;
- tpmif->shmem_ref = shared_page;
- tpmif->active = 1;
-
- return 0;
-}
-
-
-static void __tpmif_disconnect_complete(void *arg)
-{
- evtchn_op_t op = { .cmd = EVTCHNOP_close };
- tpmif_t *tpmif = (tpmif_t *) arg;
-
- op.u.close.port = tpmif->evtchn;
- op.u.close.dom = DOMID_SELF;
- HYPERVISOR_event_channel_op(&op);
- op.u.close.port = tpmif->remote_evtchn;
- op.u.close.dom = tpmif->domid;
- HYPERVISOR_event_channel_op(&op);
-
- if (tpmif->evtchn)
- unbind_evtchn_from_irqhandler(tpmif->evtchn, tpmif);
-
- if (tpmif->tx) {
- unmap_frontend_page(tpmif);
- vfree(tpmif->tx);
- }
-
- free_tpmif(tpmif);
-}
-
-
-void tpmif_disconnect_complete(tpmif_t * tpmif)
-{
- INIT_WORK(&tpmif->work, __tpmif_disconnect_complete, (void *)tpmif);
- schedule_work(&tpmif->work);
-}
-
-
-void __init tpmif_interface_init(void)
-{
- tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof(tpmif_t),
- 0, 0, NULL, NULL);
-}
+tpmif_t *
+alloc_tpmif(domid_t domid, long int instance)
+{
+ struct page *page;
+ tpmif_t *tpmif;
+
+ tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
+ if (!tpmif)
+ return ERR_PTR(-ENOMEM);
+
+ memset(tpmif, 0, sizeof (*tpmif));
+ tpmif->domid = domid;
+ tpmif->status = DISCONNECTED;
+ tpmif->tpm_instance = instance;
+ atomic_set(&tpmif->refcnt, 1);
+
+ page = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE);
+ BUG_ON(page == NULL);
+ tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+ list_add(&tpmif->tpmif_list, &tpmif_list);
+ num_frontends++;
+
+ return tpmif;
+}
+
+void
+free_tpmif(tpmif_t * tpmif)
+{
+ num_frontends--;
+ list_del(&tpmif->tpmif_list);
+ kmem_cache_free(tpmif_cachep, tpmif);
+}
+
+tpmif_t *
+tpmif_find(domid_t domid, long int instance)
+{
+ tpmif_t *tpmif;
+
+ list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
+ if (tpmif->tpm_instance == instance) {
+ if (tpmif->domid == domid) {
+ tpmif_get(tpmif);
+ return tpmif;
+ } else {
+ return NULL;
+ }
+ }
+ }
+
+ return alloc_tpmif(domid, instance);
+}
+
+static int
+map_frontend_page(tpmif_t *tpmif, unsigned long shared_page)
+{
+ struct gnttab_map_grant_ref op = {
+ .host_addr = (unsigned long)tpmif->tx_area->addr,
+ .flags = GNTMAP_host_map,
+ .ref = shared_page,
+ .dom = tpmif->domid,
+ };
+
+ lock_vm_area(tpmif->tx_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+ unlock_vm_area(tpmif->tx_area);
+
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
+ }
+
+ tpmif->shmem_ref = shared_page;
+ tpmif->shmem_handle = op.handle;
+
+ return 0;
+}
+
+static void
+unmap_frontend_page(tpmif_t *tpmif)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ op.host_addr = (unsigned long)tpmif->tx_area->addr;
+ op.handle = tpmif->shmem_handle;
+ op.dev_bus_addr = 0;
+
+ lock_vm_area(tpmif->tx_area);
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+ unlock_vm_area(tpmif->tx_area);
+}
+
+int
+tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn)
+{
+ evtchn_op_t op = {.cmd = EVTCHNOP_bind_interdomain };
+ int err;
+
+ BUG_ON(tpmif->remote_evtchn);
+
+ if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL)
+ return -ENOMEM;
+
+ err = map_frontend_page(tpmif, shared_page);
+ if (err) {
+ free_vm_area(tpmif->tx_area);
+ return err;
+ }
+
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = tpmif->domid;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_frontend_page(tpmif);
+ free_vm_area(tpmif->tx_area);
+ return err;
+ }
+
+ tpmif->evtchn = op.u.bind_interdomain.port1;
+ tpmif->remote_evtchn = evtchn;
+
+ tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr;
+
+ bind_evtchn_to_irqhandler(tpmif->evtchn,
+ tpmif_be_int, 0, "tpmif-backend", tpmif);
+ tpmif->status = CONNECTED;
+ tpmif->shmem_ref = shared_page;
+ tpmif->active = 1;
+
+ return 0;
+}
+
+static void
+__tpmif_disconnect_complete(void *arg)
+{
+ evtchn_op_t op = {.cmd = EVTCHNOP_close };
+ tpmif_t *tpmif = (tpmif_t *) arg;
+
+ op.u.close.port = tpmif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = tpmif->remote_evtchn;
+ op.u.close.dom = tpmif->domid;
+ HYPERVISOR_event_channel_op(&op);
+
+ if (tpmif->evtchn)
+ unbind_evtchn_from_irqhandler(tpmif->evtchn, tpmif);
+
+ if (tpmif->tx) {
+ unmap_frontend_page(tpmif);
+ free_vm_area(tpmif->tx_area);
+ }
+
+ free_tpmif(tpmif);
+}
+
+void
+tpmif_disconnect_complete(tpmif_t * tpmif)
+{
+ INIT_WORK(&tpmif->work, __tpmif_disconnect_complete, (void *)tpmif);
+ schedule_work(&tpmif->work);
+}
+
+void __init
+tpmif_interface_init(void)
+{
+ tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
+ 0, 0, NULL, NULL);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Thu Sep 22
17:42:01 2005
@@ -566,7 +566,7 @@
* the more time we give the TPM to process the
request.
*/
mod_timer(&pak->processing_timer,
- jiffies + (num_frontends * 10 * HZ));
+ jiffies + (num_frontends * 60 * HZ));
dataex.copied_so_far = 0;
}
}
@@ -850,7 +850,7 @@
write_lock_irqsave(&dataex.pak_lock, flags);
list_add_tail(&pak->next, &dataex.pending_pak);
/* give the TPM some time to pick up the request */
- mod_timer(&pak->processing_timer, jiffies + (10 * HZ));
+ mod_timer(&pak->processing_timer, jiffies + (30 * HZ));
write_unlock_irqrestore(&dataex.pak_lock,
flags);
@@ -1075,3 +1075,13 @@
}
__initcall(tpmback_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu Sep 22 17:42:01 2005
@@ -213,6 +213,7 @@
be->dev = dev;
be->backend_watch.node = dev->nodename;
+ /* Implicitly calls backend_changed() once. */
be->backend_watch.callback = backend_changed;
be->instance = -1;
err = register_xenbus_watch(&be->backend_watch);
@@ -236,8 +237,6 @@
}
dev->data = be;
-
- backend_changed(&be->backend_watch, dev->nodename);
return err;
free_be:
@@ -269,3 +268,13 @@
{
xenbus_register_backend(&tpmback);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Thu Sep 22
17:42:01 2005
@@ -46,7 +46,6 @@
#include <asm-xen/xen-public/io/tpmif.h>
#include <asm/uaccess.h>
#include <asm-xen/xenbus.h>
-#include <asm-xen/xen-public/io/domain_controller.h>
#include <asm-xen/xen-public/grant_table.h>
#include "tpmfront.h"
@@ -258,18 +257,24 @@
tpm_allocate_buffers(tp);
- info->ring_ref = gnttab_claim_grant_reference(&gref_head);
- ASSERT(info->ring_ref != -ENOSPC);
- gnttab_grant_foreign_access_ref(info->ring_ref,
- backend_id,
- (virt_to_machine(tp->tx) >> PAGE_SHIFT),
- 0);
+ err = gnttab_grant_foreign_access(backend_id,
+ (virt_to_machine(tp->tx) >>
PAGE_SHIFT),
+ 0);
+
+ if (err == -ENOSPC) {
+ free_page((unsigned long)sring);
+ tp->tx = NULL;
+ xenbus_dev_error(dev, err, "allocating grant reference");
+ return err;
+ }
+ info->ring_ref = err;
op.u.alloc_unbound.dom = backend_id;
err = HYPERVISOR_event_channel_op(&op);
if (err) {
+ gnttab_end_foreign_access(info->ring_ref, 0);
free_page((unsigned long)sring);
- tp->tx = 0;
+ tp->tx = NULL;
xenbus_dev_error(dev, err, "allocating event channel");
return err;
}
@@ -283,6 +288,7 @@
tpmif_set_connected_state(tp,0);
if ( tp->tx != NULL ) {
+ gnttab_end_foreign_access(info->ring_ref, 0);
free_page((unsigned long)tp->tx);
tp->tx = NULL;
}
@@ -412,7 +418,6 @@
return err;
}
- watch_for_status(&info->watch, info->watch.node);
return 0;
}
@@ -736,3 +741,13 @@
}
__initcall(tpmif_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h Thu Sep 22
17:42:01 2005
@@ -2,7 +2,8 @@
#define TPM_FRONT_H
-struct tpm_private {
+struct tpm_private
+{
tpmif_tx_interface_t *tx;
unsigned int evtchn;
int connected;
@@ -29,10 +30,21 @@
};
-struct tx_buffer {
+struct tx_buffer
+{
unsigned int size; // available space in data
unsigned int len; // used space in data
unsigned char *data; // pointer to a page
};
#endif
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Thu Sep 22
17:42:01 2005
@@ -231,3 +231,13 @@
unbind_evtchn_from_irqhandler(xen_start_info->store_evtchn, &xb_waitq);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Thu Sep 22
17:42:01 2005
@@ -39,3 +39,13 @@
extern wait_queue_head_t xb_waitq;
#endif /* _XENBUS_COMMS_H */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Thu Sep 22
17:42:01 2005
@@ -186,3 +186,13 @@
}
__initcall(xenbus_dev_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Sep 22
17:42:01 2005
@@ -687,3 +687,13 @@
}
postcore_initcall(xenbus_probe_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Thu Sep 22
17:42:01 2005
@@ -253,31 +253,19 @@
EXPORT_SYMBOL(xenbus_read);
/* Write the value of a single file.
- * Returns -err on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns -err on failure.
*/
-int xenbus_write(const char *dir, const char *node,
- const char *string, int createflags)
-{
- const char *flags, *path;
- struct kvec iovec[3];
+int xenbus_write(const char *dir, const char *node, const char *string)
+{
+ const char *path;
+ struct kvec iovec[2];
path = join(dir, node);
- /* Format: Flags (as string), path, data. */
- if (createflags == 0)
- flags = XS_WRITE_NONE;
- else if (createflags == O_CREAT)
- flags = XS_WRITE_CREATE;
- else if (createflags == (O_CREAT|O_EXCL))
- flags = XS_WRITE_CREATE_EXCL;
- else
- return -EINVAL;
iovec[0].iov_base = (void *)path;
iovec[0].iov_len = strlen(path) + 1;
- iovec[1].iov_base = (void *)flags;
- iovec[1].iov_len = strlen(flags) + 1;
- iovec[2].iov_base = (void *)string;
- iovec[2].iov_len = strlen(string);
+ iovec[1].iov_base = (void *)string;
+ iovec[1].iov_len = strlen(string);
return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
}
@@ -357,7 +345,7 @@
va_end(ap);
BUG_ON(ret > sizeof(printf_buffer)-1);
- return xenbus_write(dir, node, printf_buffer, O_CREAT);
+ return xenbus_write(dir, node, printf_buffer);
}
EXPORT_SYMBOL(xenbus_printf);
@@ -377,7 +365,7 @@
BUG_ON(len + ret > sizeof(printf_buffer)-1);
dev->has_error = 1;
- if (xenbus_write(dev->nodename, "error", printf_buffer, O_CREAT) != 0)
+ if (xenbus_write(dev->nodename, "error", printf_buffer) != 0)
printk("xenbus: failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer);
}
@@ -578,3 +566,13 @@
return PTR_ERR(watcher);
return 0;
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Sep 22
17:42:01 2005
@@ -261,7 +261,6 @@
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(__pa(v)))
-#define machine_to_virt(m) (__va(machine_to_phys(m)))
#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Thu Sep 22
17:42:01 2005
@@ -460,7 +460,7 @@
#define kern_addr_valid(addr) (1)
#endif /* !CONFIG_DISCONTIGMEM */
-int direct_remap_pfn_range(struct mm_struct *mm,
+int direct_remap_pfn_range(struct vm_area_struct *vma,
unsigned long address,
unsigned long mfn,
unsigned long size,
@@ -474,10 +474,10 @@
unsigned long size);
#define io_remap_page_range(vma,from,phys,size,prot) \
-direct_remap_pfn_range(vma->vm_mm,from,phys>>PAGE_SHIFT,size,prot,DOMID_IO)
+direct_remap_pfn_range(vma,from,(phys)>>PAGE_SHIFT,size,prot,DOMID_IO)
#define io_remap_pfn_range(vma,from,pfn,size,prot) \
-direct_remap_pfn_range(vma->vm_mm,from,pfn,size,prot,DOMID_IO)
+direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
#define MK_IOSPACE_PFN(space, pfn) (pfn)
#define GET_IOSPACE(pfn) 0
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h Thu Sep 22
17:42:01 2005
@@ -497,11 +497,22 @@
* includes these barriers, for example.
*/
+/*
+ * Don't use smp_processor_id() in preemptible code: debug builds will barf.
+ * It's okay in these cases as we only read the upcall mask in preemptible
+ * regions, which is always safe.
+ */
+#ifdef CONFIG_SMP
+#define __this_cpu() __smp_processor_id()
+#else
+#define __this_cpu() 0
+#endif
+
#define __cli()
\
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
_vcpu->evtchn_upcall_mask = 1; \
preempt_enable_no_resched(); \
barrier(); \
@@ -512,7 +523,7 @@
vcpu_info_t *_vcpu; \
barrier(); \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
_vcpu->evtchn_upcall_mask = 0; \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
@@ -523,7 +534,7 @@
#define __save_flags(x)
\
do { \
vcpu_info_t *_vcpu; \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
(x) = _vcpu->evtchn_upcall_mask; \
} while (0)
@@ -532,7 +543,7 @@
vcpu_info_t *_vcpu; \
barrier(); \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
@@ -548,7 +559,7 @@
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
(x) = _vcpu->evtchn_upcall_mask; \
_vcpu->evtchn_upcall_mask = 1; \
preempt_enable_no_resched(); \
@@ -561,14 +572,8 @@
#define local_irq_disable() __cli()
#define local_irq_enable() __sti()
-/* Don't use smp_processor_id: this is called in debug versions of that fn. */
-#ifdef CONFIG_SMP
-#define irqs_disabled() \
- HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask
-#else
-#define irqs_disabled() \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
-#endif
+#define irqs_disabled()
\
+ HYPERVISOR_shared_info->vcpu_data[__this_cpu()].evtchn_upcall_mask
/*
* disable hlt during certain critical i/o operations
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h Thu Sep
22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h Thu Sep
22 17:42:01 2005
@@ -35,7 +35,7 @@
* of cr3/ldt (i.e., not in __switch_to).
*/
__asm__ __volatile__ (
- "movl %%es,%0 ; movl %%ds,%1 ; movl %%fs,%2 ; movl %%gs,%3"
+ "mov %%es,%0 ; mov %%ds,%1 ; mov %%fs,%2 ; mov %%gs,%3"
: "=m" (current->thread.es),
"=m" (current->thread.ds),
"=m" (current->thread.fsindex),
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Thu Sep 22
17:42:01 2005
@@ -239,7 +239,6 @@
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(__pa(v)))
-#define machine_to_virt(m) (__va(machine_to_phys(m)))
#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Sep 22
17:42:01 2005
@@ -526,7 +526,7 @@
#define DOMID_LOCAL (0xFFFFU)
-int direct_remap_pfn_range(struct mm_struct *mm,
+int direct_remap_pfn_range(struct vm_area_struct *vma,
unsigned long address,
unsigned long mfn,
unsigned long size,
@@ -542,10 +542,10 @@
unsigned long size);
#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
-
direct_remap_pfn_range((vma)->vm_mm,vaddr,paddr>>PAGE_SHIFT,size,prot,DOMID_IO)
+
direct_remap_pfn_range(vma,vaddr,(paddr)>>PAGE_SHIFT,size,prot,DOMID_IO)
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
-
direct_remap_pfn_range((vma)->vm_mm,vaddr,pfn,size,prot,DOMID_IO)
+ direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
#define MK_IOSPACE_PFN(space, pfn) (pfn)
#define GET_IOSPACE(pfn) 0
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h Thu Sep 22
17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h Thu Sep 22
17:42:01 2005
@@ -321,11 +321,22 @@
* includes these barriers, for example.
*/
+/*
+ * Don't use smp_processor_id() in preemptible code: debug builds will barf.
+ * It's okay in these cases as we only read the upcall mask in preemptible
+ * regions, which is always safe.
+ */
+#ifdef CONFIG_SMP
+#define __this_cpu() __smp_processor_id()
+#else
+#define __this_cpu() 0
+#endif
+
#define __cli()
\
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
_vcpu->evtchn_upcall_mask = 1; \
preempt_enable_no_resched(); \
barrier(); \
@@ -336,7 +347,7 @@
vcpu_info_t *_vcpu; \
barrier(); \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
_vcpu->evtchn_upcall_mask = 0; \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
@@ -347,7 +358,7 @@
#define __save_flags(x)
\
do { \
vcpu_info_t *_vcpu; \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
(x) = _vcpu->evtchn_upcall_mask; \
} while (0)
@@ -356,7 +367,7 @@
vcpu_info_t *_vcpu; \
barrier(); \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
@@ -372,7 +383,7 @@
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()]; \
(x) = _vcpu->evtchn_upcall_mask; \
_vcpu->evtchn_upcall_mask = 1; \
preempt_enable_no_resched(); \
@@ -387,14 +398,8 @@
#define local_irq_disable() __cli()
#define local_irq_enable() __sti()
-/* Don't use smp_processor_id: this is called in debug versions of that fn. */
-#ifdef CONFIG_SMP
-#define irqs_disabled() \
- HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask
-#else
-#define irqs_disabled() \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
-#endif
+#define irqs_disabled()
\
+ HYPERVISOR_shared_info->vcpu_data[__this_cpu()].evtchn_upcall_mask
/*
* disable hlt during certain critical i/o operations
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Thu Sep 22 17:42:01 2005
@@ -37,7 +37,7 @@
void gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
void gnttab_end_foreign_access(grant_ref_t ref, int readonly);
-int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
+int gnttab_grant_foreign_transfer(domid_t domid);
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
@@ -64,8 +64,7 @@
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);
-void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
- unsigned long pfn);
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid);
#ifdef __ia64__
#define gnttab_map_vaddr(map) __va(map.dev_bus_addr)
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Thu Sep 22 17:42:01 2005
@@ -83,8 +83,7 @@
char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
void *xenbus_read(const char *dir, const char *node, unsigned int *len);
-int xenbus_write(const char *dir, const char *node,
- const char *string, int createflags);
+int xenbus_write(const char *dir, const char *node, const char *string);
int xenbus_mkdir(const char *dir, const char *node);
int xenbus_exists(const char *dir, const char *node);
int xenbus_rm(const char *dir, const char *node);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/tools/blktap/xenbus.c Thu Sep 22 17:42:01 2005
@@ -92,7 +92,7 @@
if ((path == NULL) || (buf == NULL))
return 0;
- ret = xs_write(h, path, buf, strlen(buf)+1, O_CREAT);
+ ret = xs_write(h, path, buf, strlen(buf)+1);
free(buf);
free(path);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Sep 22 17:34:14 2005
+++ b/tools/console/daemon/io.c Thu Sep 22 17:42:01 2005
@@ -165,7 +165,7 @@
success = asprintf(&path, "%s/tty", dom->conspath) != -1;
if (!success)
goto out;
- success = xs_write(xs, path, slave, strlen(slave), O_CREAT);
+ success = xs_write(xs, path, slave, strlen(slave));
free(path);
if (!success)
goto out;
diff -r 97dbd9524a7e -r 06d84bf87159
tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure Thu Sep
22 17:34:14 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure Thu Sep
22 17:42:01 2005
@@ -3475,7 +3475,7 @@
GDBSERVER_DEPFILES="$srv_regobj $srv_tgtobj $srv_thread_depfiles"
-GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxc"
+GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxenctrl"
diff -r 97dbd9524a7e -r 06d84bf87159
tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
Thu Sep 22 17:42:01 2005
@@ -107,7 +107,7 @@
GDBSERVER_DEPFILES="$srv_regobj $srv_tgtobj $srv_thread_depfiles"
-GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxc"
+GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxenctrl"
AC_SUBST(GDBSERVER_DEPFILES)
AC_SUBST(GDBSERVER_LIBS)
diff -r 97dbd9524a7e -r 06d84bf87159
tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
Thu Sep 22 17:42:01 2005
@@ -37,9 +37,10 @@
#include <errno.h>
#include <xenctrl.h>
#define TRACE_ENTER /* printf("enter %s\n", __FUNCTION__) */
-long (*myptrace)(enum __ptrace_request, pid_t, long, long);
-int (*myxcwait)(int domain, int *status, int options) ;
-
+
+long (*myptrace)(int xc_handle, enum __ptrace_request, u32, long, long);
+int (*myxcwait)(int xc_handle, int domain, int *status, int options) ;
+static int xc_handle;
#define DOMFLAGS_DYING (1<<0) /* Domain is scheduled to die. */
#define DOMFLAGS_SHUTDOWN (1<<2) /* The guest OS has shut down. */
@@ -47,11 +48,7 @@
#define DOMFLAGS_BLOCKED (1<<4) /* Currently blocked pending an event. */
#define DOMFLAGS_RUNNING (1<<5) /* Domain is currently running. */
-
-
struct inferior_list all_processes;
-
-
static int current_domain;
static int expect_signal = 0;
static int signal_to_send = 0;
@@ -150,7 +147,7 @@
{
struct process_info *new_process;
current_domain = domain;
- if (myptrace (PTRACE_ATTACH, domain, 0, 0) != 0) {
+ if (myptrace (xc_handle, PTRACE_ATTACH, domain, 0, 0) != 0) {
fprintf (stderr, "Cannot attach to domain %d: %s (%d)\n", domain,
strerror (errno), errno);
fflush (stderr);
@@ -173,8 +170,7 @@
{
struct thread_info *thread = (struct thread_info *) entry;
struct process_info *process = get_thread_process (thread);
- myptrace (PTRACE_KILL, pid_of (process), 0, 0);
-
+ myptrace (xc_handle, PTRACE_KILL, pid_of (process), 0, 0);
}
static void
@@ -190,7 +186,7 @@
struct thread_info *thread = (struct thread_info *) entry;
struct process_info *process = get_thread_process (thread);
- myptrace (PTRACE_DETACH, pid_of (process), 0, 0);
+ myptrace (xc_handle, PTRACE_DETACH, pid_of (process), 0, 0);
}
@@ -216,7 +212,7 @@
linux_wait (char *status)
{
int w;
- if (myxcwait(current_domain, &w, 0))
+ if (myxcwait(xc_handle, current_domain, &w, 0))
return -1;
if (w & (DOMFLAGS_SHUTDOWN|DOMFLAGS_DYING)) {
@@ -241,7 +237,7 @@
expect_signal = resume_info->sig;
for_each_inferior(&all_threads, regcache_invalidate_one);
- myptrace (step ? PTRACE_SINGLESTEP : PTRACE_CONT, current_domain, 0, 0);
+ myptrace (xc_handle, step ? PTRACE_SINGLESTEP : PTRACE_CONT, current_domain,
0, 0);
}
@@ -265,7 +261,7 @@
}
buf = malloc (regset->size);
- res = myptrace (regset->get_request, inferior_pid, 0,
(PTRACE_XFER_TYPE)buf);
+ res = myptrace (xc_handle, regset->get_request, inferior_pid, 0,
(PTRACE_XFER_TYPE)buf);
if (res < 0)
{
if (errno == EIO)
@@ -317,7 +313,7 @@
buf = malloc (regset->size);
regset->fill_function (buf);
- res = myptrace (regset->set_request, inferior_pid, 0,
(PTRACE_XFER_TYPE)buf);
+ res = myptrace (xc_handle, regset->set_request, inferior_pid, 0,
(PTRACE_XFER_TYPE)buf);
if (res < 0)
{
if (errno == EIO)
@@ -395,7 +391,7 @@
for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE))
{
errno = 0;
- buffer[i] = myptrace (PTRACE_PEEKTEXT, inferior_pid, (PTRACE_ARG3_TYPE)
addr, 0);
+ buffer[i] = myptrace (xc_handle, PTRACE_PEEKTEXT, inferior_pid,
(PTRACE_ARG3_TYPE) addr, 0);
if (errno)
return errno;
}
@@ -428,13 +424,13 @@
/* Fill start and end extra bytes of buffer with existing memory data. */
- buffer[0] = myptrace (PTRACE_PEEKTEXT, inferior_pid,
+ buffer[0] = myptrace (xc_handle, PTRACE_PEEKTEXT, inferior_pid,
(PTRACE_ARG3_TYPE) addr, 0);
if (count > 1)
{
buffer[count - 1]
- = myptrace (PTRACE_PEEKTEXT, inferior_pid,
+ = myptrace (xc_handle, PTRACE_PEEKTEXT, inferior_pid,
(PTRACE_ARG3_TYPE) (addr + (count - 1)
* sizeof (PTRACE_XFER_TYPE)),
0);
@@ -448,7 +444,7 @@
for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE))
{
errno = 0;
- myptrace (PTRACE_POKETEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr,
buffer[i]);
+ myptrace (xc_handle, PTRACE_POKETEXT, inferior_pid, (PTRACE_ARG3_TYPE)
addr, buffer[i]);
if (errno)
return errno;
}
@@ -539,7 +535,7 @@
void
initialize_low (void)
{
-
+ xc_handle = xc_interface_open();
set_target_ops (&linux_xen_target_ops);
set_breakpoint_data (the_low_target.breakpoint,
the_low_target.breakpoint_len);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/gdbbuild
--- a/tools/debugger/gdb/gdbbuild Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdbbuild Thu Sep 22 17:42:01 2005
@@ -1,20 +1,17 @@
#!/bin/sh
-XENROOT=`hg root`
-export XENROOT
-
-cd $XENROOT/tools/debugger/gdb
-rm -rf gdb-6.2.1 gdb-6.2.1-linux-i386-xen
-# FIXME:cw this should be smarter
-wget -c ftp://ftp.gnu.org/gnu/gdb/gdb-6.2.1.tar.bz2
+rm -rf gdb-6.2.1 gdb-6.2.1-linux-i386-xen
+[ -a gdb-6.2.1.tar.bz2 ] || wget -c ftp://ftp.gnu.org/gnu/gdb/gdb-6.2.1.tar.bz2
tar xjf gdb-6.2.1.tar.bz2
-cd $XENROOT/tools/debugger/gdb/gdb-6.2.1-xen-sparse
+cd gdb-6.2.1-xen-sparse
./mkbuildtree ../gdb-6.2.1
-mkdir $XENROOT/tools/debugger/gdb/gdb-6.2.1-linux-i386-xen
-cd $XENROOT/tools/debugger/gdb/gdb-6.2.1-linux-i386-xen
+cd ..
+mkdir gdb-6.2.1-linux-i386-xen
+cd gdb-6.2.1-linux-i386-xen
../gdb-6.2.1/configure
+
# some people don't have gmake
if which gmake ; then
gmake -j4
diff -r 97dbd9524a7e -r 06d84bf87159 tools/examples/network-bridge
--- a/tools/examples/network-bridge Thu Sep 22 17:34:14 2005
+++ b/tools/examples/network-bridge Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
-#!/bin/sh -x
+#!/bin/sh
#============================================================================
# Default Xen network start/stop script.
# Xend calls a network script when it starts.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp Thu Sep 22 17:34:14 2005
+++ b/tools/examples/xend-config.sxp Thu Sep 22 17:42:01 2005
@@ -49,6 +49,6 @@
# If dom0-min-mem=0, dom0 will never balloon out.
(dom0-min-mem 0)
-# In SMP system, dom0 will use only CPUs in range [1,dom0-cpus]
+# In SMP system, dom0 will use dom0-cpus # of CPUS
# If dom0-cpus = 0, dom0 will take all cpus available
(dom0-cpus 0)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx Thu Sep 22 17:34:14 2005
+++ b/tools/examples/xmexample.vmx Thu Sep 22 17:42:01 2005
@@ -25,6 +25,10 @@
# A name for your domain. All domains must have different names.
name = "ExampleVMXDomain"
+
+#-----------------------------------------------------------------------------
+# the number of cpus guest platform has, default=1
+vcpus=1
# Which CPU to start domain on?
#cpu = -1 # leave to Xen to pick
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/acpi/acpi_madt.c
--- a/tools/firmware/acpi/acpi_madt.c Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/acpi/acpi_madt.c Thu Sep 22 17:42:01 2005
@@ -37,44 +37,7 @@
ACPI_LOCAL_APIC_ADDRESS,
ACPI_MULTIPLE_APIC_FLAGS,
},
- //
- // LOCAL APIC Entries for 4 processors.
- //
- {
- {
- ACPI_PROCESSOR_LOCAL_APIC,
- sizeof
(ACPI_LOCAL_APIC_STRUCTURE),
- 0x00,
- 0x00,
- 0x00000001,
- },
-
- {
- ACPI_PROCESSOR_LOCAL_APIC,
- sizeof
(ACPI_LOCAL_APIC_STRUCTURE),
- 0x01,
- 0x00,
- 0x00000000
- },
-
- {
- ACPI_PROCESSOR_LOCAL_APIC,
- sizeof
(ACPI_LOCAL_APIC_STRUCTURE),
- 0x02,
- 0x00,
- 0x00000000
- },
-
- {
- ACPI_PROCESSOR_LOCAL_APIC,
- sizeof
(ACPI_LOCAL_APIC_STRUCTURE),
- 0x03,
- 0x00,
- 0x00000000
- }
- }
- ,
-
+
//
// IO APIC
//
@@ -87,5 +50,19 @@
ACPI_IO_APIC_ADDRESS_1,
0x0000
}
+ },
+
+ //
+ // LOCAL APIC Entries for up to 32 processors.
+ //
+ {
+ {
+ ACPI_PROCESSOR_LOCAL_APIC,
+ sizeof
(ACPI_LOCAL_APIC_STRUCTURE),
+ 0x00,
+ 0x00,
+ 0x00000001,
+ }
+
}
};
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/acpi/acpi_madt.h
--- a/tools/firmware/acpi/acpi_madt.h Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/acpi/acpi_madt.h Thu Sep 22 17:42:01 2005
@@ -35,9 +35,9 @@
//
#pragma pack (1)
typedef struct {
- ACPI_2_0_MADT Header;
- ACPI_LOCAL_APIC_STRUCTURE LocalApic[4];
- ACPI_IO_APIC_STRUCTURE IoApic[1];
+ ACPI_2_0_MADT Header;
+ ACPI_IO_APIC_STRUCTURE IoApic[1];
+ ACPI_LOCAL_APIC_STRUCTURE LocalApic[32];
} ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE;
#pragma pack ()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/vmxassist/Makefile Thu Sep 22 17:42:01 2005
@@ -41,9 +41,9 @@
all: vmxloader
-vmxloader: roms.h vmxloader.c acpi.h
- ${CC} ${CFLAGS} ${DEFINES} -c vmxloader.c
- $(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000
vmxloader.o
+vmxloader: roms.h vmxloader.c acpi.h acpi_madt.c
+ ${CC} ${CFLAGS} ${DEFINES} -c vmxloader.c -c acpi_madt.c
+ $(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000
vmxloader.o acpi_madt.o
objcopy --change-addresses=0xC0000000 vmxloader.tmp vmxloader
rm -f vmxloader.tmp
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/vmxassist/vmxloader.c Thu Sep 22 17:42:01 2005
@@ -27,6 +27,7 @@
#ifdef _ACPI_
#include "acpi.h"
#include "../acpi/acpi2_0.h" // for ACPI_PHYSICAL_ADDRESS
+int acpi_madt_update(unsigned char* acpi_start);
#endif
@@ -110,7 +111,10 @@
}
#ifdef _ACPI_
puts("Loading ACPI ...\n");
- if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
+
+ acpi_madt_update(acpi);
+
+ if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
/* make sure acpi table does not overlap rombios
* currently acpi less than 8K will be OK.
*/
diff -r 97dbd9524a7e -r 06d84bf87159 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Thu Sep 22 17:34:14 2005
+++ b/tools/ioemu/vl.c Thu Sep 22 17:42:01 2005
@@ -126,6 +126,7 @@
int vm_running;
int audio_enabled = 0;
int nic_pcnet = 1;
+int vcpus = 1;
int sb16_enabled = 1;
int adlib_enabled = 1;
int gus_enabled = 1;
@@ -2105,6 +2106,7 @@
"-snapshot write to temporary files instead of disk image
files\n"
"-m megs set virtual RAM size to megs MB [default=%d]\n"
"-nographic disable graphical output and redirect serial I/Os
to console\n"
+ "-vcpus set CPU number of guest platform\n"
#ifdef CONFIG_VNC
"-vnc port use vnc instead of sdl\n"
"-vncport port use a different port\n"
@@ -2235,6 +2237,7 @@
QEMU_OPTION_hdachs,
QEMU_OPTION_L,
QEMU_OPTION_no_code_copy,
+ QEMU_OPTION_vcpus,
QEMU_OPTION_pci,
QEMU_OPTION_nic_pcnet,
QEMU_OPTION_isa,
@@ -2307,6 +2310,7 @@
{ "hdachs", HAS_ARG, QEMU_OPTION_hdachs },
{ "L", HAS_ARG, QEMU_OPTION_L },
{ "no-code-copy", 0, QEMU_OPTION_no_code_copy },
+ { "vcpus", 1, QEMU_OPTION_vcpus },
#ifdef TARGET_PPC
{ "prep", 0, QEMU_OPTION_prep },
{ "g", 1, QEMU_OPTION_g },
@@ -2646,6 +2650,9 @@
case QEMU_OPTION_S:
start_emulation = 0;
break;
+ case QEMU_OPTION_vcpus:
+ vcpus = atoi(optarg);
+ fprintf(logfile, "qemu: the number of cpus is %d\n", vcpus);
case QEMU_OPTION_pci:
pci_enabled = 1;
break;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/Makefile
--- a/tools/libxc/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/Makefile Thu Sep 22 17:42:01 2005
@@ -26,19 +26,21 @@
BUILD_SRCS += xc_linux_build.c
BUILD_SRCS += xc_load_bin.c
BUILD_SRCS += xc_load_elf.c
-BUILD_SRCS += xg_private.c
ifeq ($(XEN_TARGET_ARCH),ia64)
BUILD_SRCS += xc_ia64_stubs.c
else
+ifeq ($(XEN_TARGET_ARCH),x86_32)
SRCS += xc_ptrace.c
SRCS += xc_ptrace_core.c
-
-BUILD_SRCS := xc_load_aout9.c
+endif
+BUILD_SRCS += xc_load_aout9.c
BUILD_SRCS += xc_linux_restore.c
BUILD_SRCS += xc_linux_save.c
BUILD_SRCS += xc_vmx_build.c
endif
+
+BUILD_SRCS += xg_private.c
CFLAGS += -Wall
CFLAGS += -Werror
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_core.c Thu Sep 22 17:42:01 2005
@@ -11,10 +11,10 @@
static int
copy_from_domain_page(int xc_handle,
- u32 domid,
- unsigned long *page_array,
- unsigned long src_pfn,
- void *dst_page)
+ u32 domid,
+ unsigned long *page_array,
+ unsigned long src_pfn,
+ void *dst_page)
{
void *vaddr = xc_map_foreign_range(
xc_handle, domid, PAGE_SIZE, PROT_READ, page_array[src_pfn]);
@@ -27,90 +27,100 @@
int
xc_domain_dumpcore(int xc_handle,
- u32 domid,
- const char *corename)
+ u32 domid,
+ const char *corename)
{
- unsigned long nr_pages;
- unsigned long *page_array;
- xc_dominfo_t info;
- int i, j, vcpu_map_size, dump_fd;
- char *dump_mem, *dump_mem_start = NULL;
- struct xc_core_header header;
- vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
+ unsigned long nr_pages;
+ unsigned long *page_array;
+ xc_dominfo_t info;
+ int i, j, vcpu_map_size, dump_fd;
+ char *dump_mem, *dump_mem_start = NULL;
+ struct xc_core_header header;
+ vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
-
- if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) {
- PERROR("Could not open corefile %s: %s", corename,
strerror(errno));
- goto error_out;
- }
-
- if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) {
- PERROR("Could not allocate dump_mem");
- goto error_out;
- }
-
- if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) {
- PERROR("Could not get info for domain");
- goto error_out;
- }
-
- vcpu_map_size = sizeof(info.vcpu_to_cpu) / sizeof(info.vcpu_to_cpu[0]);
+
+ if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) {
+ PERROR("Could not open corefile %s: %s", corename, strerror(errno));
+ goto error_out;
+ }
+
+ if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) {
+ PERROR("Could not allocate dump_mem");
+ goto error_out;
+ }
+
+ if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) {
+ PERROR("Could not get info for domain");
+ goto error_out;
+ }
+
+ vcpu_map_size = sizeof(info.vcpu_to_cpu) / sizeof(info.vcpu_to_cpu[0]);
- for (i = 0, j = 0; i < vcpu_map_size; i++) {
- if (info.vcpu_to_cpu[i] == -1) {
- continue;
- }
- if (xc_domain_get_vcpu_context(xc_handle, domid, i, &ctxt[j])) {
- PERROR("Could not get all vcpu contexts for domain");
- goto error_out;
- }
- j++;
- }
-
- nr_pages = info.nr_pages;
+ for (i = 0, j = 0; i < vcpu_map_size; i++) {
+ if (info.vcpu_to_cpu[i] == -1) {
+ continue;
+ }
+ if (xc_domain_get_vcpu_context(xc_handle, domid, i, &ctxt[j])) {
+ PERROR("Could not get all vcpu contexts for domain");
+ goto error_out;
+ }
+ j++;
+ }
+
+ nr_pages = info.nr_pages;
- header.xch_magic = 0xF00FEBED;
- header.xch_nr_vcpus = info.vcpus;
- header.xch_nr_pages = nr_pages;
- header.xch_ctxt_offset = sizeof(struct xc_core_header);
- header.xch_index_offset = sizeof(struct xc_core_header) +
- sizeof(vcpu_guest_context_t)*info.vcpus;
- header.xch_pages_offset = round_pgup(sizeof(struct xc_core_header) +
- (sizeof(vcpu_guest_context_t) * info.vcpus) +
- (nr_pages * sizeof(unsigned long)));
+ header.xch_magic = 0xF00FEBED;
+ header.xch_nr_vcpus = info.vcpus;
+ header.xch_nr_pages = nr_pages;
+ header.xch_ctxt_offset = sizeof(struct xc_core_header);
+ header.xch_index_offset = sizeof(struct xc_core_header) +
+ sizeof(vcpu_guest_context_t)*info.vcpus;
+ header.xch_pages_offset = round_pgup(sizeof(struct xc_core_header) +
+ (sizeof(vcpu_guest_context_t) *
info.vcpus) +
+ (nr_pages * sizeof(unsigned long)));
- write(dump_fd, &header, sizeof(struct xc_core_header));
- write(dump_fd, &ctxt, sizeof(ctxt[0]) * info.vcpus);
+ write(dump_fd, &header, sizeof(struct xc_core_header));
+ write(dump_fd, &ctxt, sizeof(ctxt[0]) * info.vcpus);
- if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
- printf("Could not allocate memory\n");
- goto error_out;
- }
- if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) !=
nr_pages) {
- printf("Could not get the page frame list\n");
- goto error_out;
- }
- write(dump_fd, page_array, nr_pages * sizeof(unsigned long));
- lseek(dump_fd, header.xch_pages_offset, SEEK_SET);
- for (dump_mem = dump_mem_start, i = 0; i < nr_pages; i++) {
- copy_from_domain_page(xc_handle, domid, page_array, i,
dump_mem);
- dump_mem += PAGE_SIZE;
- if (((i + 1) % DUMP_INCREMENT == 0) || (i + 1) == nr_pages) {
- if (write(dump_fd, dump_mem_start, dump_mem -
dump_mem_start) <
- dump_mem - dump_mem_start) {
- PERROR("Partial write, file system full?");
- goto error_out;
- }
- dump_mem = dump_mem_start;
- }
- }
+ if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate memory\n");
+ goto error_out;
+ }
+ if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
+ printf("Could not get the page frame list\n");
+ goto error_out;
+ }
+ write(dump_fd, page_array, nr_pages * sizeof(unsigned long));
+ lseek(dump_fd, header.xch_pages_offset, SEEK_SET);
+ for (dump_mem = dump_mem_start, i = 0; i < nr_pages; i++) {
+ copy_from_domain_page(xc_handle, domid, page_array, i, dump_mem);
+ dump_mem += PAGE_SIZE;
+ if (((i + 1) % DUMP_INCREMENT == 0) || (i + 1) == nr_pages) {
+ if (write(dump_fd, dump_mem_start, dump_mem - dump_mem_start) <
+ dump_mem - dump_mem_start) {
+ PERROR("Partial write, file system full?");
+ goto error_out;
+ }
+ dump_mem = dump_mem_start;
+ }
+ }
- close(dump_fd);
- free(dump_mem_start);
- return 0;
+ close(dump_fd);
+ free(dump_mem_start);
+ return 0;
error_out:
- if (dump_fd != -1)
- close(dump_fd);
- free(dump_mem_start);
- return -1;
+ if (dump_fd != -1)
+ close(dump_fd);
+ free(dump_mem_start);
+ return -1;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_domain.c Thu Sep 22 17:42:01 2005
@@ -265,7 +265,7 @@
unsigned long nr_extents,
unsigned int extent_order,
unsigned int address_bits,
- unsigned long *extent_start)
+ unsigned long *extent_start)
{
int err;
struct xen_memory_reservation reservation = {
@@ -296,7 +296,7 @@
u32 domid,
unsigned long nr_extents,
unsigned int extent_order,
- unsigned long *extent_start)
+ unsigned long *extent_start)
{
int err;
struct xen_memory_reservation reservation = {
@@ -328,3 +328,13 @@
return err;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_ia64_stubs.c Thu Sep 22 17:42:01 2005
@@ -9,8 +9,8 @@
}
int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int console_evtchn, unsigned long *console_mfn)
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn)
{
PERROR("xc_linux_restore not implemented\n");
return -1;
@@ -44,3 +44,12 @@
return -1;
}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_linux_build.c Thu Sep 22 17:42:01 2005
@@ -12,7 +12,6 @@
#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif
-
#include "xc_elf.h"
#include "xc_aout9.h"
@@ -33,6 +32,13 @@
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif
+#ifdef __ia64__
+#define already_built(ctxt) (0)
+#define get_tot_pages xc_get_max_pages
+#else
+#define already_built(ctxt) ((ctxt)->ctrlreg[3] != 0)
+#define get_tot_pages xc_get_tot_pages
+#endif
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
@@ -47,7 +53,7 @@
{
if ( probe_elf(image, image_size, load_funcs) &&
probe_bin(image, image_size, load_funcs) &&
- probe_aout9(image, image_size, load_funcs) )
+ probe_aout9(image, image_size, load_funcs) )
{
ERROR( "Unrecognized image format" );
return -EINVAL;
@@ -56,27 +62,27 @@
return 0;
}
-#define alloc_pt(ltab, vltab) \
- ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
- if (vltab != NULL) { \
- munmap(vltab, PAGE_SIZE); \
- } \
- if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
- PROT_READ|PROT_WRITE, \
- ltab >> PAGE_SHIFT)) == NULL) { \
- goto error_out; \
- } \
- memset(vltab, 0, PAGE_SIZE);
+#define alloc_pt(ltab, vltab) \
+do { \
+ ltab = (u64)page_array[ppt_alloc++] << PAGE_SHIFT; \
+ if ( vltab != NULL ) \
+ munmap(vltab, PAGE_SIZE); \
+ if ( (vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
+ PROT_READ|PROT_WRITE, \
+ ltab >> PAGE_SHIFT)) == NULL ) \
+ goto error_out; \
+ memset(vltab, 0, PAGE_SIZE); \
+} while ( 0 )
#if defined(__i386__)
static int setup_pg_tables(int xc_handle, u32 dom,
- vcpu_guest_context_t *ctxt,
- unsigned long dsi_v_start,
- unsigned long v_end,
- unsigned long *page_array,
- unsigned long vpt_start,
- unsigned long vpt_end)
+ vcpu_guest_context_t *ctxt,
+ unsigned long dsi_v_start,
+ unsigned long v_end,
+ unsigned long *page_array,
+ unsigned long vpt_start,
+ unsigned long vpt_end)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -90,11 +96,11 @@
vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
ctxt->ctrlreg[3] = l2tab;
- for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++ )
+ for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
{
if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
{
- alloc_pt(l1tab, vl1tab);
+ alloc_pt(l1tab, vl1tab);
vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
*vl2e++ = l1tab | L2_PROT;
}
@@ -111,79 +117,67 @@
error_out:
if (vl1tab)
- munmap(vl1tab, PAGE_SIZE);
+ munmap(vl1tab, PAGE_SIZE);
if (vl2tab)
- munmap(vl2tab, PAGE_SIZE);
+ munmap(vl2tab, PAGE_SIZE);
return -1;
}
static int setup_pg_tables_pae(int xc_handle, u32 dom,
- vcpu_guest_context_t *ctxt,
- unsigned long dsi_v_start,
- unsigned long v_end,
- unsigned long *page_array,
- unsigned long vpt_start,
- unsigned long vpt_end)
+ vcpu_guest_context_t *ctxt,
+ unsigned long dsi_v_start,
+ unsigned long v_end,
+ unsigned long *page_array,
+ unsigned long vpt_start,
+ unsigned long vpt_end)
{
- l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
- l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
- l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
- unsigned long long l1tab = 0;
- unsigned long long l2tab = 0;
- unsigned long long l3tab = 0;
- unsigned long ppt_alloc;
- unsigned long count;
+ l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
+ l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
+ l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
+ u64 l1tab, l2tab, l3tab;
+ unsigned long ppt_alloc, count, nmfn;
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
if ( page_array[ppt_alloc] > 0xfffff )
{
- unsigned long nmfn;
- nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
- if ( nmfn == 0 )
- {
- fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
- goto error_out;
- }
- page_array[ppt_alloc] = nmfn;
+ nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
+ if ( nmfn == 0 )
+ {
+ fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+ goto error_out;
+ }
+ page_array[ppt_alloc] = nmfn;
}
alloc_pt(l3tab, vl3tab);
vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
ctxt->ctrlreg[3] = l3tab;
- if(l3tab>0xfffff000ULL)
- {
- fprintf(stderr,"L3TAB = %llx above 4GB!\n",l3tab);
- goto error_out;
- }
-
- for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
+ for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
{
if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
{
+ if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+ {
+ alloc_pt(l2tab, vl2tab);
+ vl2e = &vl2tab[l2_table_offset_pae(
+ dsi_v_start + (count << PAGE_SHIFT))];
+ *vl3e++ = l2tab | L3_PROT;
+ }
+
alloc_pt(l1tab, vl1tab);
-
- if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
- {
- alloc_pt(l2tab, vl2tab);
- vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start +
(count<<PAGE_SHIFT))];
- *vl3e = l2tab | L3_PROT;
- vl3e++;
- }
- vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start +
(count<<PAGE_SHIFT))];
- *vl2e = l1tab | L2_PROT;
- vl2e++;
+ vl1e = &vl1tab[l1_table_offset_pae(
+ dsi_v_start + (count << PAGE_SHIFT))];
+ *vl2e++ = l1tab | L2_PROT;
}
- *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+ *vl1e = ((u64)page_array[count] << PAGE_SHIFT) | L1_PROT;
if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
- (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
- {
- *vl1e &= ~_PAGE_RW;
- }
- vl1e++;
+ (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
+ *vl1e &= ~_PAGE_RW;
+ vl1e++;
}
munmap(vl1tab, PAGE_SIZE);
@@ -193,11 +187,11 @@
error_out:
if (vl1tab)
- munmap(vl1tab, PAGE_SIZE);
+ munmap(vl1tab, PAGE_SIZE);
if (vl2tab)
- munmap(vl2tab, PAGE_SIZE);
+ munmap(vl2tab, PAGE_SIZE);
if (vl3tab)
- munmap(vl3tab, PAGE_SIZE);
+ munmap(vl3tab, PAGE_SIZE);
return -1;
}
@@ -206,12 +200,12 @@
#if defined(__x86_64__)
static int setup_pg_tables_64(int xc_handle, u32 dom,
- vcpu_guest_context_t *ctxt,
- unsigned long dsi_v_start,
- unsigned long v_end,
- unsigned long *page_array,
- unsigned long vpt_start,
- unsigned long vpt_end)
+ vcpu_guest_context_t *ctxt,
+ unsigned long dsi_v_start,
+ unsigned long v_end,
+ unsigned long *page_array,
+ unsigned long vpt_start,
+ unsigned long vpt_end)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -236,20 +230,20 @@
{
alloc_pt(l1tab, vl1tab);
- if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+ if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+ {
+ alloc_pt(l2tab, vl2tab);
+ if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
{
- alloc_pt(l2tab, vl2tab);
- if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
- {
- alloc_pt(l3tab, vl3tab);
- vl3e = &vl3tab[l3_table_offset(dsi_v_start +
(count<<PAGE_SHIFT))];
- *vl4e = l3tab | L4_PROT;
- vl4e++;
- }
- vl2e = &vl2tab[l2_table_offset(dsi_v_start +
(count<<PAGE_SHIFT))];
- *vl3e = l2tab | L3_PROT;
- vl3e++;
+ alloc_pt(l3tab, vl3tab);
+ vl3e = &vl3tab[l3_table_offset(dsi_v_start +
(count<<PAGE_SHIFT))];
+ *vl4e = l3tab | L4_PROT;
+ vl4e++;
}
+ vl2e = &vl2tab[l2_table_offset(dsi_v_start +
(count<<PAGE_SHIFT))];
+ *vl3e = l2tab | L3_PROT;
+ vl3e++;
+ }
vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
*vl2e = l1tab | L2_PROT;
vl2e++;
@@ -257,11 +251,11 @@
*vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
- (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
- {
- *vl1e &= ~_PAGE_RW;
- }
- vl1e++;
+ (count < ((vpt_end -dsi_v_start)>>PAGE_SHIFT)) )
+ {
+ *vl1e &= ~_PAGE_RW;
+ }
+ vl1e++;
}
munmap(vl1tab, PAGE_SIZE);
@@ -272,13 +266,13 @@
error_out:
if (vl1tab)
- munmap(vl1tab, PAGE_SIZE);
+ munmap(vl1tab, PAGE_SIZE);
if (vl2tab)
- munmap(vl2tab, PAGE_SIZE);
+ munmap(vl2tab, PAGE_SIZE);
if (vl3tab)
- munmap(vl3tab, PAGE_SIZE);
+ munmap(vl3tab, PAGE_SIZE);
if (vl4tab)
- munmap(vl4tab, PAGE_SIZE);
+ munmap(vl4tab, PAGE_SIZE);
return -1;
}
#endif
@@ -286,18 +280,18 @@
#ifdef __ia64__
#include <asm/fpu.h> /* for FPSR_DEFAULT */
static int setup_guest(int xc_handle,
- u32 dom,
- char *image, unsigned long image_size,
- gzFile initrd_gfd, unsigned long initrd_len,
- unsigned long nr_pages,
- unsigned long *pvsi, unsigned long *pvke,
- unsigned long *pvss, vcpu_guest_context_t *ctxt,
- const char *cmdline,
- unsigned long shared_info_frame,
- unsigned long flags,
- unsigned int vcpus,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int console_evtchn, unsigned long
*console_mfn)
+ u32 dom,
+ char *image, unsigned long image_size,
+ gzFile initrd_gfd, unsigned long initrd_len,
+ unsigned long nr_pages,
+ unsigned long *pvsi, unsigned long *pvke,
+ unsigned long *pvss, vcpu_guest_context_t *ctxt,
+ const char *cmdline,
+ unsigned long shared_info_frame,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn)
{
unsigned long *page_array = NULL;
struct load_funcs load_funcs;
@@ -339,19 +333,20 @@
*pvke = dsi.v_kernentry;
/* Now need to retrieve machine pfn for system pages:
- * start_info/store/console
+ * start_info/store/console
*/
pgnr = 3;
- if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, nr_pages - 3, pgnr)
!= pgnr)
- {
- PERROR("Could not get page frame for xenstore");
- goto error_out;
+ if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
+ nr_pages - 3, pgnr) != pgnr )
+ {
+ PERROR("Could not get page frame for xenstore");
+ goto error_out;
}
*store_mfn = page_array[1];
*console_mfn = page_array[2];
printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
- (u64)store_mfn, (u64)console_mfn);
+ (u64)store_mfn, (u64)console_mfn);
start_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
@@ -382,8 +377,8 @@
unsigned long shared_info_frame,
unsigned long flags,
unsigned int vcpus,
- unsigned int store_evtchn, unsigned long *store_mfn,
- unsigned int console_evtchn, unsigned long *console_mfn)
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn)
{
unsigned long *page_array = NULL;
unsigned long count, i;
@@ -458,26 +453,26 @@
if ( (v_end - vstack_end) < (512UL << 10) )
v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
- if (dsi.pae_kernel) {
- /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
- if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
- L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
- break;
- } else {
- if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
- L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
- break;
- }
+ if (dsi.pae_kernel) {
+ /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
+ if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
+ L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
+ break;
+ } else {
+ if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+ break;
+ }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s) \
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
((_l) & ~((1UL<<(_s))-1))) >> (_s))
- if ( (1 + /* # L4 */
- NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
- NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
- NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
- <= nr_pt_pages )
+ if ( (1 + /* # L4 */
+ NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
+ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+ <= nr_pt_pages )
break;
#endif
}
@@ -541,7 +536,7 @@
goto error_out;
}
xc_copy_to_domain_page(xc_handle, dom,
- page_array[i>>PAGE_SHIFT], page);
+ page_array[i>>PAGE_SHIFT], page);
}
}
@@ -551,22 +546,22 @@
/* setup page tables */
#if defined(__i386__)
if (dsi.pae_kernel)
- rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
- dsi.v_start, v_end,
- page_array, vpt_start, vpt_end);
+ rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
+ dsi.v_start, v_end,
+ page_array, vpt_start, vpt_end);
else {
- rc = setup_pg_tables(xc_handle, dom, ctxt,
- dsi.v_start, v_end,
- page_array, vpt_start, vpt_end);
+ rc = setup_pg_tables(xc_handle, dom, ctxt,
+ dsi.v_start, v_end,
+ page_array, vpt_start, vpt_end);
}
#endif
#if defined(__x86_64__)
rc = setup_pg_tables_64(xc_handle, dom, ctxt,
- dsi.v_start, v_end,
- page_array, vpt_start, vpt_end);
+ dsi.v_start, v_end,
+ page_array, vpt_start, vpt_end);
#endif
if (0 != rc)
- goto error_out;
+ goto error_out;
/* Write the phys->machine and machine->phys table entries. */
physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
@@ -576,11 +571,13 @@
for ( count = 0; count < nr_pages; count++ )
{
- if ( xc_add_mmu_update(xc_handle, mmu,
- ((unsigned long long)page_array[count] <<
PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, count) )
- {
- fprintf(stderr,"m2p update failure p=%lx
m=%lx\n",count,page_array[count] );
+ if ( xc_add_mmu_update(
+ xc_handle, mmu,
+ ((u64)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+ count) )
+ {
+ fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
+ count, page_array[count]);
munmap(physmap, PAGE_SIZE);
goto error_out;
}
@@ -601,13 +598,13 @@
* correct protection for the page
*/
if (dsi.pae_kernel) {
- if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
- ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
- goto error_out;
+ if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
+ ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+ goto error_out;
} else {
- if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
- ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
- goto error_out;
+ if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
+ ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+ goto error_out;
}
#endif
@@ -616,8 +613,8 @@
* Pin down l4tab addr as page dir page - causes hypervisor to provide
* correct protection for the page
*/
- if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
- ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+ if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
+ ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
goto error_out;
#endif
@@ -703,12 +700,7 @@
unsigned long image_size, initrd_size=0;
unsigned long vstartinfo_start, vkern_entry, vstack_start;
-#ifdef __ia64__
- /* Current xen/ia64 allocates domU pages on demand */
- if ( (nr_pages = xc_get_max_pages(xc_handle, domid)) < 0 )
-#else
- if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
-#endif
+ if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
{
PERROR("Could not find total pages for domain");
goto error_out;
@@ -755,12 +747,7 @@
goto error_out;
}
- if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
-#ifdef __ia64__
- 0 )
-#else
- (ctxt->ctrlreg[3] != 0) )
-#endif
+ if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || already_built(ctxt) )
{
ERROR("Domain is already constructed");
goto error_out;
@@ -773,7 +760,7 @@
op.u.getdomaininfo.shared_info_frame,
flags, vcpus,
store_evtchn, store_mfn,
- console_evtchn, console_mfn) < 0 )
+ console_evtchn, console_mfn) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -789,12 +776,13 @@
/* based on new_thread in xen/arch/ia64/domain.c */
ctxt->flags = 0;
ctxt->shared.flags = flags;
- ctxt->shared.start_info_pfn = nr_pages - 3; // metaphysical
+ ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
ctxt->regs.cr_iip = vkern_entry;
ctxt->regs.cr_ifs = 1UL << 63;
ctxt->regs.ar_fpsr = FPSR_DEFAULT;
- /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should
move here */
+ /* currently done by hypervisor, should move here */
+ /* ctxt->regs.r28 = dom_fw_setup(); */
ctxt->vcpu.privregs = 0;
ctxt->sys_pgnr = nr_pages - 3;
i = 0; /* silence unused variable warning */
@@ -875,3 +863,13 @@
return -1;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_linux_save.c Thu Sep 22 17:42:01 2005
@@ -17,7 +17,6 @@
#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
#define MAX_MBIT_RATE 500
-
/*
** Default values for important tuning parameters. Can override by passing
@@ -29,12 +28,9 @@
#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */
-
-
/* Flags to control behaviour of xc_linux_save */
#define XCFLAGS_LIVE 1
#define XCFLAGS_DEBUG 2
-
#define DEBUG 0
@@ -115,8 +111,8 @@
int i, count = 0;
unsigned long *p = (unsigned long *)addr;
/* We know that the array is padded to unsigned long. */
- for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++)
- count += hweight32( *p );
+ for( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
+ count += hweight32(*p);
return count;
}
@@ -201,42 +197,50 @@
struct timespec delay;
long long delta;
- if (START_MBIT_RATE == 0)
- return write(io_fd, buf, n);
+ if ( START_MBIT_RATE == 0 )
+ return write(io_fd, buf, n);
budget -= n;
- if (budget < 0) {
- if (MBIT_RATE != ombit_rate) {
- BURST_TIME_US = RATE_TO_BTU / MBIT_RATE;
- ombit_rate = MBIT_RATE;
- DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
- MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
- }
- if (last_put.tv_sec == 0) {
- budget += BURST_BUDGET;
- gettimeofday(&last_put, NULL);
- } else {
- while (budget < 0) {
- gettimeofday(&now, NULL);
- delta = tv_delta(&now, &last_put);
- while (delta > BURST_TIME_US) {
- budget += BURST_BUDGET;
- last_put.tv_usec += BURST_TIME_US;
- if (last_put.tv_usec > 1000000) {
- last_put.tv_usec -= 1000000;
- last_put.tv_sec++;
- }
- delta -= BURST_TIME_US;
- }
- if (budget > 0)
- break;
- delay.tv_sec = 0;
- delay.tv_nsec = 1000 * (BURST_TIME_US - delta);
- while (delay.tv_nsec > 0)
- if (nanosleep(&delay, &delay) == 0)
- break;
- }
- }
+ if ( budget < 0 )
+ {
+ if ( MBIT_RATE != ombit_rate )
+ {
+ BURST_TIME_US = RATE_TO_BTU / MBIT_RATE;
+ ombit_rate = MBIT_RATE;
+ DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
+ MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
+ }
+ if ( last_put.tv_sec == 0 )
+ {
+ budget += BURST_BUDGET;
+ gettimeofday(&last_put, NULL);
+ }
+ else
+ {
+ while ( budget < 0 )
+ {
+ gettimeofday(&now, NULL);
+ delta = tv_delta(&now, &last_put);
+ while ( delta > BURST_TIME_US )
+ {
+ budget += BURST_BUDGET;
+ last_put.tv_usec += BURST_TIME_US;
+ if ( last_put.tv_usec > 1000000 )
+ {
+ last_put.tv_usec -= 1000000;
+ last_put.tv_sec++;
+ }
+ delta -= BURST_TIME_US;
+ }
+ if ( budget > 0 )
+ break;
+ delay.tv_sec = 0;
+ delay.tv_nsec = 1000 * (BURST_TIME_US - delta);
+ while ( delay.tv_nsec > 0 )
+ if ( nanosleep(&delay, &delay) == 0 )
+ break;
+ }
+ }
}
return write(io_fd, buf, n);
}
@@ -271,20 +275,21 @@
if ( print )
fprintf(stderr,
- "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
- "dirtied %dMb/s %" PRId32 " pages\n",
- wall_delta,
- (int)((d0_cpu_delta*100)/wall_delta),
- (int)((d1_cpu_delta*100)/wall_delta),
- (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
- (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
- stats->dirty_count);
-
- if (((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate) {
- mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
- + 50;
- if (mbit_rate > MAX_MBIT_RATE)
- mbit_rate = MAX_MBIT_RATE;
+ "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+ "dirtied %dMb/s %" PRId32 " pages\n",
+ wall_delta,
+ (int)((d0_cpu_delta*100)/wall_delta),
+ (int)((d1_cpu_delta*100)/wall_delta),
+ (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+ (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+ stats->dirty_count);
+
+ if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
+ {
+ mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
+ + 50;
+ if (mbit_rate > MAX_MBIT_RATE)
+ mbit_rate = MAX_MBIT_RATE;
}
d0_cpu_last = d0_cpu_now;
@@ -303,7 +308,7 @@
start = llgettimeofday();
- for (j = 0; j < runs; j++)
+ for ( j = 0; j < runs; j++ )
{
int i;
@@ -320,10 +325,10 @@
NULL, 0, &stats);
fprintf(stderr, "now= %lld faults= %" PRId32 " dirty= %" PRId32
- " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n",
- ((now-start)+500)/1000,
- stats.fault_count, stats.dirty_count,
- stats.dirty_net_count, stats.dirty_block_count);
+ " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n",
+ ((now-start)+500)/1000,
+ stats.fault_count, stats.dirty_count,
+ stats.dirty_net_count, stats.dirty_block_count);
}
}
@@ -331,7 +336,7 @@
}
-static int suspend_and_state(int xc_handle, int io_fd, int dom,
+static int suspend_and_state(int xc_handle, int io_fd, int dom,
xc_dominfo_t *info,
vcpu_guest_context_t *ctxt)
{
@@ -340,51 +345,53 @@
printf("suspend\n");
fflush(stdout);
- if (fgets(ans, sizeof(ans), stdin) == NULL) {
+ if ( fgets(ans, sizeof(ans), stdin) == NULL )
+ {
ERR("failed reading suspend reply");
return -1;
}
- if (strncmp(ans, "done\n", 5)) {
+ if ( strncmp(ans, "done\n", 5) )
+ {
ERR("suspend reply incorrect: %s", ans);
return -1;
}
-retry:
+ retry:
if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1)
{
- ERR("Could not get domain info");
- return -1;
+ ERR("Could not get domain info");
+ return -1;
}
if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */,
- ctxt) )
+ ctxt) )
{
ERR("Could not get vcpu context");
}
if ( info->shutdown && info->shutdown_reason == SHUTDOWN_suspend )
{
- return 0; // success
+ return 0; // success
}
if ( info->paused )
{
- // try unpausing domain, wait, and retest
- xc_domain_unpause( xc_handle, dom );
-
- ERR("Domain was paused. Wait and re-test.");
- usleep(10000); // 10ms
-
- goto retry;
+ // try unpausing domain, wait, and retest
+ xc_domain_unpause( xc_handle, dom );
+
+ ERR("Domain was paused. Wait and re-test.");
+ usleep(10000); // 10ms
+
+ goto retry;
}
if( ++i < 100 )
{
- ERR("Retry suspend domain.");
- usleep(10000); // 10ms
- goto retry;
+ ERR("Retry suspend domain.");
+ usleep(10000); // 10ms
+ goto retry;
}
ERR("Unable to suspend domain.");
@@ -454,26 +461,26 @@
/* If no explicit control parameters given, use defaults */
- if(!max_iters)
+ if( !max_iters )
max_iters = DEF_MAX_ITERS;
- if(!max_factor)
+ if( !max_factor )
max_factor = DEF_MAX_FACTOR;
DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live?"true":"false");
- if (mlock(&ctxt, sizeof(ctxt))) {
+ if ( mlock(&ctxt, sizeof(ctxt)) )
+ {
ERR("Unable to mlock ctxt");
return 1;
}
- if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1)
+ if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
{
ERR("Could not get domain info");
goto out;
}
- if ( xc_domain_get_vcpu_context( xc_handle, dom, /* FIXME */ 0,
- &ctxt) )
+ if ( xc_domain_get_vcpu_context(xc_handle, dom, /* FIXME */ 0, &ctxt) )
{
ERR("Could not get vcpu context");
goto out;
@@ -481,7 +488,8 @@
shared_info_frame = info.shared_info_frame;
/* A cheesy test to see whether the domain contains valid state. */
- if ( ctxt.ctrlreg[3] == 0 ){
+ if ( ctxt.ctrlreg[3] == 0 )
+ {
ERR("Domain is not in a valid Linux guest OS state");
goto out;
}
@@ -496,18 +504,17 @@
}
/* Map the shared info frame */
- live_shinfo = xc_map_foreign_range(xc_handle, dom,
- PAGE_SIZE, PROT_READ,
- shared_info_frame);
-
- if (!live_shinfo){
+ live_shinfo = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame);
+ if ( !live_shinfo )
+ {
ERR("Couldn't map live_shinfo");
goto out;
}
- live_pfn_to_mfn_frame_list_list = xc_map_foreign_range(xc_handle, dom,
- PAGE_SIZE, PROT_READ,
-
live_shinfo->arch.pfn_to_mfn_frame_list_list);
+ live_pfn_to_mfn_frame_list_list = xc_map_foreign_range(
+ xc_handle, dom,
+ PAGE_SIZE, PROT_READ, live_shinfo->arch.pfn_to_mfn_frame_list_list);
if (!live_pfn_to_mfn_frame_list_list){
ERR("Couldn't map pfn_to_mfn_frame_list_list");
@@ -515,12 +522,13 @@
}
live_pfn_to_mfn_frame_list =
- xc_map_foreign_batch(xc_handle, dom,
- PROT_READ,
- live_pfn_to_mfn_frame_list_list,
- (nr_pfns+(1024*1024)-1)/(1024*1024) );
-
- if (!live_pfn_to_mfn_frame_list){
+ xc_map_foreign_batch(xc_handle, dom,
+ PROT_READ,
+ live_pfn_to_mfn_frame_list_list,
+ (nr_pfns+(1024*1024)-1)/(1024*1024) );
+
+ if ( !live_pfn_to_mfn_frame_list)
+ {
ERR("Couldn't map pfn_to_mfn_frame_list");
goto out;
}
@@ -535,7 +543,8 @@
PROT_READ,
live_pfn_to_mfn_frame_list,
(nr_pfns+1023)/1024 );
- if( !live_pfn_to_mfn_table ){
+ if ( !live_pfn_to_mfn_table )
+ {
ERR("Couldn't map pfn_to_mfn table");
goto out;
}
@@ -544,15 +553,17 @@
mfn_to_pfn_table_start_mfn = xc_get_m2p_start_mfn( xc_handle );
live_mfn_to_pfn_table =
- xc_map_foreign_range(xc_handle, DOMID_XEN,
- PAGE_SIZE*1024, PROT_READ,
- mfn_to_pfn_table_start_mfn );
+ xc_map_foreign_range(xc_handle, DOMID_XEN,
+ PAGE_SIZE*1024, PROT_READ,
+ mfn_to_pfn_table_start_mfn );
/* Canonicalise the pfn-to-mfn table frame-number list. */
memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
- for ( i = 0; i < nr_pfns; i += 1024 ){
- if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
+ for ( i = 0; i < nr_pfns; i += 1024 )
+ {
+ if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
+ {
ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
goto out;
}
@@ -561,40 +572,44 @@
/* Domain is still running at this point */
- if( live )
+ if ( live )
{
if ( xc_shadow_control( xc_handle, dom,
DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
- NULL, 0, NULL ) < 0 ) {
+ NULL, 0, NULL ) < 0 )
+ {
ERR("Couldn't enable shadow mode");
goto out;
}
last_iter = 0;
- } else{
- /* This is a non-live suspend. Issue the call back to get the
- domain suspended */
+ }
+ else
+ {
+ /* This is a non-live suspend. Issue the call back to get the
+ domain suspended */
last_iter = 1;
- if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
- {
- ERR("Domain appears not to have suspended");
- goto out;
- }
+ if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
+ {
+ ERR("Domain appears not to have suspended");
+ goto out;
+ }
}
sent_last_iter = 1<<20; /* 4GB of pages */
/* calculate the power of 2 order of nr_pfns, e.g.
15->4 16->4 17->5 */
- for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
+ for ( i = nr_pfns-1, order_nr = 0; i ; i >>= 1, order_nr++ )
+ continue;
/* Setup to_send bitmap */
{
- /* size these for a maximal 4GB domain, to make interaction
- with balloon driver easier. It's only user space memory,
- ater all... (3x 128KB) */
+ /* size these for a maximal 4GB domain, to make interaction
+ with balloon driver easier. It's only user space memory,
+ ater all... (3x 128KB) */
int sz = ( 1<<20 ) / 8;
@@ -602,21 +617,24 @@
to_fix = calloc( 1, sz );
to_skip = malloc( sz );
- if (!to_send || !to_fix || !to_skip){
+ if ( !to_send || !to_fix || !to_skip )
+ {
ERR("Couldn't allocate to_send array");
goto out;
}
- memset( to_send, 0xff, sz );
-
- if ( mlock( to_send, sz ) ){
+ memset(to_send, 0xff, sz);
+
+ if ( mlock(to_send, sz) )
+ {
ERR("Unable to mlock to_send");
return 1;
}
/* (to fix is local only) */
- if ( mlock( to_skip, sz ) ){
+ if ( mlock(to_skip, sz) )
+ {
ERR("Unable to mlock to_skip");
return 1;
}
@@ -629,12 +647,14 @@
pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
- if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
+ if ( (pfn_type == NULL) || (pfn_batch == NULL) )
+ {
errno = ENOMEM;
goto out;
}
- if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
+ if ( mlock(pfn_type, BATCH_SIZE * sizeof(unsigned long)) )
+ {
ERR("Unable to mlock");
goto out;
}
@@ -645,31 +665,34 @@
*/
#if DEBUG
{
- int err=0;
- for ( i = 0; i < nr_pfns; i++ )
- {
- mfn = live_pfn_to_mfn_table[i];
-
- if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
- {
- fprintf(stderr, "i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
- i,mfn,live_mfn_to_pfn_table[mfn]);
- err++;
- }
- }
- fprintf(stderr, "Had %d unexplained entries in p2m table\n",err);
+ int err=0;
+ for ( i = 0; i < nr_pfns; i++ )
+ {
+ mfn = live_pfn_to_mfn_table[i];
+
+ if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
+ {
+ fprintf(stderr, "i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
+ i,mfn,live_mfn_to_pfn_table[mfn]);
+ err++;
+ }
+ }
+ fprintf(stderr, "Had %d unexplained entries in p2m table\n",err);
}
#endif
/* Start writing out the saved-domain record. */
- if (write(io_fd, &nr_pfns, sizeof(unsigned long)) !=
- sizeof(unsigned long)) {
- ERR("write: nr_pfns");
- goto out;
- }
- if (write(io_fd, pfn_to_mfn_frame_list, PAGE_SIZE) != PAGE_SIZE) {
+ if ( write(io_fd, &nr_pfns, sizeof(unsigned long)) !=
+ sizeof(unsigned long) )
+ {
+ ERR("write: nr_pfns");
+ goto out;
+ }
+
+ if ( write(io_fd, pfn_to_mfn_frame_list, PAGE_SIZE) != PAGE_SIZE )
+ {
ERR("write: pfn_to_mfn_frame_list");
goto out;
}
@@ -678,7 +701,8 @@
/* Now write out each data page, canonicalising page tables as we go... */
- while(1){
+ for ( ; ; )
+ {
unsigned int prev_pc, sent_this_iter, N, batch;
iter++;
@@ -689,10 +713,12 @@
DPRINTF("Saving memory pages: iter %d 0%%", iter);
- while( N < nr_pfns ){
+ while ( N < nr_pfns )
+ {
unsigned int this_pc = (N * 100) / nr_pfns;
- if ( (this_pc - prev_pc) >= 5 ){
+ if ( (this_pc - prev_pc) >= 5 )
+ {
DPRINTF("\b\b\b\b%3d%%", this_pc);
prev_pc = this_pc;
}
@@ -701,10 +727,10 @@
but this is fast enough for the moment. */
if ( !last_iter &&
- xc_shadow_control(xc_handle, dom,
+ xc_shadow_control(xc_handle, dom,
DOM0_SHADOW_CONTROL_OP_PEEK,
to_skip, nr_pfns, NULL) != nr_pfns )
- {
+ {
ERR("Error peeking shadow bitmap");
goto out;
}
@@ -748,7 +774,7 @@
pfn_type[batch] = live_pfn_to_mfn_table[n];
if( ! is_mapped(pfn_type[batch]) )
- {
+ {
/* not currently in pusedo-physical map -- set bit
in to_fix that we must send this page in last_iter
unless its sent sooner anyhow */
@@ -756,7 +782,7 @@
set_bit( n, to_fix );
if( iter>1 )
DPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
- iter,n,pfn_type[batch]);
+ iter,n,pfn_type[batch]);
continue;
}
@@ -790,8 +816,10 @@
goto out;
}
- for ( j = 0; j < batch; j++ ){
- if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
+ for ( j = 0; j < batch; j++ )
+ {
+ if ( (pfn_type[j] & LTAB_MASK) == XTAB )
+ {
DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
@@ -809,21 +837,25 @@
pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
}
- if (write(io_fd, &batch, sizeof(int)) != sizeof(int)) {
+ if ( write(io_fd, &batch, sizeof(int)) != sizeof(int) )
+ {
ERR("Error when writing to state file (2)");
goto out;
}
- if (write(io_fd, pfn_type, sizeof(unsigned long)*j) !=
- sizeof(unsigned long)*j) {
+ if ( write(io_fd, pfn_type, sizeof(unsigned long)*j) !=
+ (sizeof(unsigned long) * j) )
+ {
ERR("Error when writing to state file (3)");
goto out;
}
/* entering this loop, pfn_type is now in pfns (Not mfns) */
- for( j = 0; j < batch; j++ ){
+ for ( j = 0; j < batch; j++ )
+ {
/* write out pages in batch */
- if( (pfn_type[j] & LTAB_MASK) == XTAB){
+ if ( (pfn_type[j] & LTAB_MASK) == XTAB )
+ {
DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
@@ -836,7 +868,8 @@
k < (((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ?
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
1024);
- k++ ){
+ k++ )
+ {
unsigned long pfn;
if ( !(page[k] & _PAGE_PRESENT) )
@@ -849,13 +882,13 @@
{
/* I don't think this should ever happen */
fprintf(stderr, "FNI %d : [%08lx,%d] pte=%08lx, "
- "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
- j, pfn_type[j], k,
- page[k], mfn, live_mfn_to_pfn_table[mfn],
- (live_mfn_to_pfn_table[mfn]<nr_pfns)?
- live_pfn_to_mfn_table[
- live_mfn_to_pfn_table[mfn]] :
- 0xdeadbeef);
+ "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
+ j, pfn_type[j], k,
+ page[k], mfn, live_mfn_to_pfn_table[mfn],
+ (live_mfn_to_pfn_table[mfn]<nr_pfns)?
+ live_pfn_to_mfn_table[
+ live_mfn_to_pfn_table[mfn]] :
+ 0xdeadbeef);
pfn = 0; /* be suspicious */
}
@@ -865,12 +898,12 @@
#if 0
fprintf(stderr,
- "L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
- "xpfn=%d\n",
- pfn_type[j]>>28,
- j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+ "L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
+ "xpfn=%d\n",
+ pfn_type[j]>>28,
+ j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
#endif
-
+
} /* end of page table rewrite for loop */
if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
@@ -880,8 +913,9 @@
} /* end of it's a PT page */ else { /* normal page */
- if (ratewrite(io_fd, region_base + (PAGE_SIZE*j),
- PAGE_SIZE) != PAGE_SIZE) {
+ if ( ratewrite(io_fd, region_base + (PAGE_SIZE*j),
+ PAGE_SIZE) != PAGE_SIZE )
+ {
ERR("Error when writing to state file (5)");
goto out;
}
@@ -899,13 +933,13 @@
total_sent += sent_this_iter;
DPRINTF("\r %d: sent %d, skipped %d, ",
- iter, sent_this_iter, skip_this_iter );
+ iter, sent_this_iter, skip_this_iter );
if ( last_iter ) {
print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
DPRINTF("Total pages sent= %d (%.2fx)\n",
- total_sent, ((float)total_sent)/nr_pfns );
+ total_sent, ((float)total_sent)/nr_pfns );
DPRINTF("(of which %d were fixups)\n", needed_to_fix );
}
@@ -930,7 +964,7 @@
{
if (
( ( sent_this_iter > sent_last_iter ) &&
- (mbit_rate == MAX_MBIT_RATE ) ) ||
+ (mbit_rate == MAX_MBIT_RATE ) ) ||
(iter >= max_iters) ||
(sent_this_iter+skip_this_iter < 50) ||
(total_sent > nr_pfns*max_factor) )
@@ -938,15 +972,15 @@
DPRINTF("Start last iteration\n");
last_iter = 1;
- if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
- {
- ERR("Domain appears not to have suspended");
- goto out;
- }
-
- DPRINTF("SUSPEND shinfo %08lx eip %08u esi %08u\n",
- info.shared_info_frame,
- ctxt.user_regs.eip, ctxt.user_regs.esi);
+ if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
+ {
+ ERR("Domain appears not to have suspended");
+ goto out;
+ }
+
+ DPRINTF("SUSPEND shinfo %08lx eip %08u esi %08u\n",
+ info.shared_info_frame,
+ ctxt.user_regs.eip, ctxt.user_regs.esi);
}
if ( xc_shadow_control( xc_handle, dom,
@@ -972,86 +1006,92 @@
rc = 0;
/* Zero terminate */
- if (write(io_fd, &rc, sizeof(int)) != sizeof(int)) {
+ if ( write(io_fd, &rc, sizeof(int)) != sizeof(int) )
+ {
ERR("Error when writing to state file (6)");
goto out;
}
/* Send through a list of all the PFNs that were not in map at the close */
{
- unsigned int i,j;
- unsigned int pfntab[1024];
-
- for ( i = 0, j = 0; i < nr_pfns; i++ )
- {
- if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
- j++;
- }
-
- if (write(io_fd, &j, sizeof(unsigned int)) != sizeof(unsigned int)) {
- ERR("Error when writing to state file (6a)");
- goto out;
- }
-
- for ( i = 0, j = 0; i < nr_pfns; )
- {
- if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
- {
- pfntab[j++] = i;
- }
- i++;
- if ( j == 1024 || i == nr_pfns )
- {
- if (write(io_fd, &pfntab, sizeof(unsigned long)*j) !=
- sizeof(unsigned long)*j) {
- ERR("Error when writing to state file (6b)");
- goto out;
- }
- j = 0;
- }
- }
+ unsigned int i,j;
+ unsigned int pfntab[1024];
+
+ for ( i = 0, j = 0; i < nr_pfns; i++ )
+ if ( !is_mapped(live_pfn_to_mfn_table[i]) )
+ j++;
+
+ if ( write(io_fd, &j, sizeof(unsigned int)) != sizeof(unsigned int) )
+ {
+ ERR("Error when writing to state file (6a)");
+ goto out;
+ }
+
+ for ( i = 0, j = 0; i < nr_pfns; )
+ {
+ if ( !is_mapped(live_pfn_to_mfn_table[i]) )
+ {
+ pfntab[j++] = i;
+ }
+ i++;
+ if ( j == 1024 || i == nr_pfns )
+ {
+ if ( write(io_fd, &pfntab, sizeof(unsigned long)*j) !=
+ (sizeof(unsigned long) * j) )
+ {
+ ERR("Error when writing to state file (6b)");
+ goto out;
+ }
+ j = 0;
+ }
+ }
}
/* Canonicalise the suspend-record frame number. */
- if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) ){
+ if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) )
+ {
ERR("Suspend record is not in range of pseudophys map");
goto out;
}
/* Canonicalise each GDT frame number. */
- for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
- if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
+ for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+ {
+ if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
+ {
ERR("GDT frame is not in range of pseudophys map");
goto out;
}
}
/* Canonicalise the page table base pointer. */
- if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) ) {
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) )
+ {
ERR("PT base is not in range of pseudophys map");
goto out;
}
ctxt.ctrlreg[3] = live_mfn_to_pfn_table[ctxt.ctrlreg[3] >> PAGE_SHIFT] <<
PAGE_SHIFT;
- if (write(io_fd, &ctxt, sizeof(ctxt)) != sizeof(ctxt) ||
- write(io_fd, live_shinfo, PAGE_SIZE) != PAGE_SIZE) {
+ if ( write(io_fd, &ctxt, sizeof(ctxt)) != sizeof(ctxt) ||
+ write(io_fd, live_shinfo, PAGE_SIZE) != PAGE_SIZE)
+ {
ERR("Error when writing to state file (1)");
goto out;
}
out:
- if(live_shinfo)
+ if ( live_shinfo )
munmap(live_shinfo, PAGE_SIZE);
- if(live_pfn_to_mfn_frame_list)
+ if ( live_pfn_to_mfn_frame_list )
munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
- if(live_pfn_to_mfn_table)
+ if ( live_pfn_to_mfn_table )
munmap(live_pfn_to_mfn_table, nr_pfns*4);
- if(live_mfn_to_pfn_table)
+ if ( live_mfn_to_pfn_table )
munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024);
free(pfn_type);
@@ -1063,3 +1103,13 @@
DPRINTF("Save exit rc=%d\n",rc);
return !!rc;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_load_aout9.c Thu Sep 22 17:42:01 2005
@@ -64,11 +64,11 @@
dstart = round_pgup(start + ehdr.text);
end = dstart + ehdr.data + ehdr.bss;
- dsi->v_start = KZERO;
- dsi->v_kernstart = start;
- dsi->v_kernend = end;
- dsi->v_kernentry = ehdr.entry;
- dsi->v_end = end;
+ dsi->v_start = KZERO;
+ dsi->v_kernstart = start;
+ dsi->v_kernend = end;
+ dsi->v_kernentry = ehdr.entry;
+ dsi->v_end = end;
/* XXX load symbols */
@@ -168,3 +168,12 @@
return ehdr;
}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_load_bin.c Thu Sep 22 17:42:01 2005
@@ -109,8 +109,8 @@
unsigned long *parray, struct domain_setup_info *dsi);
int probe_bin(char *image,
- unsigned long image_size,
- struct load_funcs *load_funcs)
+ unsigned long image_size,
+ struct load_funcs *load_funcs)
{
if ( NULL == findtable(image, image_size) )
{
@@ -297,3 +297,13 @@
return 0;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_load_elf.c Thu Sep 22 17:42:01 2005
@@ -30,8 +30,8 @@
struct domain_setup_info *dsi);
int probe_elf(char *image,
- unsigned long image_size,
- struct load_funcs *load_funcs)
+ unsigned long image_size,
+ struct load_funcs *load_funcs)
{
Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
@@ -116,7 +116,7 @@
return -EINVAL;
}
if ( (strstr(guestinfo, "PAE=yes") != NULL) )
- dsi->pae_kernel = 1;
+ dsi->pae_kernel = 1;
break;
}
@@ -313,3 +313,13 @@
return 0;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_misc.c Thu Sep 22 17:42:01 2005
@@ -133,5 +133,15 @@
long xc_init_store(int xc_handle, int remote_port)
{
- return ioctl(xc_handle, IOCTL_PRIVCMD_INITDOMAIN_STORE, remote_port);
+ return ioctl(xc_handle, IOCTL_PRIVCMD_INITDOMAIN_STORE, remote_port);
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_private.c Thu Sep 22 17:42:01 2005
@@ -15,7 +15,7 @@
void *addr;
addr = mmap(NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0);
if ( addr == MAP_FAILED )
- return NULL;
+ return NULL;
ioctlx.num=num;
ioctlx.dom=dom;
@@ -24,10 +24,10 @@
if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 )
{
int saved_errno = errno;
- perror("XXXXXXXX");
- (void)munmap(addr, num*PAGE_SIZE);
+ perror("XXXXXXXX");
+ (void)munmap(addr, num*PAGE_SIZE);
errno = saved_errno;
- return NULL;
+ return NULL;
}
return addr;
@@ -36,15 +36,15 @@
/*******************/
void *xc_map_foreign_range(int xc_handle, u32 dom,
- int size, int prot,
- unsigned long mfn )
+ int size, int prot,
+ unsigned long mfn )
{
privcmd_mmap_t ioctlx;
privcmd_mmap_entry_t entry;
void *addr;
addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
if ( addr == MAP_FAILED )
- return NULL;
+ return NULL;
ioctlx.num=1;
ioctlx.dom=dom;
@@ -55,9 +55,9 @@
if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 )
{
int saved_errno = errno;
- (void)munmap(addr, size);
+ (void)munmap(addr, size);
errno = saved_errno;
- return NULL;
+ return NULL;
}
return addr;
}
@@ -66,7 +66,7 @@
/* NB: arr must be mlock'ed */
int xc_get_pfn_type_batch(int xc_handle,
- u32 dom, int num, unsigned long *arr)
+ u32 dom, int num, unsigned long *arr)
{
dom0_op_t op;
op.cmd = DOM0_GETPAGEFRAMEINFO2;
@@ -116,8 +116,8 @@
if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
{
- fprintf(stderr, "Dom_mmuext operation failed (rc=%ld errno=%d)-- need
to"
- " rebuild the user-space tool set?\n",ret,errno);
+ fprintf(stderr, "Dom_mmuext operation failed (rc=%ld errno=%d)-- need
to"
+ " rebuild the user-space tool set?\n",ret,errno);
}
safe_munlock(op, nr_ops*sizeof(*op));
@@ -172,7 +172,7 @@
}
int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu,
- unsigned long long ptr, unsigned long long val)
+ unsigned long long ptr, unsigned long long val)
{
mmu->updates[mmu->idx].ptr = ptr;
mmu->updates[mmu->idx].val = val;
@@ -229,7 +229,7 @@
if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
{
- fprintf(stderr, "hypercall failed (rc=%ld errno=%d)-- need to"
+ fprintf(stderr, "hypercall failed (rc=%ld errno=%d)-- need to"
" rebuild the user-space tool set?\n",ret,errno);
}
@@ -275,16 +275,16 @@
if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 )
{
- perror("xc_get_m2p_start_mfn:");
- return 0;
+ perror("xc_get_m2p_start_mfn:");
+ return 0;
}
return mfn;
}
int xc_get_pfn_list(int xc_handle,
- u32 domid,
- unsigned long *pfn_buf,
- unsigned long max_pfns)
+ u32 domid,
+ unsigned long *pfn_buf,
+ unsigned long max_pfns)
{
dom0_op_t op;
int ret;
@@ -306,16 +306,16 @@
#if 0
#ifdef DEBUG
- DPRINTF(("Ret for xc_get_pfn_list is %d\n", ret));
- if (ret >= 0) {
- int i, j;
- for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
- fprintf(stderr, "0x%x: ", i);
- for (j = 0; j < 16; j++)
- fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
- fprintf(stderr, "\n");
- }
- }
+ DPRINTF(("Ret for xc_get_pfn_list is %d\n", ret));
+ if (ret >= 0) {
+ int i, j;
+ for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
+ fprintf(stderr, "0x%x: ", i);
+ for (j = 0; j < 16; j++)
+ fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
+ fprintf(stderr, "\n");
+ }
+ }
#endif
#endif
@@ -324,10 +324,10 @@
#ifdef __ia64__
int xc_ia64_get_pfn_list(int xc_handle,
- u32 domid,
- unsigned long *pfn_buf,
- unsigned int start_page,
- unsigned int nr_pages)
+ u32 domid,
+ unsigned long *pfn_buf,
+ unsigned int start_page,
+ unsigned int nr_pages)
{
dom0_op_t op;
int ret;
@@ -372,9 +372,9 @@
}
int xc_copy_to_domain_page(int xc_handle,
- u32 domid,
- unsigned long dst_pfn,
- void *src_page)
+ u32 domid,
+ unsigned long dst_pfn,
+ void *src_page)
{
void *vaddr = xc_map_foreign_range(
xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn);
@@ -465,18 +465,28 @@
unsigned long new_mfn;
if ( xc_domain_memory_decrease_reservation(
- xc_handle, domid, 1, 0, &mfn) != 0 )
- {
- fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
- return 0;
+ xc_handle, domid, 1, 0, &mfn) != 0 )
+ {
+ fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
+ return 0;
}
if ( xc_domain_memory_increase_reservation(
xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
{
- fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
- return 0;
+ fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
+ return 0;
}
return new_mfn;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_ptrace.c Thu Sep 22 17:42:01 2005
@@ -1,25 +1,15 @@
#include <sys/ptrace.h>
#include <sys/wait.h>
#include "xc_private.h"
+#include "xg_private.h"
#include <time.h>
#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
#define X86_CR0_PG 0x80000000 /* Paging (RW) */
-
-#define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PG_FRAME (~((unsigned long)BSD_PAGE_MASK)
+#define BSD_PAGE_MASK (PAGE_SIZE-1)
#define PDRSHIFT 22
-#define PSL_T 0x00000100 /* trace enable bit */
-
+#define PSL_T 0x00000100 /* trace enable bit */
#define VCPU 0 /* XXX */
-
-/*
- * long
- * ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
- */
-
-
-int waitdomain(int domain, int *status, int options);
char * ptrace_names[] = {
"PTRACE_TRACEME",
@@ -69,67 +59,64 @@
int xss; /* 64 */
};
-#define FETCH_REGS(cpu) \
- if (!regs_valid[cpu]) \
- { \
- int retval = xc_domain_get_vcpu_context(xc_handle, domid, cpu,
&ctxt[cpu]); \
- if (retval) \
- goto error_out; \
- cr3[cpu] = ctxt[cpu].ctrlreg[3]; /* physical address */ \
- regs_valid[cpu] = 1; \
- } \
+#define FETCH_REGS(cpu) \
+ if (!regs_valid[cpu]) \
+ { \
+ int retval = xc_domain_get_vcpu_context( \
+ xc_handle, domid, cpu, &ctxt[cpu]); \
+ if (retval) \
+ goto error_out; \
+ cr3[cpu] = ctxt[cpu].ctrlreg[3]; /* physical address */ \
+ regs_valid[cpu] = 1; \
+ }
#define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define SET_PT_REGS(pt, xc) \
-{ \
- pt.ebx = xc.ebx; \
- pt.ecx = xc.ecx; \
- pt.edx = xc.edx; \
- pt.esi = xc.esi; \
- pt.edi = xc.edi; \
- pt.ebp = xc.ebp; \
- pt.eax = xc.eax; \
- pt.eip = xc.eip; \
- pt.xcs = xc.cs; \
- pt.eflags = xc.eflags; \
- pt.esp = xc.esp; \
- pt.xss = xc.ss; \
- pt.xes = xc.es; \
- pt.xds = xc.ds; \
- pt.xfs = xc.fs; \
- pt.xgs = xc.gs; \
-}
-
-#define SET_XC_REGS(pt, xc) \
-{ \
- xc.ebx = pt->ebx; \
- xc.ecx = pt->ecx; \
- xc.edx = pt->edx; \
- xc.esi = pt->esi; \
- xc.edi = pt->edi; \
- xc.ebp = pt->ebp; \
- xc.eax = pt->eax; \
- xc.eip = pt->eip; \
- xc.cs = pt->xcs; \
- xc.eflags = pt->eflags; \
- xc.esp = pt->esp; \
- xc.ss = pt->xss; \
- xc.es = pt->xes; \
- xc.ds = pt->xds; \
- xc.fs = pt->xfs; \
- xc.gs = pt->xgs; \
-}
-
+#define SET_PT_REGS(pt, xc) \
+{ \
+ pt.ebx = xc.ebx; \
+ pt.ecx = xc.ecx; \
+ pt.edx = xc.edx; \
+ pt.esi = xc.esi; \
+ pt.edi = xc.edi; \
+ pt.ebp = xc.ebp; \
+ pt.eax = xc.eax; \
+ pt.eip = xc.eip; \
+ pt.xcs = xc.cs; \
+ pt.eflags = xc.eflags; \
+ pt.esp = xc.esp; \
+ pt.xss = xc.ss; \
+ pt.xes = xc.es; \
+ pt.xds = xc.ds; \
+ pt.xfs = xc.fs; \
+ pt.xgs = xc.gs; \
+}
+
+#define SET_XC_REGS(pt, xc) \
+{ \
+ xc.ebx = pt->ebx; \
+ xc.ecx = pt->ecx; \
+ xc.edx = pt->edx; \
+ xc.esi = pt->esi; \
+ xc.edi = pt->edi; \
+ xc.ebp = pt->ebp; \
+ xc.eax = pt->eax; \
+ xc.eip = pt->eip; \
+ xc.cs = pt->xcs; \
+ xc.eflags = pt->eflags; \
+ xc.esp = pt->esp; \
+ xc.ss = pt->xss; \
+ xc.es = pt->xes; \
+ xc.ds = pt->xds; \
+ xc.fs = pt->xfs; \
+ xc.gs = pt->xgs; \
+}
#define vtopdi(va) ((va) >> PDRSHIFT)
#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
/* XXX application state */
-
-
-static int xc_handle;
-static long nr_pages = 0;
-unsigned long *page_array = NULL;
+static long nr_pages = 0;
+unsigned long *page_array = NULL;
static int regs_valid[MAX_VIRT_CPUS];
static unsigned long cr3[MAX_VIRT_CPUS];
static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
@@ -137,14 +124,60 @@
static inline int paging_enabled(vcpu_guest_context_t *v)
{
unsigned long cr0 = v->ctrlreg[0];
-
return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
}
/* --------------------- */
static void *
-map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
+map_domain_va_pae(
+ int xc_handle,
+ unsigned long domid,
+ int cpu,
+ void *guest_va,
+ int perm)
+{
+ unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+ u64 *l3, *l2, *l1;
+ static void *v;
+
+ FETCH_REGS(cpu);
+
+ l3 = xc_map_foreign_range(
+ xc_handle, domid, PAGE_SIZE, PROT_READ, cr3[cpu] >> PAGE_SHIFT);
+ if ( l3 == NULL )
+ goto error_out;
+
+ l2p = l3[l3_table_offset_pae(va)] >> PAGE_SHIFT;
+ l2 = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ, l2p);
+ if ( l2 == NULL )
+ goto error_out;
+
+ l1p = l2[l2_table_offset_pae(va)] >> PAGE_SHIFT;
+ l1 = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, perm, l1p);
+ if ( l1 == NULL )
+ goto error_out;
+
+ p = l1[l1_table_offset_pae(va)] >> PAGE_SHIFT;
+ if ( v != NULL )
+ munmap(v, PAGE_SIZE);
+ v = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, perm, p);
+ if ( v == NULL )
+ goto error_out;
+
+ return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+
+ error_out:
+ return NULL;
+}
+
+static void *
+map_domain_va(
+ int xc_handle,
+ unsigned long domid,
+ int cpu,
+ void *guest_va,
+ int perm)
{
unsigned long pde, page;
unsigned long va = (unsigned long)guest_va;
@@ -155,69 +188,88 @@
static unsigned long pde_phys[MAX_VIRT_CPUS];
static unsigned long *pde_virt[MAX_VIRT_CPUS];
static unsigned long page_phys[MAX_VIRT_CPUS];
- static unsigned long *page_virt[MAX_VIRT_CPUS];
-
+ static unsigned long *page_virt[MAX_VIRT_CPUS];
static int prev_perm[MAX_VIRT_CPUS];
-
- if (nr_pages != npgs) {
- if (nr_pages > 0)
- free(page_array);
- nr_pages = npgs;
- if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
- printf("Could not allocate memory\n");
- goto error_out;
- }
-
- if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) !=
nr_pages) {
- printf("Could not get the page frame list\n");
- goto error_out;
- }
+ static enum { MODE_UNKNOWN, MODE_32, MODE_PAE } mode;
+
+ if ( mode == MODE_UNKNOWN )
+ {
+ xen_capabilities_info_t caps;
+ (void)xc_version(xc_handle, XENVER_capabilities, caps);
+ mode = MODE_32;
+ if ( strstr(caps, "_x86_32p") )
+ mode = MODE_PAE;
+ }
+
+ if ( mode == MODE_PAE )
+ return map_domain_va_pae(xc_handle, domid, cpu, guest_va, perm);
+
+ if ( nr_pages != npgs )
+ {
+ if ( nr_pages > 0 )
+ free(page_array);
+ nr_pages = npgs;
+ if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+ {
+ printf("Could not allocate memory\n");
+ goto error_out;
+ }
+ if ( xc_get_pfn_list(xc_handle, domid,
+ page_array, nr_pages) != nr_pages )
+ {
+ printf("Could not get the page frame list\n");
+ goto error_out;
+ }
}
FETCH_REGS(cpu);
- if (cr3[cpu] != cr3_phys[cpu])
- {
- cr3_phys[cpu] = cr3[cpu];
- if (cr3_virt[cpu])
- munmap(cr3_virt[cpu], PAGE_SIZE);
- if ((cr3_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
- PROT_READ,
- cr3_phys[cpu] >> PAGE_SHIFT)) ==
NULL)
- goto error_out;
+ if ( cr3[cpu] != cr3_phys[cpu] )
+ {
+ cr3_phys[cpu] = cr3[cpu];
+ if ( cr3_virt[cpu] )
+ munmap(cr3_virt[cpu], PAGE_SIZE);
+ cr3_virt[cpu] = xc_map_foreign_range(
+ xc_handle, domid, PAGE_SIZE, PROT_READ,
+ cr3_phys[cpu] >> PAGE_SHIFT);
+ if ( cr3_virt[cpu] == NULL )
+ goto error_out;
+ }
+ if ( (pde = cr3_virt[cpu][vtopdi(va)]) == 0 )
+ goto error_out;
+ if ( (ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]) )
+ pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+ if ( pde != pde_phys[cpu] )
+ {
+ pde_phys[cpu] = pde;
+ if ( pde_virt[cpu] )
+ munmap(pde_virt[cpu], PAGE_SIZE);
+ pde_virt[cpu] = xc_map_foreign_range(
+ xc_handle, domid, PAGE_SIZE, PROT_READ,
+ pde_phys[cpu] >> PAGE_SHIFT);
+ if ( pde_virt[cpu] == NULL )
+ goto error_out;
+ }
+ if ( (page = pde_virt[cpu][vtopti(va)]) == 0 )
+ goto error_out;
+ if ( (ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]) )
+ page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+ if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) )
+ {
+ page_phys[cpu] = page;
+ if ( page_virt[cpu] )
+ munmap(page_virt[cpu], PAGE_SIZE);
+ page_virt[cpu] = xc_map_foreign_range(
+ xc_handle, domid, PAGE_SIZE, perm,
+ page_phys[cpu] >> PAGE_SHIFT);
+ if ( page_virt[cpu] == NULL )
+ {
+ page_phys[cpu] = 0;
+ goto error_out;
+ }
+ prev_perm[cpu] = perm;
}
- if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
- goto error_out;
- if ((ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]))
- pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
- if (pde != pde_phys[cpu])
- {
- pde_phys[cpu] = pde;
- if (pde_virt[cpu])
- munmap(pde_virt[cpu], PAGE_SIZE);
- if ((pde_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
- PROT_READ,
- pde_phys[cpu] >> PAGE_SHIFT)) ==
NULL)
- goto error_out;
- }
- if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
- goto error_out;
- if (ctxt[cpu].flags & VGCF_VMX_GUEST && paging_enabled(&ctxt[cpu]))
- page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
- if (page != page_phys[cpu] || perm != prev_perm[cpu])
- {
- page_phys[cpu] = page;
- if (page_virt[cpu])
- munmap(page_virt[cpu], PAGE_SIZE);
- if ((page_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
- perm,
- page_phys[cpu] >> PAGE_SHIFT)) ==
NULL) {
- printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
vtopti(va));
- page_phys[cpu] = 0;
- goto error_out;
- }
- prev_perm[cpu] = perm;
- }
+
return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
error_out:
@@ -225,7 +277,11 @@
}
int
-xc_waitdomain(int domain, int *status, int options)
+xc_waitdomain(
+ int xc_handle,
+ int domain,
+ int *status,
+ int options)
{
dom0_op_t op;
int retval;
@@ -233,38 +289,39 @@
ts.tv_sec = 0;
ts.tv_nsec = 10*1000*1000;
- if (!xc_handle)
- if ((xc_handle = xc_interface_open()) < 0)
- {
- printf("xc_interface_open failed\n");
- return -1;
- }
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = domain;
+
retry:
-
retval = do_dom0_op(xc_handle, &op);
- if (retval || op.u.getdomaininfo.domain != domain) {
- printf("getdomaininfo failed\n");
- goto done;
+ if ( retval || (op.u.getdomaininfo.domain != domain) )
+ {
+ printf("getdomaininfo failed\n");
+ goto done;
}
*status = op.u.getdomaininfo.flags;
- if (options & WNOHANG)
- goto done;
-
-
- if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)) {
- nanosleep(&ts,NULL);
- goto retry;
- }
+ if ( options & WNOHANG )
+ goto done;
+
+ if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) )
+ {
+ nanosleep(&ts,NULL);
+ goto retry;
+ }
+
done:
return retval;
}
long
-xc_ptrace(enum __ptrace_request request, u32 domid, long eaddr, long edata)
+xc_ptrace(
+ int xc_handle,
+ enum __ptrace_request request,
+ u32 domid,
+ long eaddr,
+ long edata)
{
dom0_op_t op;
int status = 0;
@@ -277,108 +334,124 @@
op.interface_version = DOM0_INTERFACE_VERSION;
- if (!xc_handle)
- if ((xc_handle = xc_interface_open()) < 0)
- return -1;
-#if 0
- printf("%20s %d, %p, %p \n", ptrace_names[request], domid, addr, data);
-#endif
- switch (request) {
+ switch ( request )
+ {
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
- if ((guest_va = (unsigned long *)map_domain_va(domid, cpu, addr,
PROT_READ)) == NULL) {
- status = EFAULT;
- goto error_out;
- }
-
- retval = *guest_va;
- break;
+ guest_va = (unsigned long *)map_domain_va(
+ xc_handle, domid, cpu, addr, PROT_READ);
+ if ( guest_va == NULL )
+ {
+ status = EFAULT;
+ goto error_out;
+ }
+ retval = *guest_va;
+ break;
+
case PTRACE_POKETEXT:
case PTRACE_POKEDATA:
- if ((guest_va = (unsigned long *)map_domain_va(domid, cpu, addr,
PROT_READ|PROT_WRITE)) == NULL) {
- status = EFAULT;
- goto error_out;
- }
-
- *guest_va = (unsigned long)data;
- break;
+ guest_va = (unsigned long *)map_domain_va(
+ xc_handle, domid, cpu, addr, PROT_READ|PROT_WRITE);
+ if ( guest_va == NULL )
+ {
+ status = EFAULT;
+ goto error_out;
+ }
+ *guest_va = (unsigned long)data;
+ break;
+
case PTRACE_GETREGS:
case PTRACE_GETFPREGS:
case PTRACE_GETFPXREGS:
- FETCH_REGS(cpu);
-
- if (request == PTRACE_GETREGS) {
- SET_PT_REGS(pt, ctxt[cpu].user_regs);
- memcpy(data, &pt, sizeof(struct gdb_regs));
- } else if (request == PTRACE_GETFPREGS)
- memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
- else /*if (request == PTRACE_GETFPXREGS)*/
- memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
- break;
+ FETCH_REGS(cpu);
+ if ( request == PTRACE_GETREGS )
+ {
+ SET_PT_REGS(pt, ctxt[cpu].user_regs);
+ memcpy(data, &pt, sizeof(struct gdb_regs));
+ }
+ else if (request == PTRACE_GETFPREGS)
+ {
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ }
+ else /*if (request == PTRACE_GETFPXREGS)*/
+ {
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ }
+ break;
+
case PTRACE_SETREGS:
- op.cmd = DOM0_SETDOMAININFO;
- SET_XC_REGS(((struct gdb_regs *)data), ctxt[VCPU].user_regs);
- op.u.setdomaininfo.domain = domid;
- /* XXX need to understand multiple vcpus */
- op.u.setdomaininfo.vcpu = cpu;
- op.u.setdomaininfo.ctxt = &ctxt[cpu];
- retval = do_dom0_op(xc_handle, &op);
- if (retval)
- goto error_out;
-
- break;
+ op.cmd = DOM0_SETDOMAININFO;
+ SET_XC_REGS(((struct gdb_regs *)data), ctxt[VCPU].user_regs);
+ op.u.setdomaininfo.domain = domid;
+ /* XXX need to understand multiple vcpus */
+ op.u.setdomaininfo.vcpu = cpu;
+ op.u.setdomaininfo.ctxt = &ctxt[cpu];
+ retval = do_dom0_op(xc_handle, &op);
+ if (retval)
+ goto error_out;
+ break;
+
case PTRACE_ATTACH:
- op.cmd = DOM0_GETDOMAININFO;
- op.u.getdomaininfo.domain = domid;
- retval = do_dom0_op(xc_handle, &op);
- if (retval || op.u.getdomaininfo.domain != domid) {
- perror("dom0 op failed");
- goto error_out;
- }
- if (op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) {
- printf("domain currently paused\n");
- goto error_out;
- }
- printf("domain not currently paused\n");
- op.cmd = DOM0_PAUSEDOMAIN;
- op.u.pausedomain.domain = domid;
- retval = do_dom0_op(xc_handle, &op);
- break;
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = domid;
+ retval = do_dom0_op(xc_handle, &op);
+ if ( retval || (op.u.getdomaininfo.domain != domid) )
+ {
+ perror("dom0 op failed");
+ goto error_out;
+ }
+ if ( op.u.getdomaininfo.flags & DOMFLAGS_PAUSED )
+ {
+ printf("domain currently paused\n");
+ goto error_out;
+ }
+ printf("domain not currently paused\n");
+ op.cmd = DOM0_PAUSEDOMAIN;
+ op.u.pausedomain.domain = domid;
+ retval = do_dom0_op(xc_handle, &op);
+ break;
+
case PTRACE_SINGLESTEP:
- ctxt[VCPU].user_regs.eflags |= PSL_T;
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = domid;
- op.u.setdomaininfo.vcpu = 0;
- op.u.setdomaininfo.ctxt = &ctxt[cpu];
- retval = do_dom0_op(xc_handle, &op);
- if (retval) {
- perror("dom0 op failed");
- goto error_out;
- }
- /* FALLTHROUGH */
+ ctxt[VCPU].user_regs.eflags |= PSL_T;
+ op.cmd = DOM0_SETDOMAININFO;
+ op.u.setdomaininfo.domain = domid;
+ op.u.setdomaininfo.vcpu = 0;
+ op.u.setdomaininfo.ctxt = &ctxt[cpu];
+ retval = do_dom0_op(xc_handle, &op);
+ if ( retval )
+ {
+ perror("dom0 op failed");
+ goto error_out;
+ }
+ /* FALLTHROUGH */
+
case PTRACE_CONT:
case PTRACE_DETACH:
- if (request != PTRACE_SINGLESTEP) {
- FETCH_REGS(cpu);
- /* Clear trace flag */
- if (ctxt[cpu].user_regs.eflags & PSL_T) {
- ctxt[cpu].user_regs.eflags &= ~PSL_T;
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = domid;
- op.u.setdomaininfo.vcpu = cpu;
- op.u.setdomaininfo.ctxt = &ctxt[cpu];
- retval = do_dom0_op(xc_handle, &op);
- if (retval) {
- perror("dom0 op failed");
- goto error_out;
- }
- }
- }
- regs_valid[cpu] = 0;
- op.cmd = DOM0_UNPAUSEDOMAIN;
- op.u.unpausedomain.domain = domid > 0 ? domid : -domid;
- retval = do_dom0_op(xc_handle, &op);
- break;
+ if ( request != PTRACE_SINGLESTEP )
+ {
+ FETCH_REGS(cpu);
+ /* Clear trace flag */
+ if ( ctxt[cpu].user_regs.eflags & PSL_T )
+ {
+ ctxt[cpu].user_regs.eflags &= ~PSL_T;
+ op.cmd = DOM0_SETDOMAININFO;
+ op.u.setdomaininfo.domain = domid;
+ op.u.setdomaininfo.vcpu = cpu;
+ op.u.setdomaininfo.ctxt = &ctxt[cpu];
+ retval = do_dom0_op(xc_handle, &op);
+ if ( retval )
+ {
+ perror("dom0 op failed");
+ goto error_out;
+ }
+ }
+ }
+ regs_valid[cpu] = 0;
+ op.cmd = DOM0_UNPAUSEDOMAIN;
+ op.u.unpausedomain.domain = domid > 0 ? domid : -domid;
+ retval = do_dom0_op(xc_handle, &op);
+ break;
+
case PTRACE_SETFPREGS:
case PTRACE_SETFPXREGS:
case PTRACE_PEEKUSER:
@@ -386,20 +459,33 @@
case PTRACE_SYSCALL:
case PTRACE_KILL:
#ifdef DEBUG
- printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
+ printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
#endif
- /* XXX not yet supported */
- status = ENOSYS;
- break;
+ /* XXX not yet supported */
+ status = ENOSYS;
+ break;
+
case PTRACE_TRACEME:
- printf("PTRACE_TRACEME is an invalid request under Xen\n");
- status = EINVAL;
+ printf("PTRACE_TRACEME is an invalid request under Xen\n");
+ status = EINVAL;
}
- if (status) {
- errno = status;
- retval = -1;
- }
+ if ( status )
+ {
+ errno = status;
+ retval = -1;
+ }
+
error_out:
return retval;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_ptrace_core.c Thu Sep 22 17:42:01 2005
@@ -3,19 +3,14 @@
#include "xc_private.h"
#include <time.h>
-
-#define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PG_FRAME (~((unsigned long)BSD_PAGE_MASK)
+#define BSD_PAGE_MASK (PAGE_SIZE-1)
#define PDRSHIFT 22
-#define PSL_T 0x00000100 /* trace enable bit */
-
#define VCPU 0 /* XXX */
/*
* long
* ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
*/
-
struct gdb_regs {
long ebx; /* 0 */
@@ -38,44 +33,44 @@
};
#define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define SET_PT_REGS(pt, xc) \
-{ \
- pt.ebx = xc.ebx; \
- pt.ecx = xc.ecx; \
- pt.edx = xc.edx; \
- pt.esi = xc.esi; \
- pt.edi = xc.edi; \
- pt.ebp = xc.ebp; \
- pt.eax = xc.eax; \
- pt.eip = xc.eip; \
- pt.xcs = xc.cs; \
- pt.eflags = xc.eflags; \
- pt.esp = xc.esp; \
- pt.xss = xc.ss; \
- pt.xes = xc.es; \
- pt.xds = xc.ds; \
- pt.xfs = xc.fs; \
- pt.xgs = xc.gs; \
-}
-
-#define SET_XC_REGS(pt, xc) \
-{ \
- xc.ebx = pt->ebx; \
- xc.ecx = pt->ecx; \
- xc.edx = pt->edx; \
- xc.esi = pt->esi; \
- xc.edi = pt->edi; \
- xc.ebp = pt->ebp; \
- xc.eax = pt->eax; \
- xc.eip = pt->eip; \
- xc.cs = pt->xcs; \
- xc.eflags = pt->eflags; \
- xc.esp = pt->esp; \
- xc.ss = pt->xss; \
- xc.es = pt->xes; \
- xc.ds = pt->xds; \
- xc.fs = pt->xfs; \
- xc.gs = pt->xgs; \
+#define SET_PT_REGS(pt, xc) \
+{ \
+ pt.ebx = xc.ebx; \
+ pt.ecx = xc.ecx; \
+ pt.edx = xc.edx; \
+ pt.esi = xc.esi; \
+ pt.edi = xc.edi; \
+ pt.ebp = xc.ebp; \
+ pt.eax = xc.eax; \
+ pt.eip = xc.eip; \
+ pt.xcs = xc.cs; \
+ pt.eflags = xc.eflags; \
+ pt.esp = xc.esp; \
+ pt.xss = xc.ss; \
+ pt.xes = xc.es; \
+ pt.xds = xc.ds; \
+ pt.xfs = xc.fs; \
+ pt.xgs = xc.gs; \
+}
+
+#define SET_XC_REGS(pt, xc) \
+{ \
+ xc.ebx = pt->ebx; \
+ xc.ecx = pt->ecx; \
+ xc.edx = pt->edx; \
+ xc.esi = pt->esi; \
+ xc.edi = pt->edi; \
+ xc.ebp = pt->ebp; \
+ xc.eax = pt->eax; \
+ xc.eip = pt->eip; \
+ xc.cs = pt->xcs; \
+ xc.eflags = pt->eflags; \
+ xc.esp = pt->esp; \
+ xc.ss = pt->xss; \
+ xc.es = pt->xes; \
+ xc.ds = pt->xds; \
+ xc.fs = pt->xfs; \
+ xc.gs = pt->xgs; \
}
@@ -84,10 +79,9 @@
/* XXX application state */
-
-static long nr_pages = 0;
-static unsigned long *p2m_array = NULL;
-static unsigned long *m2p_array = NULL;
+static long nr_pages = 0;
+static unsigned long *p2m_array = NULL;
+static unsigned long *m2p_array = NULL;
static unsigned long pages_offset;
static unsigned long cr3[MAX_VIRT_CPUS];
static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
@@ -117,54 +111,54 @@
if (cr3[cpu] != cr3_phys[cpu])
{
- cr3_phys[cpu] = cr3[cpu];
- if (cr3_virt[cpu])
- munmap(cr3_virt[cpu], PAGE_SIZE);
- v = mmap(
+ cr3_phys[cpu] = cr3[cpu];
+ if (cr3_virt[cpu])
+ munmap(cr3_virt[cpu], PAGE_SIZE);
+ v = mmap(
NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
map_mtop_offset(cr3_phys[cpu]));
if (v == MAP_FAILED)
- {
- perror("mmap failed");
- goto error_out;
- }
+ {
+ perror("mmap failed");
+ goto error_out;
+ }
cr3_virt[cpu] = v;
}
if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
- goto error_out;
+ goto error_out;
if (ctxt[cpu].flags & VGCF_VMX_GUEST)
- pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+ pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
if (pde != pde_phys[cpu])
{
- pde_phys[cpu] = pde;
- if (pde_virt[cpu])
- munmap(pde_virt[cpu], PAGE_SIZE);
- v = mmap(
+ pde_phys[cpu] = pde;
+ if (pde_virt[cpu])
+ munmap(pde_virt[cpu], PAGE_SIZE);
+ v = mmap(
NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
map_mtop_offset(pde_phys[cpu]));
if (v == MAP_FAILED)
- goto error_out;
+ goto error_out;
pde_virt[cpu] = v;
}
if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
- goto error_out;
+ goto error_out;
if (ctxt[cpu].flags & VGCF_VMX_GUEST)
- page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+ page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
if (page != page_phys[cpu])
{
- page_phys[cpu] = page;
- if (page_virt[cpu])
- munmap(page_virt[cpu], PAGE_SIZE);
- v = mmap(
+ page_phys[cpu] = page;
+ if (page_virt[cpu])
+ munmap(page_virt[cpu], PAGE_SIZE);
+ v = mmap(
NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
map_mtop_offset(page_phys[cpu]));
if (v == MAP_FAILED) {
- printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
vtopti(va));
- page_phys[cpu] = 0;
- goto error_out;
- }
+ printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
vtopti(va));
+ page_phys[cpu] = 0;
+ goto error_out;
+ }
page_virt[cpu] = v;
- }
+ }
return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
error_out:
@@ -172,7 +166,11 @@
}
int
-xc_waitdomain_core(int domfd, int *status, int options)
+xc_waitdomain_core(
+ int xc_handle,
+ int domfd,
+ int *status,
+ int options)
{
int retval = -1;
int nr_vcpus;
@@ -181,37 +179,37 @@
if (nr_pages == 0) {
- if (read(domfd, &header, sizeof(header)) != sizeof(header))
- return -1;
-
- nr_pages = header.xch_nr_pages;
- nr_vcpus = header.xch_nr_vcpus;
- pages_offset = header.xch_pages_offset;
-
- if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) !=
- sizeof(vcpu_guest_context_t)*nr_vcpus)
- return -1;
-
- for (i = 0; i < nr_vcpus; i++) {
- cr3[i] = ctxt[i].ctrlreg[3];
- }
- if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
- printf("Could not allocate p2m_array\n");
- goto error_out;
- }
- if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) !=
- sizeof(unsigned long)*nr_pages)
- return -1;
-
- if ((m2p_array = malloc((1<<20) * sizeof(unsigned long))) == NULL) {
- printf("Could not allocate m2p array\n");
- goto error_out;
- }
- bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
-
- for (i = 0; i < nr_pages; i++) {
- m2p_array[p2m_array[i]] = i;
- }
+ if (read(domfd, &header, sizeof(header)) != sizeof(header))
+ return -1;
+
+ nr_pages = header.xch_nr_pages;
+ nr_vcpus = header.xch_nr_vcpus;
+ pages_offset = header.xch_pages_offset;
+
+ if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) !=
+ sizeof(vcpu_guest_context_t)*nr_vcpus)
+ return -1;
+
+ for (i = 0; i < nr_vcpus; i++) {
+ cr3[i] = ctxt[i].ctrlreg[3];
+ }
+ if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate p2m_array\n");
+ goto error_out;
+ }
+ if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) !=
+ sizeof(unsigned long)*nr_pages)
+ return -1;
+
+ if ((m2p_array = malloc((1<<20) * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate m2p array\n");
+ goto error_out;
+ }
+ bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
+
+ for (i = 0; i < nr_pages; i++) {
+ m2p_array[p2m_array[i]] = i;
+ }
}
retval = 0;
@@ -221,7 +219,12 @@
}
long
-xc_ptrace_core(enum __ptrace_request request, u32 domfd, long eaddr, long
edata)
+xc_ptrace_core(
+ int xc_handle,
+ enum __ptrace_request request,
+ u32 domfd,
+ long eaddr,
+ long edata)
{
int status = 0;
struct gdb_regs pt;
@@ -234,38 +237,38 @@
#if 0
printf("%20s %d, %p, %p \n", ptrace_names[request], domid, addr, data);
#endif
- switch (request) {
+ switch (request) {
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
- if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) ==
NULL) {
- status = EFAULT;
- goto error_out;
- }
-
- retval = *guest_va;
- break;
+ if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) ==
NULL) {
+ status = EFAULT;
+ goto error_out;
+ }
+
+ retval = *guest_va;
+ break;
case PTRACE_POKETEXT:
case PTRACE_POKEDATA:
- if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) ==
NULL) {
- status = EFAULT;
- goto error_out;
- }
- *guest_va = (unsigned long)data;
- break;
+ if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) ==
NULL) {
+ status = EFAULT;
+ goto error_out;
+ }
+ *guest_va = (unsigned long)data;
+ break;
case PTRACE_GETREGS:
case PTRACE_GETFPREGS:
case PTRACE_GETFPXREGS:
- if (request == PTRACE_GETREGS) {
- SET_PT_REGS(pt, ctxt[cpu].user_regs);
- memcpy(data, &pt, sizeof(struct gdb_regs));
- } else if (request == PTRACE_GETFPREGS)
- memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
- else /*if (request == PTRACE_GETFPXREGS)*/
- memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
- break;
+ if (request == PTRACE_GETREGS) {
+ SET_PT_REGS(pt, ctxt[cpu].user_regs);
+ memcpy(data, &pt, sizeof(struct gdb_regs));
+ } else if (request == PTRACE_GETFPREGS)
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ else /*if (request == PTRACE_GETFPXREGS)*/
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ break;
case PTRACE_ATTACH:
- retval = 0;
- break;
+ retval = 0;
+ break;
case PTRACE_SETREGS:
case PTRACE_SINGLESTEP:
case PTRACE_CONT:
@@ -277,19 +280,29 @@
case PTRACE_SYSCALL:
case PTRACE_KILL:
#ifdef DEBUG
- printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
+ printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
#endif
- status = ENOSYS;
- break;
+ status = ENOSYS;
+ break;
case PTRACE_TRACEME:
- printf("PTRACE_TRACEME is an invalid request under Xen\n");
- status = EINVAL;
+ printf("PTRACE_TRACEME is an invalid request under Xen\n");
+ status = EINVAL;
}
if (status) {
- errno = status;
- retval = -1;
+ errno = status;
+ retval = -1;
}
error_out:
return retval;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_vmx_build.c Thu Sep 22 17:42:01 2005
@@ -107,11 +107,38 @@
mem_mapp->nr_map = nr_map;
}
+/*
+ * Use E820 reserved memory 0x9F800 to pass number of vcpus to vmxloader
+ * vmxloader will use it to config ACPI MADT table
+ */
+#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+static int
+set_nr_vcpus(int xc_handle, u32 dom, unsigned long *pfn_list,
+ struct domain_setup_info *dsi, unsigned long vcpus)
+{
+ char *va_map;
+ unsigned long *va_vcpus;
+
+ va_map = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ pfn_list[(0x9F000 - dsi->v_start) >> PAGE_SHIFT]);
+ if ( va_map == NULL )
+ return -1;
+
+ va_vcpus = (unsigned long *)(va_map + 0x800);
+ *va_vcpus++ = VCPU_MAGIC;
+ *va_vcpus++ = vcpus;
+
+ munmap(va_map, PAGE_SIZE);
+
+ return 0;
+}
+
#ifdef __i386__
static int zap_mmio_range(int xc_handle, u32 dom,
- l2_pgentry_32_t *vl2tab,
- unsigned long mmio_range_start,
- unsigned long mmio_range_size)
+ l2_pgentry_32_t *vl2tab,
+ unsigned long mmio_range_start,
+ unsigned long mmio_range_size)
{
unsigned long mmio_addr;
unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
@@ -123,12 +150,14 @@
vl2e = vl2tab[l2_table_offset(mmio_addr)];
if (vl2e == 0)
continue;
- vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
- if (vl1tab == 0) {
- PERROR("Failed zap MMIO range");
- return -1;
- }
+ vl1tab = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+ if ( vl1tab == 0 )
+ {
+ PERROR("Failed zap MMIO range");
+ return -1;
+ }
vl1tab[l1_table_offset(mmio_addr)] = 0;
munmap(vl1tab, PAGE_SIZE);
}
@@ -136,114 +165,118 @@
}
static int zap_mmio_ranges(int xc_handle, u32 dom,
- unsigned long l2tab,
- struct mem_map *mem_mapp)
+ unsigned long l2tab,
+ struct mem_map *mem_mapp)
{
int i;
l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE,
- l2tab >> PAGE_SHIFT);
- if (vl2tab == 0)
- return -1;
- for (i = 0; i < mem_mapp->nr_map; i++) {
- if ((mem_mapp->map[i].type == E820_IO)
- && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
- if (zap_mmio_range(xc_handle, dom, vl2tab,
- mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
- return -1;
- }
+ PROT_READ|PROT_WRITE,
+ l2tab >> PAGE_SHIFT);
+ if ( vl2tab == 0 )
+ return -1;
+
+ for ( i = 0; i < mem_mapp->nr_map; i++ )
+ {
+ if ( (mem_mapp->map[i].type == E820_IO) &&
+ (mem_mapp->map[i].caching_attr == MEMMAP_UC) &&
+ (zap_mmio_range(xc_handle, dom, vl2tab,
+ mem_mapp->map[i].addr,
+ mem_mapp->map[i].size) == -1) )
+ return -1;
+ }
+
munmap(vl2tab, PAGE_SIZE);
return 0;
}
#else
static int zap_mmio_range(int xc_handle, u32 dom,
- l3_pgentry_t *vl3tab,
- unsigned long mmio_range_start,
- unsigned long mmio_range_size)
-{
- unsigned long mmio_addr;
- unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
- unsigned long vl2e = 0;
- unsigned long vl3e;
- l1_pgentry_t *vl1tab;
- l2_pgentry_t *vl2tab;
+ l3_pgentry_t *vl3tab,
+ unsigned long mmio_range_start,
+ unsigned long mmio_range_size)
+{
+ unsigned long mmio_addr;
+ unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+ unsigned long vl2e = 0;
+ unsigned long vl3e;
+ l1_pgentry_t *vl1tab;
+ l2_pgentry_t *vl2tab;
- mmio_addr = mmio_range_start & PAGE_MASK;
- for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
- {
- vl3e = vl3tab[l3_table_offset(mmio_addr)];
- if ( vl3e == 0 )
- continue;
-
- vl2tab = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
- if ( vl2tab == NULL )
- {
- PERROR("Failed zap MMIO range");
- return -1;
- }
-
- vl2e = vl2tab[l2_table_offset(mmio_addr)];
- if ( vl2e == 0 )
- {
- munmap(vl2tab, PAGE_SIZE);
- continue;
- }
-
- vl1tab = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
- if ( vl1tab == NULL )
- {
- PERROR("Failed zap MMIO range");
- munmap(vl2tab, PAGE_SIZE);
- return -1;
- }
-
- vl1tab[l1_table_offset(mmio_addr)] = 0;
- munmap(vl2tab, PAGE_SIZE);
- munmap(vl1tab, PAGE_SIZE);
- }
- return 0;
+ mmio_addr = mmio_range_start & PAGE_MASK;
+ for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
+ {
+ vl3e = vl3tab[l3_table_offset(mmio_addr)];
+ if ( vl3e == 0 )
+ continue;
+
+ vl2tab = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
+ if ( vl2tab == NULL )
+ {
+ PERROR("Failed zap MMIO range");
+ return -1;
+ }
+
+ vl2e = vl2tab[l2_table_offset(mmio_addr)];
+ if ( vl2e == 0 )
+ {
+ munmap(vl2tab, PAGE_SIZE);
+ continue;
+ }
+
+ vl1tab = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
+ if ( vl1tab == NULL )
+ {
+ PERROR("Failed zap MMIO range");
+ munmap(vl2tab, PAGE_SIZE);
+ return -1;
+ }
+
+ vl1tab[l1_table_offset(mmio_addr)] = 0;
+ munmap(vl2tab, PAGE_SIZE);
+ munmap(vl1tab, PAGE_SIZE);
+ }
+ return 0;
}
static int zap_mmio_ranges(int xc_handle, u32 dom,
unsigned long l3tab,
struct mem_map *mem_mapp)
{
- int i;
- l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE,
- l3tab >> PAGE_SHIFT);
- if (vl3tab == 0)
- return -1;
- for (i = 0; i < mem_mapp->nr_map; i++) {
- if ((mem_mapp->map[i].type == E820_IO)
- && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
- if (zap_mmio_range(xc_handle, dom, vl3tab,
- mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
- return -1;
- }
- munmap(vl3tab, PAGE_SIZE);
- return 0;
+ int i;
+ l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l3tab >> PAGE_SHIFT);
+ if (vl3tab == 0)
+ return -1;
+ for (i = 0; i < mem_mapp->nr_map; i++) {
+ if ((mem_mapp->map[i].type == E820_IO)
+ && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+ if (zap_mmio_range(xc_handle, dom, vl3tab,
+ mem_mapp->map[i].addr, mem_mapp->map[i].size)
== -1)
+ return -1;
+ }
+ munmap(vl3tab, PAGE_SIZE);
+ return 0;
}
#endif
static int setup_guest(int xc_handle,
- u32 dom, int memsize,
- char *image, unsigned long image_size,
- gzFile initrd_gfd, unsigned long initrd_len,
- unsigned long nr_pages,
- vcpu_guest_context_t *ctxt,
- const char *cmdline,
- unsigned long shared_info_frame,
- unsigned int control_evtchn,
- unsigned long flags,
- unsigned int vcpus,
- unsigned int store_evtchn,
- unsigned long *store_mfn,
- struct mem_map *mem_mapp
- )
+ u32 dom, int memsize,
+ char *image, unsigned long image_size,
+ gzFile initrd_gfd, unsigned long initrd_len,
+ unsigned long nr_pages,
+ vcpu_guest_context_t *ctxt,
+ const char *cmdline,
+ unsigned long shared_info_frame,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn,
+ struct mem_map *mem_mapp
+ )
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -303,7 +336,8 @@
/* memsize is in megabytes */
v_end = memsize << 20;
- vinitrd_end = v_end - PAGE_SIZE; /* leaving the top 4k untouched
for IO requests page use */
+ /* leaving the top 4k untouched for IO requests page use */
+ vinitrd_end = v_end - PAGE_SIZE;
vinitrd_start = vinitrd_end - initrd_len;
vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1));
@@ -369,16 +403,28 @@
goto error_out;
}
xc_copy_to_domain_page(xc_handle, dom,
- page_array[i>>PAGE_SHIFT], page);
+ page_array[i>>PAGE_SHIFT], page);
}
}
if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
goto error_out;
+ /* First allocate page for page dir or pdpt */
+ ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+ if ( page_array[ppt_alloc] > 0xfffff )
+ {
+ unsigned long nmfn;
+ nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
+ if ( nmfn == 0 )
+ {
+ fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+ goto error_out;
+ }
+ page_array[ppt_alloc] = nmfn;
+ }
+
#ifdef __i386__
- /* First allocate page for page dir. */
- ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
ctxt->ctrlreg[3] = l2tab;
@@ -414,8 +460,6 @@
munmap(vl1tab, PAGE_SIZE);
munmap(vl2tab, PAGE_SIZE);
#else
- /* First allocate pdpt */
- ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
/* here l3tab means pdpt, only 4 entry is used */
l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
ctxt->ctrlreg[3] = l3tab;
@@ -438,8 +482,8 @@
munmap(vl2tab, PAGE_SIZE);
if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE,
- l2tab >> PAGE_SHIFT)) == NULL )
+ PROT_READ|PROT_WRITE,
+ l2tab >> PAGE_SHIFT)) == NULL )
goto error_out;
memset(vl2tab, 0, PAGE_SIZE);
@@ -452,8 +496,8 @@
if ( vl1tab != NULL )
munmap(vl1tab, PAGE_SIZE);
if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE,
- l1tab >> PAGE_SHIFT)) == NULL )
+ PROT_READ|PROT_WRITE,
+ l1tab >> PAGE_SHIFT)) == NULL )
{
munmap(vl2tab, PAGE_SIZE);
goto error_out;
@@ -475,15 +519,16 @@
for ( count = 0; count < nr_pages; count++ )
{
if ( xc_add_mmu_update(xc_handle, mmu,
- (page_array[count] << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, count) )
- goto error_out;
- }
-
+ (page_array[count] << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, count) )
+ goto error_out;
+ }
+
+ set_nr_vcpus(xc_handle, dom, page_array, &dsi, vcpus);
if ((boot_paramsp = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
goto error_out;
memset(boot_paramsp, 0, sizeof(*boot_paramsp));
@@ -548,9 +593,9 @@
#if defined (__i386__)
if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
#else
- if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+ if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
#endif
- goto error_out;
+ goto error_out;
boot_paramsp->e820_map_nr = mem_mapp->nr_map;
for (i=0; i<mem_mapp->nr_map; i++) {
boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
@@ -562,9 +607,9 @@
munmap(boot_paramsp, PAGE_SIZE);
if ((boot_gdtp = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
- goto error_out;
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
+ goto error_out;
memset(boot_gdtp, 0, PAGE_SIZE);
boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
@@ -574,20 +619,24 @@
/* shared_info page starts its life empty. */
if ((shared_info = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- shared_info_frame)) == 0)
- goto error_out;
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ shared_info_frame)) == 0)
+ goto error_out;
memset(shared_info, 0, sizeof(shared_info_t));
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+ shared_info->n_vcpu = vcpus;
+ printf(" VCPUS: %d\n", shared_info->n_vcpu);
+
munmap(shared_info, PAGE_SIZE);
/* Populate the event channel port in the shared page */
if ((sp = (shared_iopage_t *) xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[shared_page_frame])) == 0)
- goto error_out;
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[shared_page_frame])) == 0)
+ goto error_out;
memset(sp, 0, PAGE_SIZE);
sp->sp_global.eport = control_evtchn;
munmap(sp, PAGE_SIZE);
@@ -612,7 +661,7 @@
ctxt->user_regs.edx = vboot_gdt_start;
ctxt->user_regs.eax = 0x800;
ctxt->user_regs.esp = vboot_gdt_end;
- ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal
boot cpu */
+ ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot
cpu */
ctxt->user_regs.ecx = mem_mapp->nr_map;
ctxt->user_regs.esi = vboot_params_start;
ctxt->user_regs.edi = vboot_params_start + 0x2d0;
@@ -636,9 +685,9 @@
#ifdef __i386__
__asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
- : "=a" (eax), "=c" (ecx)
- : "0" (1)
- : "dx");
+ : "=a" (eax), "=c" (ecx)
+ : "0" (1)
+ : "dx");
#elif defined __x86_64__
__asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
: "=a" (eax), "=c" (ecx)
@@ -653,17 +702,17 @@
}
int xc_vmx_build(int xc_handle,
- u32 domid,
- int memsize,
- const char *image_name,
- struct mem_map *mem_mapp,
- const char *ramdisk_name,
- const char *cmdline,
- unsigned int control_evtchn,
- unsigned long flags,
- unsigned int vcpus,
- unsigned int store_evtchn,
- unsigned long *store_mfn)
+ u32 domid,
+ int memsize,
+ const char *image_name,
+ struct mem_map *mem_mapp,
+ const char *ramdisk_name,
+ const char *cmdline,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
@@ -735,11 +784,11 @@
}
if ( setup_guest(xc_handle, domid, memsize, image, image_size,
- initrd_gfd, initrd_size, nr_pages,
- ctxt, cmdline,
- op.u.getdomaininfo.shared_info_frame,
- control_evtchn, flags, vcpus, store_evtchn, store_mfn,
- mem_mapp) < 0 )
+ initrd_gfd, initrd_size, nr_pages,
+ ctxt, cmdline,
+ op.u.getdomaininfo.shared_info_frame,
+ control_evtchn, flags, vcpus, store_evtchn, store_mfn,
+ mem_mapp) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -770,8 +819,8 @@
/* Ring 1 stack is the initial stack. */
/*
- ctxt->kernel_ss = FLAT_KERNEL_DS;
- ctxt->kernel_sp = vstartinfo_start;
+ ctxt->kernel_ss = FLAT_KERNEL_DS;
+ ctxt->kernel_sp = vstartinfo_start;
*/
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
@@ -851,7 +900,7 @@
return -EINVAL;
}
shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
- (ehdr->e_shstrndx*ehdr->e_shentsize));
+ (ehdr->e_shstrndx*ehdr->e_shentsize));
shstrtab = elfbase + shdr->sh_offset;
for ( h = 0; h < ehdr->e_phnum; h++ )
@@ -906,9 +955,9 @@
{
pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
if ((va = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_WRITE,
- parray[pa>>PAGE_SHIFT])) == 0)
- return -1;
+ xch, dom, PAGE_SIZE, PROT_WRITE,
+ parray[pa>>PAGE_SHIFT])) == 0)
+ return -1;
chunksz = phdr->p_filesz - done;
if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
@@ -921,9 +970,9 @@
{
pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
if ((va = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_WRITE,
- parray[pa>>PAGE_SHIFT])) == 0)
- return -1;
+ xch, dom, PAGE_SIZE, PROT_WRITE,
+ parray[pa>>PAGE_SHIFT])) == 0)
+ return -1;
chunksz = phdr->p_memsz - done;
if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
@@ -934,3 +983,13 @@
return 0;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xenctrl.h Thu Sep 22 17:42:01 2005
@@ -101,23 +101,31 @@
} xc_core_header_t;
-long xc_ptrace(enum __ptrace_request request,
- u32 domid,
- long addr,
- long data);
-
-long xc_ptrace_core(enum __ptrace_request request,
- u32 domid,
- long addr,
- long data);
-
-int xc_waitdomain(int domain,
- int *status,
- int options);
-
-int xc_waitdomain_core(int domain,
- int *status,
- int options);
+long xc_ptrace(
+ int xc_handle,
+ enum __ptrace_request request,
+ u32 domid,
+ long addr,
+ long data);
+
+long xc_ptrace_core(
+ int xc_handle,
+ enum __ptrace_request request,
+ u32 domid,
+ long addr,
+ long data);
+
+int xc_waitdomain(
+ int xc_handle,
+ int domain,
+ int *status,
+ int options);
+
+int xc_waitdomain_core(
+ int xc_handle,
+ int domain,
+ int *status,
+ int options);
/*
* DOMAIN MANAGEMENT FUNCTIONS
diff -r 97dbd9524a7e -r 06d84bf87159 tools/misc/xend
--- a/tools/misc/xend Thu Sep 22 17:34:14 2005
+++ b/tools/misc/xend Thu Sep 22 17:42:01 2005
@@ -86,9 +86,6 @@
daemon = SrvDaemon.instance()
if not sys.argv[1:]:
print 'usage: %s {start|stop|restart}' % sys.argv[0]
- elif os.fork():
- pid, status = os.wait()
- return status >> 8
elif sys.argv[1] == 'start':
start_xenstored()
start_consoled()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/pylintrc
--- a/tools/python/pylintrc Thu Sep 22 17:34:14 2005
+++ b/tools/python/pylintrc Thu Sep 22 17:42:01 2005
@@ -74,7 +74,7 @@
init-import=no
# List of variable names used for dummy variables (i.e. not used).
-dummy-variables=_,dummy
+dummy-variables=_,_1,_2,_3,_4,_5,dummy
@@ -131,7 +131,7 @@
bad-names=foo,bar,baz,toto,tutu,tata
# List of builtins function names that should not be used, separated by a comma
-bad-functions=map,filter,apply,input
+bad-functions=apply,input
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Sep 22 17:42:01 2005
@@ -220,7 +220,13 @@
return PyErr_NoMemory();
nr_doms = xc_domain_getinfo(xc->xc_handle, first_dom, max_doms, info);
-
+
+ if (nr_doms < 0)
+ {
+ free(info);
+ return PyErr_SetFromErrno(xc_error);
+ }
+
list = PyList_New(nr_doms);
for ( i = 0 ; i < nr_doms; i++ )
{
@@ -844,7 +850,7 @@
XcObject *xc = (XcObject *)self;
u32 dom;
- unsigned long maxmem_kb;
+ unsigned int maxmem_kb;
static char *kwd_list[] = { "dom", "maxmem_kb", NULL };
@@ -1175,7 +1181,7 @@
METH_VARARGS | METH_KEYWORDS, "\n"
"Set a domain's memory limit\n"
" dom [int]: Identifier of domain.\n"
- " maxmem_kb [long]: .\n"
+ " maxmem_kb [int]: .\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "domain_memory_increase_reservation",
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Thu Sep 22 17:42:01 2005
@@ -116,8 +116,6 @@
"Write data to a path.\n" \
" path [string] : xenstore path to write to\n." \
" data [string] : data to write.\n" \
- " create [int] : create flag, default 0.\n" \
- " excl [int] : exclusive flag, default 0.\n" \
"\n" \
"Returns None on success.\n" \
"Raises RuntimeError on error.\n" \
@@ -125,30 +123,23 @@
static PyObject *xspy_write(PyObject *self, PyObject *args, PyObject *kwds)
{
- static char *kwd_spec[] = { "path", "data", "create", "excl", NULL };
- static char *arg_spec = "ss#|ii";
+ static char *kwd_spec[] = { "path", "data", NULL };
+ static char *arg_spec = "ss#";
char *path = NULL;
char *data = NULL;
int data_n = 0;
- int create = 0;
- int excl = 0;
-
- struct xs_handle *xh = xshandle(self);
- PyObject *val = NULL;
- int flags = 0;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
int xsval = 0;
if (!xh)
goto exit;
if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
- &path, &data, &data_n, &create, &excl))
- goto exit;
- if (create)
- flags |= O_CREAT;
- if (excl)
- flags |= O_EXCL;
- Py_BEGIN_ALLOW_THREADS
- xsval = xs_write(xh, path, data, data_n, flags);
+ &path, &data, &data_n))
+ goto exit;
+ Py_BEGIN_ALLOW_THREADS
+ xsval = xs_write(xh, path, data, data_n);
Py_END_ALLOW_THREADS
if (!xsval) {
PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -808,6 +799,48 @@
}
Py_INCREF(Py_None);
val = Py_None;
+ exit:
+ return val;
+}
+
+#define xspy_get_domain_path_doc "\n" \
+ "Return store path of domain.\n" \
+ " domid [int]: domain id\n" \
+ "\n" \
+ "Returns: [string] domain store path.\n" \
+ " None if domid doesn't exist.\n" \
+ "Raises RuntimeError on error.\n" \
+ "\n"
+
+static PyObject *xspy_get_domain_path(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "domid", NULL };
+ static char *arg_spec = "i";
+ int domid = 0;
+
+ struct xs_handle *xh = xshandle(self);
+ char *xsval = NULL;
+ PyObject *val = NULL;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &domid))
+ goto exit;
+ Py_BEGIN_ALLOW_THREADS
+ xsval = xs_get_domain_path(xh, domid);
+ Py_END_ALLOW_THREADS
+ if (!xsval) {
+ if (errno == ENOENT) {
+ Py_INCREF(Py_None);
+ val = Py_None;
+ } else
+ PyErr_SetFromErrno(PyExc_RuntimeError);
+ goto exit;
+ }
+ val = PyString_FromString(xsval);
+ free(xsval);
exit:
return val;
}
@@ -858,6 +891,7 @@
XSPY_METH(release_domain),
XSPY_METH(close),
XSPY_METH(shutdown),
+ XSPY_METH(get_domain_path),
XSPY_METH(fileno),
{ /* Terminator. */ },
};
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/sv/Main.py
--- a/tools/python/xen/sv/Main.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/sv/Main.py Thu Sep 22 17:42:01 2005
@@ -1,5 +1,4 @@
-from xen.sv.HTMLBase import HTMLBase
from xen.sv.NodeInfo import NodeInfo
from xen.sv.DomInfo import DomInfo
from xen.sv.CreateDomain import CreateDomain
@@ -33,15 +32,8 @@
result.append( (key, self.fieldStorage.getlist( key ) ) )
return result
-class TwistedAdapter:
- def __init__( self, req ):
- self.args = Args( req )
- self.uri = req.unparsed_uri
- self.url = req.uri
- self.write = req.write
-
# This is the Main class
-# It peices together all the modules
+# It pieces together all the modules
class Main:
def __init__( self ):
@@ -61,7 +53,7 @@
self.init_modules( request )
self.init_done = True
- for moduleName, module in self.modules.iteritems():
+ for _, module in self.modules.iteritems():
module.write_MENU( request )
request.write( "\n" )
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/sv/Wizard.py
--- a/tools/python/xen/sv/Wizard.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/sv/Wizard.py Thu Sep 22 17:42:01 2005
@@ -47,7 +47,7 @@
def __init__( self, urlWriter, title, location ):
HTMLBase.__init__( self )
self.urlWriter = urlWriter
- self.feilds = []
+ self.fields = []
self.title = title
self.location = location
self.passback = None
@@ -86,9 +86,9 @@
request.write( "<table width='100%' cellpadding='0' cellspacing='1'
border='0'>" )
- for (feild, control) in self.feilds:
- control.write_Control( request, previous_values.get( feild ) )
- if previous_values.get( feild ) is not None and not
control.validate( previous_values.get( feild ) ):
+ for (field, control) in self.fields:
+ control.write_Control( request, previous_values.get( field ) )
+ if previous_values.get( field ) is not None and not
control.validate( previous_values.get( field ) ):
control.write_Help( request )
request.write( "</table>" )
@@ -97,7 +97,7 @@
#request.write( "<input type='hidden' name='visited-sheet%s'
value='True'></p>" % self.location )
def addControl( self, control ):
- self.feilds.append( [ control.getName(), control ] )
+ self.fields.append( [ control.getName(), control ] )
def validate( self, request ):
@@ -108,10 +108,10 @@
previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the
map for quick reference
if DEBUG: print previous_values
- for (feild, control) in self.feilds:
- if not control.validate( previous_values.get( feild ) ):
+ for (field, control) in self.fields:
+ if not control.validate( previous_values.get( field ) ):
check = False
- if DEBUG: print "> %s = %s" % (feild, previous_values.get(
feild ))
+ if DEBUG: print "> %s = %s" % (field, previous_values.get(
field ))
return check
@@ -143,7 +143,7 @@
class InputControl( SheetControl ):
- def __init__( self, name, defaultValue, humanText, reg_exp = ".*",
help_text = "You must enter the appropriate details in this feild." ):
+ def __init__( self, name, defaultValue, humanText, reg_exp = ".*",
help_text = "You must enter the appropriate details in this field." ):
SheetControl.__init__( self, reg_exp )
self.setName( name )
@@ -206,7 +206,7 @@
class FileControl( InputControl ):
- def __init__( self, name, defaultValue, humanText, reg_exp = ".*",
help_text = "You must enter the appropriate details in this feild." ):
+ def __init__( self, name, defaultValue, humanText, reg_exp = ".*",
help_text = "You must enter the appropriate details in this field." ):
InputControl.__init__( self, name, defaultValue, humanText )
def validate( self, persistedValue ):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/util/process.py
--- a/tools/python/xen/util/process.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/util/process.py Thu Sep 22 17:42:01 2005
@@ -24,6 +24,8 @@
r = p.poll()
for (fd, event) in r:
if event == select.POLLHUP:
+ cout.close()
+ cerr.close()
return stdout
if fd == cout.fileno():
stdout = stdout + cout.readline()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/web/httpserver.py Thu Sep 22 17:42:01 2005
@@ -273,6 +273,9 @@
self.interface = interface
self.port = port
self.root = root
+ # ready indicates when we are ready to begin accept connections
+ # it should be set after a successful bind
+ self.ready = False
def getRoot(self):
return self.root
@@ -283,6 +286,7 @@
def run(self):
self.bind()
self.listen()
+ self.ready = True
self.requestLoop()
def stop(self):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/web/tcp.py
--- a/tools/python/xen/web/tcp.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/web/tcp.py Thu Sep 22 17:42:01 2005
@@ -18,6 +18,8 @@
import sys
import socket
import types
+import time
+import errno
from connection import *
from protocol import *
@@ -35,9 +37,20 @@
def createSocket(self):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- addr = (self.interface, self.port)
- sock.bind(addr)
- return sock
+
+ # SO_REUSEADDR does not always ensure that we do not get an address
+ # in use error when restarted quickly
+ # we implement a timeout to try and avoid failing unnecessarily
+ timeout = time.time() + 30
+ while True:
+ try:
+ sock.bind((self.interface, self.port))
+ return sock
+ except socket.error, (_errno, strerrno):
+ if _errno == errno.EADDRINUSE and time.time() < timeout:
+ time.sleep(0.5)
+ else:
+ raise
def acceptConnection(self, sock, protocol, addr):
return TCPServerConnection(sock, protocol, addr, self)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/Args.py
--- a/tools/python/xen/xend/Args.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/Args.py Thu Sep 22 17:42:01 2005
@@ -32,12 +32,12 @@
self.arg_dict = {}
self.key_ord = []
self.key_dict = {}
- for (name, type) in paramspec:
+ for (name, typ) in paramspec:
self.arg_ord.append(name)
- self.arg_dict[name] = type
- for (name, type) in keyspec:
+ self.arg_dict[name] = typ
+ for (name, typ) in keyspec:
self.key_ord.append(name)
- self.key_dict[name] = type
+ self.key_dict[name] = typ
def get_args(self, d, xargs=None):
args = {}
@@ -56,12 +56,12 @@
def split_args(self, d, args, keys):
for (k, v) in d.items():
if k in self.arg_dict:
- type = self.arg_dict[k]
- val = self.coerce(type, v)
+ typ = self.arg_dict[k]
+ val = self.coerce(typ, v)
args[k] = val
elif k in self.key_dict:
- type = self.key_dict[k]
- val = self.coerce(type, v)
+ typ = self.key_dict[k]
+ val = self.coerce(typ, v)
keys[k] = val
else:
raise ArgError('Invalid parameter: %s' % k)
@@ -85,20 +85,20 @@
d[k] = val
return self.get_args(d, xargs=xargs)
- def coerce(self, type, v):
+ def coerce(self, typ, v):
try:
- if type == 'int':
+ if typ == 'int':
val = int(v)
- elif type == 'long':
+ elif typ == 'long':
val = long(v)
- elif type == 'str':
+ elif typ == 'str':
val = str(v)
- elif type == 'sxpr':
+ elif typ == 'sxpr':
val = self.sxpr(v)
- elif type == 'bool':
+ elif typ == 'bool':
val = self.bool(v)
else:
- raise ArgError('invalid type:' + str(type))
+ raise ArgError('invalid type:' + str(typ))
return val
except ArgError:
raise
@@ -142,7 +142,9 @@
Used on the client.
"""
- def __init__(self, fn, paramspec, keyspec={}):
+ def __init__(self, fn, paramspec, keyspec = None):
+ if keyspec == None:
+ keyspec = {}
Args.__init__(self, paramspec, keyspec)
self.fn = fn
@@ -154,7 +156,9 @@
Used in the HTTP server.
"""
- def __init__(self, fn, paramspec, keyspec={}):
+ def __init__(self, fn, paramspec, keyspec = None):
+ if keyspec == None:
+ keyspec = {}
Args.__init__(self, paramspec, keyspec)
self.fn = fn
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/EventServer.py
--- a/tools/python/xen/xend/EventServer.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/EventServer.py Thu Sep 22 17:42:01 2005
@@ -145,7 +145,7 @@
self.lock.release()
if async:
- scheduler.now(self.call_handlers, [event, val])
+ scheduler.now(self.call_handlers, event, val)
else:
self.call_handlers(event, val)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/PrettyPrint.py
--- a/tools/python/xen/xend/PrettyPrint.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/PrettyPrint.py Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
"""General pretty-printer, including support for SXP.
@@ -34,11 +35,11 @@
def get_width(self):
return self.width
- def output(self, out):
+ def output(self, _):
print '***PrettyItem>output>', self
pass
- def prettyprint(self, out, width):
+ def prettyprint(self, _, width):
print '***PrettyItem>prettyprint>', self
return width
@@ -51,7 +52,7 @@
def output(self, out):
out.write(self.value)
- def prettyprint(self, line):
+ def prettyprint(self, line, _):
line.output(self)
def show(self, out):
@@ -62,7 +63,7 @@
def output(self, out):
out.write(' ' * self.width)
- def prettyprint(self, line):
+ def prettyprint(self, line, _):
line.output(self)
def show(self, out):
@@ -79,7 +80,7 @@
def output(self, out):
out.write(' ' * self.width)
- def prettyprint(self, line):
+ def prettyprint(self, line, _):
if line.breaks(self.space):
self.active = 1
line.newline(self.indent)
@@ -88,26 +89,20 @@
def show(self, out):
print >> out, ("(break (width %d) (indent %d) (space %d) (active %d))"
- % (self.width, self.indent, self.space, self.lspace,
self.active))
+ % (self.width, self.indent, self.space, self.active))
class PrettyNewline(PrettySpace):
-
- def __init__(self, indent):
- PrettySpace.__init__(self, indent)
def insert(self, block):
block.newline()
block.addtoline(self)
- def output(self, out):
- out.write(' ' * self.width)
-
- def prettyprint(self, line):
+ def prettyprint(self, line, _):
line.newline(0)
line.output(self)
def show(self, out):
- print >> out, ("(nl (indent %d))" % self.indent)
+ print >> out, ("(nl (width %d))" % self.width)
class PrettyLine(PrettyItem):
def __init__(self):
@@ -132,7 +127,7 @@
lastbreak.space = (width - lastwidth)
self.width = width
- def prettyprint(self, line):
+ def prettyprint(self, line, _):
for x in self.content:
x.prettyprint(line)
@@ -145,7 +140,8 @@
class PrettyBlock(PrettyItem):
def __init__(self, all=0, parent=None):
- self.width = 0
+ PrettyItem.__init__(self, 0)
+
self.lines = []
self.parent = parent
self.indent = 0
@@ -163,7 +159,7 @@
if self.width < l.width:
self.width = l.width
- def breaks(self, n):
+ def breaks(self, _):
return self.all and self.broken
def newline(self):
@@ -172,7 +168,7 @@
def addtoline(self, x):
self.lines[-1].write(x)
- def prettyprint(self, line):
+ def prettyprint(self, line, _):
self.indent = line.used
line.block = self
if not line.fits(self.width):
@@ -191,6 +187,7 @@
class Line:
def __init__(self, out, width):
+ self.block = None
self.out = out
self.width = width
self.used = 0
@@ -255,8 +252,7 @@
self.block = self.block.parent
def prettyprint(self, out=sys.stdout):
- line = Line(out, self.width)
- self.top.prettyprint(line)
+ self.top.prettyprint(Line(out, self.width))
class SXPPrettyPrinter(PrettyPrinter):
"""An SXP prettyprinter.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/Vifctl.py
--- a/tools/python/xen/xend/Vifctl.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/Vifctl.py Thu Sep 22 17:42:01 2005
@@ -13,13 +13,13 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
"""Xend interface to networking control scripts.
"""
import os
import os.path
-import sys
import xen.util.process
from xen.xend import XendRoot
@@ -71,7 +71,7 @@
vif = vif_old
return vif
-def vifctl(op, vif=None, script=None, domain=None, mac=None, bridge=None,
ipaddr=[]):
+def vifctl(op, vif=None, script=None, domain=None, mac=None, bridge=None,
ipaddr=None):
"""Call a vif control script.
Xend calls this when bringing vifs up or down.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendBootloader.py
--- a/tools/python/xen/xend/XendBootloader.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendBootloader.py Thu Sep 22 17:42:01 2005
@@ -12,7 +12,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
-import os, sys, select, errno
+import os, select, errno
import sxp
from XendLogging import log
@@ -72,7 +72,7 @@
if len(s) == 0:
break
- (pid, status) = os.waitpid(child, 0)
+ os.waitpid(child, 0)
os.close(r)
os.unlink(BL_FIFO)
@@ -89,6 +89,4 @@
if vcpus and sxp.child_value(config_image, "vcpus") is None:
config_image.append(['vcpus', vcpus])
- config = ['image', config_image]
- return config
-
+ return config_image
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py Thu Sep 22 17:42:01 2005
@@ -4,7 +4,6 @@
# Public License. See the file "COPYING" in the main directory of
# this archive for more details.
-import errno
import os
import re
import select
@@ -12,7 +11,7 @@
from string import join
from struct import pack, unpack, calcsize
from xen.util.xpopen import xPopen3
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+import xen.lowlevel.xc
from xen.xend.xenstore.xsutil import IntroduceDomain
from XendError import XendError
@@ -24,6 +23,10 @@
sizeof_int = calcsize("i")
sizeof_unsigned_long = calcsize("L")
+
+
+xc = xen.lowlevel.xc.new()
+
def write_exact(fd, buf, errmsg):
if os.write(fd, buf) != len(buf):
@@ -83,7 +86,7 @@
if child.wait() != 0:
raise XendError("xc_save failed: %s" % lasterr)
- dominfo.setStoreChannel(None)
+ dominfo.closeStoreChannel()
xd.domain_destroy(dominfo.domid)
return None
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendClient.py Thu Sep 22 17:42:01 2005
@@ -33,8 +33,6 @@
UnixXendClientProtocol, \
XendError
-DEBUG = 0
-
def fileof(val):
"""Converter for passing configs or other 'large' data.
Handles lists, files directly.
@@ -385,7 +383,6 @@
python XendClient.py domain 0
(domain (id 0) (name Domain-0) (memory 128))
"""
- global DEBUG
from getopt import getopt
short_options = 'x:au:d'
long_options = ['xend=', 'unix=', 'debug']
@@ -397,8 +394,6 @@
srv = v
elif k in ['-u', '--unix']:
unix = int(v)
- elif k in ['-d', '--debug']:
- DEBUG = 1
if len(args):
fn = args[0]
args = args[1:]
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendDmesg.py
--- a/tools/python/xen/xend/XendDmesg.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendDmesg.py Thu Sep 22 17:42:01 2005
@@ -18,7 +18,6 @@
"""Get dmesg output for this node.
"""
-import os
import xen.lowlevel.xc
class XendDmesg:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendDomain.py Thu Sep 22 17:42:01 2005
@@ -14,40 +14,52 @@
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
# Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
"""Handler for domain operations.
Nothing here is persistent (across reboots).
Needs to be persistent for one uptime.
"""
-import errno
import os
-import sys
-import time
-import traceback
-
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+
+import xen.lowlevel.xc
from xen.xend import sxp
-from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendRoot
from xen.xend import XendCheckpoint
from xen.xend.XendDomainInfo import XendDomainInfo, shutdown_reason
-from xen.xend import EventServer; eserver = EventServer.instance()
+from xen.xend import EventServer
from xen.xend.XendError import XendError
from xen.xend.XendLogging import log
from xen.xend import scheduler
from xen.xend.server import relocate
from xen.xend.uuid import getUuid
from xen.xend.xenstore import XenNode, DBMap
+from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.xenstore.xsutil import GetDomainPath
+
+
+xc = xen.lowlevel.xc.new()
+xroot = XendRoot.instance()
+eserver = EventServer.instance()
+
__all__ = [ "XendDomain" ]
SHUTDOWN_TIMEOUT = 30
+PRIV_DOMAIN = 0
+
+def is_dead(dom):
+ return dom['crashed'] or dom['shutdown'] or (
+ dom['dying'] and not(dom['running'] or dom['paused'] or
+ dom['blocked']))
+
class XendDomainDict(dict):
def get_by_name(self, name):
try:
- return filter(lambda d: d.name == name, self.values())[0]
+ return filter(lambda d: d.getName() == name, self.values())[0]
except IndexError, err:
return None
@@ -65,9 +77,12 @@
# So we stuff the XendDomain instance (self) into xroot's components.
xroot.add_component("xen.xend.XendDomain", self)
self.domains = XendDomainDict()
- self.dbmap = DBMap(db=XenNode("/domain"))
+ self.domroot = "/domain"
+ self.vmroot = "/domain"
+ self.dbmap = DBMap(db=XenNode(self.vmroot))
self.watchReleaseDomain()
self.initial_refresh()
+ self.dom0_setup()
def list(self):
"""Get list of domain objects.
@@ -83,7 +98,7 @@
@return: domain objects
"""
doms = self.list()
- doms.sort(lambda x, y: cmp(x.name, y.name))
+ doms.sort(lambda x, y: cmp(x.getName(), y.getName()))
return doms
def list_names(self):
@@ -92,10 +107,12 @@
@return: domain names
"""
doms = self.list_sorted()
- return map(lambda x: x.name, doms)
+ return map(lambda x: x.getName(), doms)
def onReleaseDomain(self):
- self.refresh(cleanup=True)
+ self.reap()
+ self.refresh()
+ self.domain_restarts()
def watchReleaseDomain(self):
from xen.xend.xenstore.xswatch import xswatch
@@ -123,70 +140,58 @@
else:
dominfo = dominfo[0]
return dominfo
-
+
def initial_refresh(self):
"""Refresh initial domain info from db.
"""
doms = self.xen_domains()
- self.dbmap.readDB()
- for domdb in self.dbmap.values():
- if not domdb.has_key("xend"):
- continue
- db = domdb.addChild("xend")
+ self.dbmap.readDB() # XXX only needed for "xend"
+ for dom in doms.values():
+ domid = dom['dom']
+ dompath = GetDomainPath(domid)
+ if not dompath:
+ continue
+ vmpath = xstransact.Read(dompath, "vm")
+ if not vmpath:
+ continue
+ uuid = xstransact.Read(vmpath, "uuid")
+ if not uuid:
+ continue
+ log.info("recreating domain %d, uuid %s" % (domid, uuid))
+ dompath = "/".join(dompath.split("/")[0:-1])
try:
- domid = int(domdb["domid"].getData())
- except:
- domid = None
- # XXX if domid in self.domains, then something went wrong
- if (domid is None) or (domid in self.domains):
- domdb.delete()
- elif domid in doms:
- try:
- self._new_domain(domdb["uuid"].getData(), domid, db,
- doms[domid])
- except Exception, ex:
- log.exception("Error recreating domain info: id=%d", domid)
- self._delete_domain(domid)
- else:
- self._delete_domain(domid)
- self.refresh(cleanup=True)
-
- dom0 = self.domain_lookup(0)
+ dominfo = XendDomainInfo.recreate(uuid, dompath, domid, dom)
+ except Exception, ex:
+ log.exception("Error recreating domain info: id=%d", domid)
+ continue
+ self._add_domain(dominfo)
+ self.reap()
+ self.refresh()
+ self.domain_restarts()
+
+ def dom0_setup(self):
+ dom0 = self.domain_lookup(PRIV_DOMAIN)
if not dom0:
- dom0 = self.domain_unknown(0)
+ dom0 = self.dom0_unknown()
dom0.dom0_init_store()
+ dom0.dom0_enforce_vcpus()
def close(self):
pass
- def _new_domain(self, uuid, domid, db, info):
- """Create a domain entry from saved info.
-
- @param db: saved info from the db
- @param info: domain info from xen
- @return: domain
- """
- dominfo = XendDomainInfo.recreate(uuid, domid, db, info)
- self.domains[dominfo.domid] = dominfo
- return dominfo
-
def _add_domain(self, info, notify=True):
"""Add a domain entry to the tables.
@param info: domain info object
@param notify: send a domain created event if true
"""
- # Remove entries under the wrong id.
- for i, d in self.domains.items():
- if i != d.domid:
- del self.domains[i]
- self.dbmap.delete(d.uuid)
- if info.domid in self.domains:
+ if info.getDomid() in self.domains:
notify = False
- self.domains[info.domid] = info
- info.exportToDB(save=True)
+ self.domains[info.getDomid()] = info
+ info.exportToDB()
if notify:
- eserver.inject('xend.domain.create', [info.name, info.domid])
+ eserver.inject('xend.domain.create', [info.getName(),
+ info.getDomid()])
def _delete_domain(self, id, notify=True):
"""Remove a domain from the tables.
@@ -194,18 +199,14 @@
@param id: domain id
@param notify: send a domain died event if true
"""
- try:
- if self.xen_domain(id):
- return
- except:
- pass
info = self.domains.get(id)
if info:
del self.domains[id]
info.cleanup()
info.delete()
if notify:
- eserver.inject('xend.domain.died', [info.name, info.domid])
+ eserver.inject('xend.domain.died', [info.getName(),
+ info.getDomid()])
# XXX this should not be needed
for domdb in self.dbmap.values():
if not domdb.has_key("xend"):
@@ -222,61 +223,40 @@
"""Look for domains that have crashed or stopped.
Tidy them up.
"""
- casualties = []
doms = self.xen_domains()
for d in doms.values():
- dead = 0
- dead = dead or (d['crashed'] or d['shutdown'])
- dead = dead or (d['dying'] and
- not(d['running'] or d['paused'] or d['blocked']))
- if dead:
- casualties.append(d)
- for d in casualties:
- id = d['dom']
- dominfo = self.domains.get(id)
- name = (dominfo and dominfo.name) or '??'
- if dominfo and dominfo.is_terminated():
- continue
- log.debug('XendDomain>reap> domain died name=%s id=%d', name, id)
+ if not is_dead(d):
+ continue
+ domid = d['dom']
+ dominfo = self.domains.get(domid)
+ if not dominfo or dominfo.is_terminated():
+ continue
+ log.debug('domain died name=%s domid=%d', dominfo.getName(), domid)
+ if d['crashed'] and xroot.get_enable_dump():
+ self.domain_dumpcore(domid)
if d['shutdown']:
reason = shutdown_reason(d['shutdown_reason'])
- log.debug('XendDomain>reap> shutdown name=%s id=%d reason=%s',
name, id, reason)
- if reason in ['suspend']:
- if dominfo and dominfo.is_terminated():
- log.debug('XendDomain>reap> Suspended domain died
id=%d', id)
- else:
- eserver.inject('xend.domain.suspended', [name, id])
- if dominfo:
- dominfo.state_set("suspended")
- continue
+ log.debug('shutdown name=%s id=%d reason=%s',
+ dominfo.getName(), domid, reason)
+ if reason == 'suspend':
+ dominfo.state_set("suspended")
+ continue
if reason in ['poweroff', 'reboot']:
- eserver.inject('xend.domain.exit', [name, id, reason])
- self.domain_restart_schedule(id, reason)
- else:
- if xroot.get_enable_dump():
- self.domain_dumpcore(id)
- eserver.inject('xend.domain.exit', [name, id, 'crash'])
- self.final_domain_destroy(id)
-
- def refresh(self, cleanup=False):
+ self.domain_restart_schedule(domid, reason)
+ dominfo.destroy()
+
+ def refresh(self):
"""Refresh domain list from Xen.
"""
- if cleanup:
- self.reap()
doms = self.xen_domains()
# Remove entries for domains that no longer exist.
# Update entries for existing domains.
- do_domain_restarts = False
for d in self.domains.values():
- info = doms.get(d.domid)
+ info = doms.get(d.getDomid())
if info:
d.update(info)
- elif d.restart_pending():
- do_domain_restarts = True
- else:
- self._delete_domain(d.domid)
- if cleanup and do_domain_restarts:
- scheduler.now(self.domain_restarts)
+ elif not d.restart_pending():
+ self._delete_domain(d.getDomid())
def update_domain(self, id):
"""Update information for a single domain.
@@ -297,7 +277,8 @@
@param config: configuration
@return: domain
"""
- dominfo = XendDomainInfo.create(self.dbmap, config)
+ dominfo = XendDomainInfo.create(self.dbmap.getPath(), config)
+ self._add_domain(dominfo)
return dominfo
def domain_restart(self, dominfo):
@@ -305,31 +286,39 @@
@param dominfo: domain object
"""
- log.info("Restarting domain: name=%s id=%s", dominfo.name,
dominfo.domid)
+ log.info("Restarting domain: name=%s id=%s", dominfo.getName(),
+ dominfo.getDomid())
eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.domid, "begin"])
+ [dominfo.getName(), dominfo.getDomid(), "begin"])
try:
dominfo.restart()
- log.info('Restarted domain name=%s id=%s', dominfo.name,
dominfo.domid)
+ log.info('Restarted domain name=%s id=%s', dominfo.getName(),
+ dominfo.getDomid())
eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.domid, "success"])
- self.domain_unpause(dominfo.domid)
+ [dominfo.getName(), dominfo.getDomid(),
+ "success"])
+ self.domain_unpause(dominfo.getDomid())
except Exception, ex:
log.exception("Exception restarting domain: name=%s id=%s",
- dominfo.name, dominfo.domid)
+ dominfo.getName(), dominfo.getDomid())
eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.domid, "fail"])
+ [dominfo.getName(), dominfo.getDomid(), "fail"])
return dominfo
- def domain_configure(self, vmconfig):
+ def domain_configure(self, config):
"""Configure an existing domain. This is intended for internal
use by domain restore and migrate.
@param vmconfig: vm configuration
"""
- config = sxp.child_value(vmconfig, 'config')
- dominfo = XendDomainInfo.restore(self.dbmap, config)
- return dominfo
+ # We accept our configuration specified as ['config' [...]], which
+ # some tools or configuration files may be using. For save-restore,
+ # we use the value of XendDomainInfo.sxpr() directly, which has no
+ # such item.
+ nested = sxp.child_value(config, 'config')
+ if nested:
+ config = nested
+ return XendDomainInfo.restore(self.dbmap.getPath(), config)
def domain_restore(self, src, progress=False):
"""Restore a domain from file.
@@ -340,7 +329,9 @@
try:
fd = os.open(src, os.O_RDONLY)
- return XendCheckpoint.restore(self, fd)
+ dominfo = XendCheckpoint.restore(self, fd)
+ self._add_domain(dominfo)
+ return dominfo
except OSError, ex:
raise XendError("can't read guest state file %s: %s" %
(src, ex[1]))
@@ -354,22 +345,32 @@
self.update_domain(id)
return self.domains.get(id)
- def domain_unknown(self, id):
- try:
- info = self.xen_domain(id)
- if info:
- uuid = getUuid()
- log.info(
- "Creating entry for unknown domain: id=%d uuid=%s",
- id, uuid)
- db = self.dbmap.addChild("%s/xend" % uuid)
- dominfo = XendDomainInfo.recreate(uuid, id, db, info)
- self._add_domain(dominfo)
- return dominfo
- except Exception, ex:
- raise
- log.exception("Error creating domain info: id=%d", id)
- return None
+ def dom0_unknown(self):
+ dom0 = PRIV_DOMAIN
+ uuid = None
+ info = self.xen_domain(dom0)
+ dompath = GetDomainPath(dom0)
+ if dompath:
+ vmpath = xstransact.Read(dompath, "vm")
+ if vmpath:
+ uuid = xstransact.Read(vmpath, "uuid")
+ if not uuid:
+ uuid = dompath.split("/")[-1]
+ dompath = "/".join(dompath.split("/")[0:-1])
+ if not uuid:
+ uuid = getUuid()
+ dompath = self.domroot
+ log.info("Creating entry for unknown xend domain: id=%d uuid=%s",
+ dom0, uuid)
+ try:
+ dominfo = XendDomainInfo.recreate(uuid, dompath, dom0, info)
+ self._add_domain(dominfo)
+ return dominfo
+ except Exception, exn:
+ log.exception(exn)
+ raise XendError("Error recreating xend domain info: id=%d: %s" %
+ (dom0, str(exn)))
+
def domain_lookup(self, id):
return self.domains.get(id)
@@ -390,9 +391,10 @@
@param id: domain id
"""
dominfo = self.domain_lookup(id)
- eserver.inject('xend.domain.unpause', [dominfo.name, dominfo.domid])
- try:
- return xc.domain_unpause(dom=dominfo.domid)
+ eserver.inject('xend.domain.unpause', [dominfo.getName(),
+ dominfo.getDomid()])
+ try:
+ return xc.domain_unpause(dom=dominfo.getDomid())
except Exception, ex:
raise XendError(str(ex))
@@ -402,9 +404,10 @@
@param id: domain id
"""
dominfo = self.domain_lookup(id)
- eserver.inject('xend.domain.pause', [dominfo.name, dominfo.domid])
- try:
- return xc.domain_pause(dom=dominfo.domid)
+ eserver.inject('xend.domain.pause', [dominfo.getName(),
+ dominfo.getDomid()])
+ try:
+ return xc.domain_pause(dom=dominfo.getDomid())
except Exception, ex:
raise XendError(str(ex))
@@ -420,8 +423,9 @@
@param reason: shutdown type: poweroff, reboot, suspend, halt
"""
dominfo = self.domain_lookup(id)
- self.domain_restart_schedule(dominfo.domid, reason, force=True)
- eserver.inject('xend.domain.shutdown', [dominfo.name, dominfo.domid,
reason])
+ self.domain_restart_schedule(dominfo.getDomid(), reason, force=True)
+ eserver.inject('xend.domain.shutdown', [dominfo.getName(),
+ dominfo.getDomid(), reason])
if reason == 'halt':
reason = 'poweroff'
val = dominfo.shutdown(reason)
@@ -445,13 +449,13 @@
if not dominfo.shutdown_pending:
# domain doesn't need shutdown
continue
- id = dominfo.domid
+ id = dominfo.getDomid()
left = dominfo.shutdown_time_left(SHUTDOWN_TIMEOUT)
if left <= 0:
# Shutdown expired - destroy domain.
try:
log.info("Domain shutdown timeout expired: name=%s id=%s",
- dominfo.name, id)
+ dominfo.getName(), id)
self.domain_destroy(id, reason=
dominfo.shutdown_pending['reason'])
except Exception:
@@ -476,15 +480,16 @@
restart = (force and reason == 'reboot') or
dominfo.restart_needed(reason)
if restart:
log.info('Scheduling restart for domain: name=%s id=%s',
- dominfo.name, dominfo.domid)
+ dominfo.getName(), dominfo.getDomid())
eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.domid, "schedule"])
+ [dominfo.getName(), dominfo.getDomid(),
+ "schedule"])
dominfo.restarting()
else:
log.info('Cancelling restart for domain: name=%s id=%s',
- dominfo.name, dominfo.domid)
+ dominfo.getName(), dominfo.getDomid())
eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.domid, "cancel"])
+ [dominfo.getName(), dominfo.getDomid(), "cancel"])
dominfo.restart_cancel()
def domain_restarts(self):
@@ -494,45 +499,36 @@
for dominfo in self.domains.values():
if not dominfo.restart_pending():
continue
- print 'domain_restarts>', dominfo.name, dominfo.domid
- info = doms.get(dominfo.domid)
+ info = doms.get(dominfo.getDomid())
if info:
# Don't execute restart for domains still running.
- print 'domain_restarts> still runnning: ', dominfo.name
continue
# Remove it from the restarts.
- print 'domain_restarts> restarting: ', dominfo.name
+ log.info('restarting: %s' % dominfo.getName())
self.domain_restart(dominfo)
- def final_domain_destroy(self, id):
- """Final destruction of a domain..
-
- @param id: domain id
- """
- try:
- dominfo = self.domain_lookup(id)
- log.info('Destroying domain: name=%s', dominfo.name)
- eserver.inject('xend.domain.destroy', [dominfo.name,
dominfo.domid])
+ def domain_destroy(self, domid, reason='halt'):
+ """Terminate domain immediately.
+ - halt: cancel any restart for the domain
+ - reboot schedule a restart for the domain
+
+ @param domid: domain id
+ """
+
+ if domid == PRIV_DOMAIN:
+ raise XendError("Cannot destroy privileged domain %i" % domid)
+
+ self.domain_restart_schedule(domid, reason, force=True)
+ dominfo = self.domain_lookup(domid)
+ if dominfo:
val = dominfo.destroy()
- except:
- #todo
+ else:
try:
- val = xc.domain_destroy(dom=id)
+ val = xc.domain_destroy(dom=domid)
except Exception, ex:
raise XendError(str(ex))
return val
- def domain_destroy(self, id, reason='halt'):
- """Terminate domain immediately.
- - halt: cancel any restart for the domain
- - reboot schedule a restart for the domain
-
- @param id: domain id
- """
- self.domain_restart_schedule(id, reason, force=True)
- val = self.final_domain_destroy(id)
- return val
-
def domain_migrate(self, id, dst, live=False, resource=0):
"""Start domain migration.
@@ -547,13 +543,14 @@
# temporarily rename domain for localhost migration
if dst == "localhost":
- dominfo.name = "tmp-" + dominfo.name
+ dominfo.setName("tmp-" + dominfo.getName())
try:
XendCheckpoint.save(self, sock.fileno(), dominfo, live)
except:
if dst == "localhost":
- dominfo.name = string.replace(dominfo.name, "tmp-", "", 1)
+ dominfo.setName(
+ string.replace(dominfo.getName(), "tmp-", "", 1))
raise
return None
@@ -587,7 +584,7 @@
"""
dominfo = self.domain_lookup(id)
try:
- return xc.domain_pincpu(dominfo.domid, vcpu, cpumap)
+ return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
except Exception, ex:
raise XendError(str(ex))
@@ -596,8 +593,10 @@
"""
dominfo = self.domain_lookup(id)
try:
- return xc.bvtsched_domain_set(dom=dominfo.domid, mcuadv=mcuadv,
- warpback=warpback,
warpvalue=warpvalue,
+ return xc.bvtsched_domain_set(dom=dominfo.getDomid(),
+ mcuadv=mcuadv,
+ warpback=warpback,
+ warpvalue=warpvalue,
warpl=warpl, warpu=warpu)
except Exception, ex:
raise XendError(str(ex))
@@ -607,7 +606,7 @@
"""
dominfo = self.domain_lookup(id)
try:
- return xc.bvtsched_domain_get(dominfo.domid)
+ return xc.bvtsched_domain_get(dominfo.getDomid())
except Exception, ex:
raise XendError(str(ex))
@@ -617,7 +616,8 @@
"""
dominfo = self.domain_lookup(id)
try:
- return xc.sedf_domain_set(dominfo.domid, period, slice, latency,
extratime, weight)
+ return xc.sedf_domain_set(dominfo.getDomid(), period, slice,
+ latency, extratime, weight)
except Exception, ex:
raise XendError(str(ex))
@@ -626,7 +626,7 @@
"""
dominfo = self.domain_lookup(id)
try:
- return xc.sedf_domain_get(dominfo.domid)
+ return xc.sedf_domain_get(dominfo.getDomid())
except Exception, ex:
raise XendError(str(ex))
@@ -674,9 +674,8 @@
@param type: device type
"""
dominfo = self.domain_lookup(id)
- val = dominfo.device_delete(type, devid)
- dominfo.exportToDB()
- return val
+ return dominfo.destroyDevice(type, devid)
+
def domain_devtype_ls(self, id, type):
"""Get list of device sxprs for a domain.
@@ -716,7 +715,7 @@
"""
dominfo = self.domain_lookup(id)
try:
- return xc.shadow_control(dominfo.domid, op)
+ return xc.shadow_control(dominfo.getDomid(), op)
except Exception, ex:
raise XendError(str(ex))
@@ -730,7 +729,8 @@
dominfo = self.domain_lookup(id)
maxmem = int(mem) * 1024
try:
- return xc.domain_setmaxmem(dominfo.domid, maxmem_kb = maxmem)
+ return xc.domain_setmaxmem(dominfo.getDomid(),
+ maxmem_kb = maxmem)
except Exception, ex:
raise XendError(str(ex))
@@ -742,7 +742,7 @@
@return: 0 on success, -1 on error
"""
dominfo = self.domain_lookup(id)
- return dominfo.setMemoryTarget(mem * (1 << 20))
+ return dominfo.setMemoryTarget(mem << 10)
def domain_vcpu_hotplug(self, id, vcpu, state):
"""Enable or disable VCPU vcpu in DOM id
@@ -762,12 +762,13 @@
@param id: domain
"""
dominfo = self.domain_lookup(id)
- corefile = "/var/xen/dump/%s.%s.core"% (dominfo.name, dominfo.domid)
- try:
- xc.domain_dumpcore(dom=dominfo.domid, corefile=corefile)
+ corefile = "/var/xen/dump/%s.%s.core" % (dominfo.getName(),
+ dominfo.getDomid())
+ try:
+ xc.domain_dumpcore(dom=dominfo.getDomid(), corefile=corefile)
except Exception, ex:
log.warning("Dumpcore failed, id=%s name=%s: %s",
- dominfo.domid, dominfo.name, ex)
+ dominfo.getDomid(), dominfo.getName(), ex)
def instance():
"""Singleton constructor. Use this instead of the class constructor.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
-#============================================================================
+#===========================================================================
# This library is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
@@ -13,6 +13,7 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
"""Representation of a single domain.
@@ -23,31 +24,23 @@
"""
-import string, re
-import os
+import string
import time
import threading
import errno
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-from xen.util.ip import check_subnet, get_current_ipgw
+import xen.lowlevel.xc
from xen.util.blkif import blkdev_uname_to_file
-from xen.xend.server import controller
-from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
from xen.xend.server.channel import EventChannel
-from xen.util.blkif import blkdev_name_to_number, expand_dev_name
from xen.xend import sxp
-from xen.xend import Blkctl
-from xen.xend.PrettyPrint import prettyprintstring
from xen.xend.XendBootloader import bootloader
from xen.xend.XendLogging import log
from xen.xend.XendError import XendError, VmError
from xen.xend.XendRoot import get_component
from xen.xend.uuid import getUuid
-from xen.xend.xenstore import DBVar, XenNode, DBMap
from xen.xend.xenstore.xstransact import xstransact
from xen.xend.xenstore.xsutil import IntroduceDomain
@@ -88,6 +81,18 @@
STATE_VM_TERMINATED = "terminated"
STATE_VM_SUSPENDED = "suspended"
+"""Flag for a block device backend domain."""
+SIF_BLK_BE_DOMAIN = (1<<4)
+
+"""Flag for a net device backend domain."""
+SIF_NET_BE_DOMAIN = (1<<5)
+
+"""Flag for a TPM device backend domain."""
+SIF_TPM_BE_DOMAIN = (1<<7)
+
+
+xc = xen.lowlevel.xc.new()
+
def domain_exists(name):
# See comment in XendDomain constructor.
@@ -110,9 +115,13 @@
@param dom: domain id
@return: info or None
"""
- domlist = xc.domain_getinfo(dom, 1)
- if domlist and dom == domlist[0]['dom']:
- return domlist[0]
+ try:
+ domlist = xc.domain_getinfo(dom, 1)
+ if domlist and dom == domlist[0]['dom']:
+ return domlist[0]
+ except Exception, err:
+ # ignore missing domain
+ log.exception("domain_getinfo(%d) failed, ignoring", dom)
return None
class XendDomainInfo:
@@ -122,149 +131,288 @@
"""
MINIMUM_RESTART_TIME = 20
- def create(cls, parentdb, config):
+
+ def create(cls, dompath, config):
"""Create a VM from a configuration.
- @param parentdb: parent db
+ @param dompath: The path to all domain information
@param config configuration
@raise: VmError for invalid configuration
"""
- uuid = getUuid()
- db = parentdb.addChild("%s/xend" % uuid)
- path = parentdb.getPath()
- vm = cls(uuid, path, db)
- vm.construct(config)
- vm.saveToDB(sync=True)
-
+
+ log.debug("XendDomainInfo.create(%s, ...)", dompath)
+
+ vm = cls(getUuid(), dompath, cls.parseConfig(config))
+ vm.construct()
return vm
create = classmethod(create)
- def recreate(cls, uuid, domid, db, info):
+
+ def recreate(cls, uuid, dompath, domid, info):
"""Create the VM object for an existing domain.
- @param db: domain db
+ @param dompath: The path to all domain information
@param info: domain info from xc
"""
- path = "/".join(db.getPath().split("/")[0:-2])
- vm = cls(uuid, path, db)
- vm.setDomid(domid)
- vm.name, vm.start_time = vm.gatherVm(("name", str),
- ("start-time", float))
- try:
- db.readDB()
- except: pass
- vm.importFromDB()
- config = vm.config
- log.debug('info=' + str(info))
- log.debug('config=' + prettyprintstring(config))
-
- vm.memory = info['mem_kb'] / 1024
- vm.target = info['mem_kb'] * 1024
-
- if config:
- try:
- vm.recreate = True
- vm.construct(config)
- finally:
- vm.recreate = False
- else:
- vm.setName("Domain-%d" % domid)
-
- vm.exportToDB(save=True)
- return vm
+
+ log.debug("XendDomainInfo.recreate(%s, %s, %s, %s)", uuid, dompath,
+ domid, info)
+
+ return cls(uuid, dompath, info, domid, True)
recreate = classmethod(recreate)
- def restore(cls, parentdb, config, uuid=None):
+
+ def restore(cls, dompath, config, uuid = None):
"""Create a domain and a VM object to do a restore.
- @param parentdb: parent db
+ @param dompath: The path to all domain information
@param config: domain configuration
@param uuid: uuid to use
"""
+
+ log.debug("XendDomainInfo.restore(%s, %s, %s)", dompath, config, uuid)
+
if not uuid:
uuid = getUuid()
- db = parentdb.addChild("%s/xend" % uuid)
- path = parentdb.getPath()
- vm = cls(uuid, path, db)
- ssidref = int(sxp.child_value(config, 'ssidref'))
- log.debug('restoring with ssidref='+str(ssidref))
- id = xc.domain_create(ssidref = ssidref)
- vm.setDomid(id)
+
+ try:
+ ssidref = int(sxp.child_value(config, 'ssidref'))
+ except TypeError, exn:
+ raise VmError('Invalid ssidref in config: %s' % exn)
+
+ log.debug('restoring with ssidref = %d' % ssidref)
+
+ vm = cls(uuid, dompath, cls.parseConfig(config),
+ xc.domain_create(ssidref = ssidref))
vm.clear_shutdown()
+ vm.create_channel()
+ vm.configure()
+ vm.exportToDB()
+ return vm
+
+ restore = classmethod(restore)
+
+
+ def parseConfig(cls, config):
+ def get_cfg(name, conv = None):
+ val = sxp.child_value(config, name)
+
+ if conv and not val is None:
+ try:
+ return conv(val)
+ except TypeError, exn:
+ raise VmError(
+ 'Invalid setting %s = %s in configuration: %s' %
+ (name, val, str(exn)))
+ else:
+ return val
+
+
+ log.debug("parseConfig: config is %s" % str(config))
+
+ result = {}
+ imagecfg = "()"
+
+ result['name'] = get_cfg('name')
+ result['ssidref'] = get_cfg('ssidref', int)
+ result['memory'] = get_cfg('memory', int)
+ result['mem_kb'] = get_cfg('mem_kb', int)
+ result['maxmem'] = get_cfg('maxmem', int)
+ result['maxmem_kb'] = get_cfg('maxmem_kb', int)
+ result['cpu'] = get_cfg('cpu', int)
+ result['cpu_weight'] = get_cfg('cpu_weight', float)
+ result['bootloader'] = get_cfg('bootloader')
+ result['restart_mode'] = get_cfg('restart')
+
try:
- vm.restore = True
- vm.construct(config)
- finally:
- vm.restore = False
- vm.exportToDB(save=True, sync=True)
- return vm
-
- restore = classmethod(restore)
-
- __exports__ = [
- DBVar('config', ty='sxpr'),
- DBVar('state', ty='str'),
- DBVar('restart_mode', ty='str'),
- DBVar('restart_state', ty='str'),
- DBVar('restart_time', ty='float'),
- DBVar('restart_count', ty='int'),
- DBVar('device_model_pid', ty='int'),
- ]
+ imagecfg = get_cfg('image')
+
+ if imagecfg:
+ result['image'] = imagecfg
+ result['vcpus'] = int(sxp.child_value(imagecfg, 'vcpus',
+ 1))
+ else:
+ result['vcpus'] = 1
+ except TypeError, exn:
+ raise VmError(
+ 'Invalid configuration setting: vcpus = %s: %s' %
+ (sxp.child_value(imagecfg, 'vcpus', 1),
+ str(exn)))
+
+ result['backend'] = []
+ for c in sxp.children(config, 'backend'):
+ result['backend'].append(sxp.name(sxp.child0(c)))
+
+ result['device'] = []
+ for d in sxp.children(config, 'device'):
+ c = sxp.child0(d)
+ result['device'].append((sxp.name(c), c))
+
+ log.debug("parseConfig: result is %s" % str(result))
+ return result
+
+
+ parseConfig = classmethod(parseConfig)
+
- def __init__(self, uuid, path, db):
+ def __init__(self, uuid, parentpath, info, domid = None, augment = False):
+
self.uuid = uuid
- self.path = path + "/" + uuid
-
- self.db = db
-
- self.recreate = 0
- self.restore = 0
-
- self.config = None
- self.domid = None
- self.cpu_weight = 1
- self.start_time = None
- self.name = None
- self.memory = None
- self.ssidref = None
+ self.info = info
+
+ self.path = parentpath + "/" + uuid
+
+ if domid:
+ self.domid = domid
+ elif 'dom' in info:
+ self.domid = int(info['dom'])
+ else:
+ self.domid = None
+
+ if augment:
+ self.augmentInfo()
+
+ self.validateInfo()
+
self.image = None
-
- self.target = None
self.store_channel = None
self.store_mfn = None
self.console_channel = None
self.console_mfn = None
- self.controllers = {}
-
- self.info = None
- self.blkif_backend = False
- self.netif_backend = False
- self.netif_idx = 0
- self.tpmif_backend = False
#todo: state: running, suspended
self.state = STATE_VM_OK
self.state_updated = threading.Condition()
self.shutdown_pending = None
- #todo: set to migrate info if migrating
- self.migrate = None
-
- self.restart_mode = RESTART_ONREBOOT
self.restart_state = None
self.restart_time = None
self.restart_count = 0
- self.vcpus = 1
- self.bootloader = None
- self.device_model_pid = 0
-
self.writeVm("uuid", self.uuid)
self.storeDom("vm", self.path)
+
+ def augmentInfo(self):
+ def useIfNeeded(name, val):
+ if not self.infoIsSet(name) and val is not None:
+ self.info[name] = val
+
+ params = (("name", str),
+ ("start-time", float))
+
+ from_store = self.gatherVm(*params)
+
+ map(lambda x, y: useIfNeeded(x[0], y), params, from_store)
+
+
+ def validateInfo(self):
+ """Validate and normalise the info block. This has either been parsed
+ by parseConfig, or received from xc through recreate.
+ """
+ def defaultInfo(name, val):
+ if not self.infoIsSet(name):
+ self.info[name] = val()
+
+ try:
+ defaultInfo('name', lambda: "Domain-%d" % self.domid)
+ defaultInfo('restart_mode', lambda: RESTART_ONREBOOT)
+ defaultInfo('cpu_weight', lambda: 1.0)
+ defaultInfo('bootloader', lambda: None)
+ defaultInfo('backend', lambda: [])
+ defaultInfo('device', lambda: [])
+
+ self.check_name(self.info['name'])
+
+ # Internally, we keep only maxmem_KiB, and not maxmem or maxmem_kb
+ # (which come from outside, and are in MiB and KiB respectively).
+ # This means that any maxmem or maxmem_kb settings here have come
+ # from outside, and maxmem_KiB must be updated to reflect them.
+ # If we have both maxmem and maxmem_kb and these are not
+ # consistent, then this is an error, as we've no way to tell which
+ # one takes precedence.
+
+ # Exactly the same thing applies to memory_KiB, memory, and
+ # mem_kb.
+
+ def discard_negatives(name):
+ if self.infoIsSet(name) and self.info[name] <= 0:
+ del self.info[name]
+
+ def valid_KiB_(mb_name, kb_name):
+ discard_negatives(kb_name)
+ discard_negatives(mb_name)
+
+ if self.infoIsSet(kb_name):
+ if self.infoIsSet(mb_name):
+ mb = self.info[mb_name]
+ kb = self.info[kb_name]
+ if mb * 1024 == kb:
+ return kb
+ else:
+ raise VmError(
+ 'Inconsistent %s / %s settings: %s / %s' %
+ (mb_name, kb_name, mb, kb))
+ else:
+ return self.info[kb_name]
+ elif self.infoIsSet(mb_name):
+ return self.info[mb_name] * 1024
+ else:
+ return None
+
+ def valid_KiB(mb_name, kb_name):
+ result = valid_KiB_(mb_name, kb_name)
+ if result <= 0:
+ raise VmError('Invalid %s / %s: %s' %
+ (mb_name, kb_name, result))
+ else:
+ return result
+
+ def delIf(name):
+ if name in self.info:
+ del self.info[name]
+
+ self.info['memory_KiB'] = valid_KiB('memory', 'mem_kb')
+ delIf('memory')
+ delIf('mem_kb')
+ self.info['maxmem_KiB'] = valid_KiB_('maxmem', 'maxmem_kb')
+ delIf('maxmem')
+ delIf('maxmem_kb')
+
+ if not self.info['maxmem_KiB']:
+ self.info['maxmem_KiB'] = 1 << 30
+
+ if self.info['maxmem_KiB'] > self.info['memory_KiB']:
+ self.info['maxmem_KiB'] = self.info['memory_KiB']
+
+ # Validate the given backend names.
+ for s in self.info['backend']:
+ if s not in backendFlags:
+ raise VmError('Invalid backend type: %s' % s)
+
+ for (n, c) in self.info['device']:
+ if not n or not c or n not in controllerClasses:
+ raise VmError('invalid device (%s, %s)' %
+ (str(n), str(c)))
+
+ if self.info['restart_mode'] not in restart_modes:
+ raise VmError('invalid restart mode: ' +
+ str(self.info['restart_mode']))
+
+ if 'cpumap' not in self.info:
+ if [self.info['vcpus'] == 1]:
+ self.info['cpumap'] = [1];
+ else:
+ raise VmError('Cannot create CPU map')
+
+ except KeyError, exn:
+ log.exception(exn)
+ raise VmError('Unspecified domain detail: %s' % str(exn))
+
+
def readVm(self, *args):
return xstransact.Read(self.path, *args)
@@ -295,20 +443,28 @@
def storeDom(self, *args):
return xstransact.Store(self.path, *args)
- def setDB(self, db):
- self.db = db
-
- def saveToDB(self, save=False, sync=False):
- self.db.saveDB(save=save, sync=sync)
-
- def exportToDB(self, save=False, sync=False):
- if self.image:
- self.image.exportToDB(save=save, sync=sync)
- self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
-
- def importFromDB(self):
- self.db.importFromDB(self, fields=self.__exports__)
- self.store_channel = self.eventChannel("store/port")
+
+ def exportToDB(self):
+ to_store = {
+ 'domid': str(self.domid),
+ 'uuid': self.uuid,
+
+ 'restart_time': str(self.restart_time),
+
+ 'xend/state': self.state,
+ 'xend/restart_count': str(self.restart_count),
+ 'xend/restart_mode': str(self.info['restart_mode']),
+
+ 'memory/target': str(self.info['memory_KiB'])
+ }
+
+ for (k, v) in self.info.items():
+ to_store[k] = str(v)
+
+ log.debug("Storing %s" % str(to_store))
+
+ self.writeVm(to_store)
+
def setDomid(self, domid):
"""Set the domain id.
@@ -318,40 +474,87 @@
self.domid = domid
self.storeDom("domid", self.domid)
- def getDomain(self):
+ def getDomid(self):
return self.domid
def setName(self, name):
- self.name = name
+ self.check_name(name)
+ self.info['name'] = name
self.storeVm("name", name)
def getName(self):
- return self.name
+ return self.info['name']
+
+ def getPath(self):
+ return self.path
+
+ def getUuid(self):
+ return self.uuid
+
+ def getVCpuCount(self):
+ return self.info['vcpus']
+
+ def getSsidref(self):
+ return self.info['ssidref']
+
+ def getMemoryTarget(self):
+ """Get this domain's target memory size, in KiB."""
+ return self.info['memory_KiB']
def setStoreRef(self, ref):
self.store_mfn = ref
self.storeDom("store/ring-ref", ref)
- def setStoreChannel(self, channel):
- if self.store_channel and self.store_channel != channel:
- self.store_channel.close()
- self.store_channel = channel
- self.storeDom("store/port", channel.port1)
+
+ def getBackendFlags(self):
+ return reduce(lambda x, y: x | backendFlags[y],
+ self.info['backend'], 0)
+
+
+ def closeStoreChannel(self):
+ """Close the store channel, if any. Nothrow guarantee."""
+
+ try:
+ if self.store_channel:
+ try:
+ self.store_channel.close()
+ self.removeDom("store/port")
+ finally:
+ self.store_channel = None
+ except Exception, exn:
+ log.exception(exn)
+
def setConsoleRef(self, ref):
self.console_mfn = ref
self.storeDom("console/ring-ref", ref)
+
def setMemoryTarget(self, target):
- self.memory_target = target
+ """Set the memory target of this domain.
+ @param target In KiB.
+ """
+ self.info['memory_KiB'] = target
self.storeDom("memory/target", target)
- def update(self, info=None):
- """Update with info from xc.domain_getinfo().
- """
- self.info = info or dom_get(self.domid)
- self.memory = self.info['mem_kb'] / 1024
- self.ssidref = self.info['ssidref']
+
+ def update(self, info = None):
+ """Update with info from xc.domain_getinfo().
+ """
+
+ log.debug("XendDomainInfo.update(%s) on domain %d", info, self.domid)
+
+ if not info:
+ info = dom_get(self.domid)
+ if not info:
+ return
+
+ self.info.update(info)
+ self.validateInfo()
+
+ log.debug("XendDomainInfo.update done on domain %d: %s", self.domid,
+ self.info)
+
def state_set(self, state):
self.state_updated.acquire()
@@ -359,7 +562,7 @@
self.state = state
self.state_updated.notifyAll()
self.state_updated.release()
- self.saveToDB()
+ self.exportToDB()
def state_wait(self, state):
self.state_updated.acquire()
@@ -370,190 +573,83 @@
def __str__(self):
s = "<domain"
s += " id=" + str(self.domid)
- s += " name=" + self.name
- s += " memory=" + str(self.memory)
- s += " ssidref=" + str(self.ssidref)
+ s += " name=" + self.info['name']
+ s += " memory=" + str(self.info['memory_KiB'] / 1024)
+ s += " ssidref=" + str(self.info['ssidref'])
s += ">"
return s
__repr__ = __str__
- def getDeviceController(self, type, error=True):
- ctrl = self.controllers.get(type)
- if not ctrl and error:
- raise XendError("invalid device type:" + type)
- return ctrl
-
- def findDeviceController(self, type):
- return (self.getDeviceController(type, error=False)
- or self.createDeviceController(type))
-
- def createDeviceController(self, type):
- ctrl = controller.createDevController(type, self,
recreate=self.recreate)
- self.controllers[type] = ctrl
- return ctrl
-
- def createDevice(self, type, devconfig, change=False):
- if self.recreate:
- return
- if type == 'vbd':
- typedev = sxp.child_value(devconfig, 'dev')
- if re.match('^ioemu:', typedev):
- return;
-
- backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
-
- devnum = blkdev_name_to_number(sxp.child_value(devconfig, 'dev'))
-
- backpath = "%s/backend/%s/%s/%d" % (backdom.path, type,
- self.uuid, devnum)
- frontpath = "%s/device/%s/%d" % (self.path, type, devnum)
-
- front = { 'backend' : backpath,
- 'backend-id' : "%i" % backdom.domid,
- 'virtual-device' : "%i" % devnum }
- xstransact.Write(frontpath, front)
-
- (type, params) = string.split(sxp.child_value(devconfig,
- 'uname'), ':', 1)
- back = { 'type' : type,
- 'params' : params,
- 'frontend' : frontpath,
- 'frontend-id' : "%i" % self.domid }
- xstransact.Write(backpath, back)
-
- return
-
- if type == 'vif':
- from xen.xend import XendRoot
- xroot = XendRoot.instance()
-
- def _get_config_ipaddr(config):
- val = []
- for ipaddr in sxp.children(config, elt='ip'):
- val.append(sxp.child0(ipaddr))
- return val
-
- backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
-
- devnum = self.netif_idx
- self.netif_idx += 1
-
- script = sxp.child_value(devconfig, 'script',
- xroot.get_vif_script())
- script = os.path.join(xroot.network_script_dir, script)
- bridge = sxp.child_value(devconfig, 'bridge',
- xroot.get_vif_bridge())
- mac = sxp.child_value(devconfig, 'mac')
- ipaddr = _get_config_ipaddr(devconfig)
-
- backpath = "%s/backend/%s/%s/%d" % (backdom.path, type,
- self.uuid, devnum)
- frontpath = "%s/device/%s/%d" % (self.path, type, devnum)
-
- front = { 'backend' : backpath,
- 'backend-id' : "%i" % backdom.domid,
- 'handle' : "%i" % devnum,
- 'mac' : mac }
- xstransact.Write(frontpath, front)
-
- back = { 'script' : script,
- 'domain' : self.name,
- 'mac' : mac,
- 'bridge' : bridge,
- 'frontend' : frontpath,
- 'frontend-id' : "%i" % self.domid,
- 'handle' : "%i" % devnum }
- if ipaddr:
- back['ip'] = ' '.join(ipaddr)
- xstransact.Write(backpath, back)
-
- return
-
- if type == 'vtpm':
- backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
-
- devnum = int(sxp.child_value(devconfig, 'instance', '0'))
- log.error("The domain has a TPM with instance %d." % devnum)
-
- backpath = "%s/backend/%s/%s/%d" % (backdom.path, type,
- self.uuid, devnum)
- frontpath = "%s/device/%s/%d" % (self.path, type, devnum)
-
- front = { 'backend' : backpath,
- 'backend-id' : "%i" % backdom.domid,
- 'handle' : "%i" % devnum }
- xstransact.Write(frontpath, front)
-
- back = { 'instance' : "%i" % devnum,
- 'frontend' : frontpath,
- 'frontend-id' : "%i" % self.domid }
- xstransact.Write(backpath, back)
-
- return
-
- ctrl = self.findDeviceController(type)
- return ctrl.createDevice(devconfig, recreate=self.recreate,
- change=change)
-
- def configureDevice(self, type, id, devconfig):
- ctrl = self.getDeviceController(type)
- return ctrl.configureDevice(id, devconfig)
-
- def destroyDevice(self, type, id, change=False, reboot=False):
- ctrl = self.getDeviceController(type)
- return ctrl.destroyDevice(id, change=change, reboot=reboot)
-
- def deleteDevice(self, type, id):
- ctrl = self.getDeviceController(type)
- return ctrl.deleteDevice(id)
-
- def getDevice(self, type, id, error=True):
- ctrl = self.getDeviceController(type)
- return ctrl.getDevice(id, error=error)
-
- def getDeviceIds(self, type):
- ctrl = self.getDeviceController(type)
- return ctrl.getDeviceIds()
-
- def getDeviceSxprs(self, type):
- ctrl = self.getDeviceController(type)
- return ctrl.getDeviceSxprs()
+
+ def getDeviceController(self, name):
+ if name not in controllerClasses:
+ raise XendError("unknown device type: " + str(name))
+
+ return controllerClasses[name](self)
+
+
+ def createDevice(self, deviceClass, devconfig):
+ return self.getDeviceController(deviceClass).createDevice(devconfig)
+
+
+ def configureDevice(self, deviceClass, devid, devconfig):
+ return self.getDeviceController(deviceClass).configureDevice(
+ devid, devconfig)
+
+
+ def destroyDevice(self, deviceClass, devid):
+ return self.getDeviceController(deviceClass).destroyDevice(devid)
+
def sxpr(self):
sxpr = ['domain',
['domid', self.domid],
- ['name', self.name],
- ['memory', self.memory],
- ['ssidref', self.ssidref],
- ['target', self.target] ]
+ ['name', self.info['name']],
+ ['memory', self.info['memory_KiB'] / 1024],
+ ['ssidref', self.info['ssidref']]]
if self.uuid:
sxpr.append(['uuid', self.uuid])
if self.info:
- sxpr.append(['maxmem', self.info['maxmem_kb']/1024 ])
- run = (self.info['running'] and 'r') or '-'
- block = (self.info['blocked'] and 'b') or '-'
- pause = (self.info['paused'] and 'p') or '-'
- shut = (self.info['shutdown'] and 's') or '-'
- crash = (self.info['crashed'] and 'c') or '-'
- state = run + block + pause + shut + crash
+ sxpr.append(['maxmem', self.info['maxmem_KiB'] / 1024])
+
+ if self.infoIsSet('device'):
+ for (n, c) in self.info['device']:
+ sxpr.append(['device', c])
+
+ def stateChar(name):
+ if name in self.info:
+ if self.info[name]:
+ return name[0]
+ else:
+ return '-'
+ else:
+ return '?'
+
+ state = reduce(
+ lambda x, y: x + y,
+ map(stateChar,
+ ['running', 'blocked', 'paused', 'shutdown', 'crashed']))
+
sxpr.append(['state', state])
- if self.info['shutdown']:
+ if self.infoIsSet('shutdown'):
reason = shutdown_reason(self.info['shutdown_reason'])
sxpr.append(['shutdown_reason', reason])
- sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
- sxpr.append(['cpu_time', self.info['cpu_time']/1e9])
+ if self.infoIsSet('cpu_time'):
+ sxpr.append(['cpu_time', self.info['cpu_time']/1e9])
sxpr.append(['vcpus', self.info['vcpus']])
sxpr.append(['cpumap', self.info['cpumap']])
- # build a string, using '|' to seperate items, show only up
- # to number of vcpus in domain, and trim the trailing '|'
- sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x)+'|',
- self.info['vcpu_to_cpu'][0:self.info['vcpus']]))[:-1]])
+ if self.infoIsSet('vcpu_to_cpu'):
+ sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
+ # build a string, using '|' to separate items, show only up
+ # to number of vcpus in domain, and trim the trailing '|'
+ sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x)+'|',
+                         self.info['vcpu_to_cpu'][0:self.info['vcpus']]))[:-1]])
- if self.start_time:
- up_time = time.time() - self.start_time
+ if self.infoIsSet('start_time'):
+ up_time = time.time() - self.info['start_time']
sxpr.append(['up_time', str(up_time) ])
- sxpr.append(['start_time', str(self.start_time) ])
+ sxpr.append(['start_time', str(self.info['start_time']) ])
if self.store_channel:
sxpr.append(self.store_channel.sxpr())
@@ -563,36 +659,12 @@
sxpr.append(['console_channel', self.console_channel.sxpr()])
if self.console_mfn:
sxpr.append(['console_mfn', self.console_mfn])
-# already in (devices)
-# console = self.getConsole()
-# if console:
-# sxpr.append(console.sxpr())
-
if self.restart_count:
sxpr.append(['restart_count', self.restart_count])
if self.restart_state:
sxpr.append(['restart_state', self.restart_state])
if self.restart_time:
sxpr.append(['restart_time', str(self.restart_time)])
-
- devs = self.sxpr_devices()
- if devs:
- sxpr.append(devs)
- if self.config:
- sxpr.append(['config', self.config])
- if self.device_model_pid:
- sxpr.append(['device_model_pid',self.device_model_pid])
- return sxpr
-
- def sxpr_devices(self):
- sxpr = []
- for ty in self.controllers.keys():
- devs = self.getDeviceSxprs(ty)
- sxpr += devs
- if sxpr:
- sxpr.insert(0, 'devices')
- else:
- sxpr = None
return sxpr
def check_name(self, name):
@@ -601,9 +673,8 @@
The same name cannot be used for more than one vm at the same time.
@param name: name
- @raise: VMerror if invalid
- """
- if self.recreate: return
+ @raise: VmError if invalid
+ """
if name is None or name == '':
raise VmError('missing vm name')
for c in name:
@@ -619,33 +690,35 @@
return
if dominfo.is_terminated():
return
- if not self.domid or (dominfo.domid != self.domid):
- raise VmError('vm name clash: ' + name)
-
- def construct(self, config):
+ if self.domid is None:
+ raise VmError("VM name '%s' already in use by domain %d" %
+ (name, dominfo.domid))
+ if dominfo.domid != self.domid:
+ raise VmError("VM name '%s' is used in both domains %d and %d" %
+ (name, self.domid, dominfo.domid))
+
+
+ def construct(self):
"""Construct the vm instance from its configuration.
@param config: configuration
@raise: VmError on error
"""
# todo - add support for scheduling params?
- self.config = config
try:
- # Initial domain create.
- self.setName(sxp.child_value(config, 'name'))
- self.check_name(self.name)
- self.init_image()
- self.configure_cpus(config)
- self.init_domain()
- self.register_domain()
- self.configure_bootloader()
+ if 'image' not in self.info:
+ raise VmError('Missing image in configuration')
+
+ self.image = ImageHandler.create(self,
+ self.info['image'],
+ self.info['device'])
+
+ self.initDomain()
# Create domain devices.
- self.configure_backends()
- self.configure_restart()
self.construct_image()
self.configure()
- self.exportToDB(save=True)
+ self.exportToDB()
except Exception, ex:
# Catch errors, cleanup and re-raise.
print 'Domain construction error:', ex
@@ -654,45 +727,43 @@
self.destroy()
raise
- def register_domain(self):
- xd = get_component('xen.xend.XendDomain')
- xd._add_domain(self)
- self.exportToDB(save=True)
-
- def configure_cpus(self, config):
- try:
- self.cpu_weight = float(sxp.child_value(config, 'cpu_weight', '1'))
- except:
- raise VmError('invalid cpu weight')
- self.memory = int(sxp.child_value(config, 'memory'))
- if self.memory is None:
- raise VmError('missing memory size')
- self.setMemoryTarget(self.memory * (1 << 20))
- self.ssidref = int(sxp.child_value(config, 'ssidref'))
- cpu = sxp.child_value(config, 'cpu')
- if self.recreate and self.domid and cpu is not None and int(cpu) >= 0:
- xc.domain_pincpu(self.domid, 0, 1<<int(cpu))
- try:
- image = sxp.child_value(self.config, 'image')
- vcpus = sxp.child_value(image, 'vcpus')
- if vcpus:
- self.vcpus = int(vcpus)
- except:
- raise VmError('invalid vcpus value')
+
+ def initDomain(self):
+ log.debug('XendDomainInfo.initDomain: %s %s %s %s)',
+ str(self.domid),
+ str(self.info['memory_KiB']),
+ str(self.info['ssidref']),
+ str(self.info['cpu_weight']))
+
+ self.domid = xc.domain_create(dom = self.domid or 0,
+ ssidref = self.info['ssidref'])
+ if self.domid <= 0:
+ raise VmError('Creating domain failed: name=%s' %
+ self.info['name'])
+
+ if self.info['bootloader']:
+ self.image.handleBootloading()
+
+ xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
+ m = self.image.getDomainMemory(self.info['memory_KiB'])
+ xc.domain_setmaxmem(self.domid, m)
+ xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
+
+ cpu = self.info['cpu']
+ if cpu is not None and cpu != -1:
+ xc.domain_pincpu(self.domid, 0, 1 << cpu)
+
+ self.info['start_time'] = time.time()
+
+ log.debug('init_domain> Created domain=%d name=%s memory=%d',
+ self.domid, self.info['name'], self.info['memory_KiB'])
+
def configure_vcpus(self, vcpus):
d = {}
for v in range(0, vcpus):
d["cpu/%d/availability" % v] = "online"
self.writeVm(d)
-
- def init_image(self):
- """Create boot image handler for the domain.
- """
- image = sxp.child_value(self.config, 'image')
- if image is None:
- raise VmError('missing image')
- self.image = ImageHandler.create(self, image)
def construct_image(self):
"""Construct the boot image for the domain.
@@ -704,23 +775,17 @@
IntroduceDomain(self.domid, self.store_mfn,
self.store_channel.port1, self.path)
# get the configured value of vcpus and update store
- self.configure_vcpus(self.vcpus)
+ self.configure_vcpus(self.info['vcpus'])
+
def delete(self):
"""Delete the vm's db.
"""
- if dom_get(self.domid):
- return
- self.domid = None
- self.saveToDB(sync=True)
try:
- # Todo: eventually will have to wait for devices to signal
- # destruction before can delete the db.
- if self.db:
- self.db.delete()
+ xstransact.Remove(self.path, 'domid')
except Exception, ex:
log.warning("error in domain db delete: %s", ex)
- pass
+
def destroy_domain(self):
"""Destroy the vm's domain.
@@ -732,17 +797,16 @@
try:
xc.domain_destroy(dom=self.domid)
except Exception, err:
- log.exception("Domain destroy failed: %s", self.name)
+ log.exception("Domain destroy failed: %s", self.info['name'])
def cleanup(self):
"""Cleanup vm resources: release devices.
"""
self.state = STATE_VM_TERMINATED
self.release_devices()
- if self.store_channel:
- self.setStoreChannel(None)
+ self.closeStoreChannel()
if self.console_channel:
- # notify processes using this cosole?
+ # notify processes using this console?
try:
self.console_channel.close()
self.console_channel = None
@@ -750,18 +814,20 @@
pass
if self.image:
try:
- self.device_model_pid = 0
self.image.destroy()
self.image = None
except:
pass
def destroy(self):
- """Clenup vm and destroy domain.
- """
+ """Cleanup vm and destroy domain.
+ """
+
+ log.debug("XendDomainInfo.destroy")
+
self.destroy_domain()
self.cleanup()
- self.saveToDB()
+ self.exportToDB()
return 0
def is_terminated(self):
@@ -772,43 +838,21 @@
def release_devices(self):
"""Release all vm devices.
"""
- reboot = self.restart_pending()
- for ctrl in self.controllers.values():
- if ctrl.isDestroyed(): continue
- ctrl.destroyController(reboot=reboot)
+
t = xstransact("%s/device" % self.path)
- for d in t.list("vbd"):
- t.remove(d)
- for d in t.list("vif"):
- t.remove(d)
- for d in t.list("vtpm"):
- t.remove(d)
+
+ for n in controllerClasses.keys():
+ for d in t.list(n):
+ try:
+ t.remove(d)
+                except Exception, ex:
+ # Log and swallow any exceptions in removal -- there's
+ # nothing more we can do.
+ log.exception(
+ "Device release failed: %s; %s; %s; %s" %
+ (self.info['name'], n, d, str(ex)))
t.commit()
- def show(self):
- """Print virtual machine info.
- """
- print "[VM dom=%d name=%s memory=%d ssidref=%d" % (self.domid,
self.name, self.memory, self.ssidref)
- print "image:"
- sxp.show(self.image)
- print "]"
-
- def init_domain(self):
- """Initialize the domain memory.
- """
- if self.recreate:
- return
- if self.start_time is None:
- self.start_time = time.time()
- self.storeVm(("start-time", self.start_time))
- try:
- cpu = int(sxp.child_value(self.config, 'cpu', '-1'))
- except:
- raise VmError('invalid cpu')
- id = self.image.initDomain(self.domid, self.memory, self.ssidref, cpu,
self.cpu_weight)
- log.debug('init_domain> Created domain=%d name=%s memory=%d',
- id, self.name, self.memory)
- self.setDomid(id)
def eventChannel(self, path=None):
"""Create an event channel to the domain.
@@ -833,17 +877,8 @@
self.console_channel = self.eventChannel("console/port")
def create_configured_devices(self):
- devices = sxp.children(self.config, 'device')
- for d in devices:
- dev_config = sxp.child0(d)
- if dev_config is None:
- raise VmError('invalid device')
- dev_type = sxp.name(dev_config)
-
- if not controller.isDevControllerClass(dev_type):
- raise VmError('unknown device type: ' + dev_type)
-
- self.createDevice(dev_type, dev_config)
+ for (n, c) in self.info['device']:
+ self.createDevice(n, c)
def create_devices(self):
@@ -851,13 +886,10 @@
@raise: VmError for invalid devices
"""
- if self.rebooting():
- for ctrl in self.controllers.values():
- ctrl.initController(reboot=True)
- else:
+ if not self.rebooting():
self.create_configured_devices()
- if not self.device_model_pid:
- self.device_model_pid = self.image.createDeviceModel()
+ if self.image:
+ self.image.createDeviceModel()
def device_create(self, dev_config):
"""Create a new device.
@@ -865,60 +897,19 @@
@param dev_config: device configuration
"""
dev_type = sxp.name(dev_config)
- dev = self.createDevice(dev_type, dev_config, change=True)
- self.config.append(['device', dev.getConfig()])
- return dev.sxpr()
-
- def device_configure(self, dev_config, id):
+ devid = self.createDevice(dev_type, dev_config)
+# self.config.append(['device', dev.getConfig()])
+ return self.getDeviceController(dev_type).sxpr(devid)
+
+
+ def device_configure(self, dev_config, devid):
"""Configure an existing device.
-
@param dev_config: device configuration
- @param id: device id
- """
- type = sxp.name(dev_config)
- dev = self.getDevice(type, id)
- old_config = dev.getConfig()
- new_config = dev.configure(dev_config, change=True)
- # Patch new config into vm config.
- new_full_config = ['device', new_config]
- old_full_config = ['device', old_config]
- old_index = self.config.index(old_full_config)
- self.config[old_index] = new_full_config
- return new_config
-
- def device_refresh(self, type, id):
- """Refresh a device.
-
- @param type: device type
- @param id: device id
- """
- dev = self.getDevice(type, id)
- dev.refresh()
-
- def device_delete(self, type, id):
- """Destroy and remove a device.
-
- @param type: device type
- @param id: device id
- """
- dev = self.getDevice(type, id)
- dev_config = dev.getConfig()
- if dev_config:
- self.config.remove(['device', dev_config])
- self.deleteDevice(type, dev.getId())
-
- def configure_bootloader(self):
- """Configure boot loader.
- """
- self.bootloader = sxp.child_value(self.config, "bootloader")
-
- def configure_restart(self):
- """Configure the vm restart mode.
- """
- r = sxp.child_value(self.config, 'restart', RESTART_ONREBOOT)
- if r not in restart_modes:
- raise VmError('invalid restart mode: ' + str(r))
- self.restart_mode = r;
+ @param devid: device id
+ """
+ deviceClass = sxp.name(dev_config)
+ self.configureDevice(deviceClass, devid, dev_config)
+
def restart_needed(self, reason):
"""Determine if the vm needs to be restarted when shutdown
@@ -927,11 +918,11 @@
@param reason: shutdown reason
@return True if needs restart, False otherwise
"""
- if self.restart_mode == RESTART_NEVER:
+ if self.info['restart_mode'] == RESTART_NEVER:
return False
- if self.restart_mode == RESTART_ALWAYS:
+ if self.info['restart_mode'] == RESTART_ALWAYS:
return True
- if self.restart_mode == RESTART_ONREBOOT:
+ if self.info['restart_mode'] == RESTART_ONREBOOT:
return reason == 'reboot'
return False
@@ -963,7 +954,7 @@
tdelta = tnow - self.restart_time
if tdelta < self.MINIMUM_RESTART_TIME:
self.restart_cancel()
- msg = 'VM %s restarting too fast' % self.name
+ msg = 'VM %s restarting too fast' % self.info['name']
log.error(msg)
raise VmError(msg)
self.restart_time = tnow
@@ -981,14 +972,15 @@
self.restart_check()
self.exportToDB()
self.restart_state = STATE_RESTART_BOOTING
- if self.bootloader:
- self.config = self.bootloader_config()
- self.construct(self.config)
- self.saveToDB()
+ self.configure_bootloader()
+ self.construct()
+ self.exportToDB()
finally:
self.restart_state = None
- def bootloader_config(self):
+ def configure_bootloader(self):
+ if not self.info['bootloader']:
+ return
# if we're restarting with a bootloader, we need to run it
# FIXME: this assumes the disk is the first device and
# that we're booting from the first disk
@@ -998,72 +990,30 @@
if dev:
disk = sxp.child_value(dev, "uname")
fn = blkdev_uname_to_file(disk)
- blcfg = bootloader(self.bootloader, fn, 1, self.vcpus)
+        blcfg = bootloader(self.info['bootloader'], fn, 1, self.info['vcpus'])
if blcfg is None:
msg = "Had a bootloader specified, but can't find disk"
log.error(msg)
raise VmError(msg)
- config = sxp.merge(['vm', blcfg ], self.config)
- return config
-
- def configure_backends(self):
- """Set configuration flags if the vm is a backend for netif or blkif.
- Configure the backends to use for vbd and vif if specified.
- """
- for c in sxp.children(self.config, 'backend'):
- v = sxp.child0(c)
- name = sxp.name(v)
- if name == 'blkif':
- self.blkif_backend = True
- elif name == 'netif':
- self.netif_backend = True
- elif name == 'usbif':
- self.usbif_backend = True
- elif name == 'tpmif':
- self.tpmif_backend = True
- else:
- raise VmError('invalid backend type:' + str(name))
+ self.config = sxp.merge(['vm', ['image', blcfg]], self.config)
+
def configure(self):
"""Configure a vm.
"""
- self.configure_fields()
+ self.configure_maxmem()
self.create_devices()
- self.create_blkif()
-
- def create_blkif(self):
- """Create the block device interface (blkif) for the vm.
- The vm needs a blkif even if it doesn't have any disks
- at creation time, for example when it uses NFS root.
-
- """
- return
- blkif = self.getDeviceController("vbd", error=False)
- if not blkif:
- blkif = self.createDeviceController("vbd")
- backend = blkif.getBackend(0)
- backend.connect(recreate=self.recreate)
-
- def configure_fields(self):
- """Process the vm configuration fields using the registered handlers.
- """
- index = {}
- for field in sxp.children(self.config):
- field_name = sxp.name(field)
- field_index = index.get(field_name, 0)
- field_handler = config_handlers.get(field_name)
- # Ignore unknown fields. Warn?
- if field_handler:
- v = field_handler(self, self.config, field, field_index)
- else:
- log.warning("Unknown config field %s", field_name)
- index[field_name] = field_index + 1
+
+
+ def configure_maxmem(self):
+ xc.domain_setmaxmem(self.domid, maxmem_kb = self.info['maxmem_KiB'])
+
def vcpu_hotplug(self, vcpu, state):
"""Disable or enable VCPU in domain.
"""
- if vcpu > self.vcpus:
+ if vcpu > self.info['vcpus']:
log.error("Invalid VCPU %d" % vcpu)
return
if int(state) == 0:
@@ -1109,26 +1059,29 @@
# get run-time value of vcpus and update store
self.configure_vcpus(dom_get(self.domid)['vcpus'])
-
-def vm_field_ignore(_, _1, _2, _3):
- """Dummy config field handler used for fields with built-in handling.
- Matches the signature required by config_handlers.
- """
- pass
-
-
-def vm_field_maxmem(vm, _1, val, _2):
- """Config field handler to configure vm memory limit. Matches the
- signature required by config_handlers.
- """
- maxmem = sxp.child0(val)
- if maxmem is None:
- maxmem = vm.memory
- try:
- maxmem = int(maxmem)
- except:
- raise VmError("invalid maxmem: " + str(maxmem))
- xc.domain_setmaxmem(vm.domid, maxmem_kb = maxmem * 1024)
+ def dom0_enforce_vcpus(self):
+ dom = 0
+ # get max number of vcpus to use for dom0 from config
+ from xen.xend import XendRoot
+ xroot = XendRoot.instance()
+ target = int(xroot.get_dom0_vcpus())
+ log.debug("number of vcpus to use is %d" % (target))
+
+ # target = 0 means use all processors
+ if target > 0:
+ # count the number of online vcpus (cpu values in v2c map >= 0)
+ vcpu_to_cpu = dom_get(dom)['vcpu_to_cpu']
+ vcpus_online = len(filter(lambda x: x >= 0, vcpu_to_cpu))
+ log.debug("found %d vcpus online" % (vcpus_online))
+
+ # disable any extra vcpus that are online over the requested target
+ for vcpu in range(target, vcpus_online):
+ log.info("enforcement is disabling DOM%d VCPU%d" % (dom, vcpu))
+ self.vcpu_hotplug(vcpu, 0)
+
+
+ def infoIsSet(self, name):
+ return name in self.info and self.info[name] is not None
#============================================================================
@@ -1144,37 +1097,32 @@
addImageHandlerClass(VmxImageHandler)
-"""Table of handlers for field configuration.
-
-field_name[String]: fn(vm, config, field, index) -> value(ignored)
-"""
-config_handlers = {
-
- # Ignore the fields we already handle.
-
- 'name': vm_field_ignore,
- 'memory': vm_field_ignore,
- 'ssidref': vm_field_ignore,
- 'cpu': vm_field_ignore,
- 'cpu_weight': vm_field_ignore,
- 'restart': vm_field_ignore,
- 'image': vm_field_ignore,
- 'device': vm_field_ignore,
- 'backend': vm_field_ignore,
- 'vcpus': vm_field_ignore,
- 'bootloader': vm_field_ignore,
-
- # Register other config handlers.
- 'maxmem': vm_field_maxmem
- }
-
-
#============================================================================
# Register device controllers and their device config types.
+"""A map from device-class names to the subclass of DevController that
+implements the device control specific to that device-class."""
+controllerClasses = {}
+
+
+"""A map of backend names and the corresponding flag."""
+backendFlags = {}
+
+
+def addControllerClass(device_class, backend_name, backend_flag, cls):
+ """Register a subclass of DevController to handle the named device-class.
+
+ @param backend_flag One of the SIF_XYZ_BE_DOMAIN constants, or None if
+ no flag is to be set.
+ """
+ cls.deviceClass = device_class
+ backendFlags[backend_name] = backend_flag
+ controllerClasses[device_class] = cls
+
+
from xen.xend.server import blkif, netif, tpmif, pciif, usbif
-controller.addDevControllerClass("vbd", blkif.BlkifController)
-controller.addDevControllerClass("vif", netif.NetifController)
-controller.addDevControllerClass("vtpm", tpmif.TPMifController)
-controller.addDevControllerClass("pci", pciif.PciController)
-controller.addDevControllerClass("usb", usbif.UsbifController)
+addControllerClass('vbd', 'blkif', SIF_BLK_BE_DOMAIN, blkif.BlkifController)
+addControllerClass('vif', 'netif', SIF_NET_BE_DOMAIN, netif.NetifController)
+addControllerClass('vtpm', 'tpmif', SIF_TPM_BE_DOMAIN, tpmif.TPMifController)
+addControllerClass('pci', 'pciif', None, pciif.PciController)
+addControllerClass('usb', 'usbif', None, usbif.UsbifController)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendLogging.py
--- a/tools/python/xen/xend/XendLogging.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendLogging.py Thu Sep 22 17:42:01 2005
@@ -50,9 +50,6 @@
self.getLogger().setLevel(level)
self.level = level
- def getLevel(self, level):
- return logging.getLevelName(self.level)
-
def getLogger(self):
return logging.getLogger("xend")
@@ -65,8 +62,7 @@
backupCount=self.backupCount)
self.logfilename = filename
self.logfile.setFormatter(Formatter(self.logFileFormat,
self.dateFormat))
- log = self.getLogger()
- log.addHandler(self.logfile)
+ self.getLogger().addHandler(self.logfile)
def getLogFile(self):
return self.logfile
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendNode.py Thu Sep 22 17:42:01 2005
@@ -36,7 +36,7 @@
def reboot(self):
return 0
- def notify(self, uri):
+ def notify(self, _):
return 0
def cpu_bvt_slice_set(self, ctx_allow):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendProtocol.py
--- a/tools/python/xen/xend/XendProtocol.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendProtocol.py Thu Sep 22 17:42:01 2005
@@ -22,7 +22,7 @@
from encode import *
import sxp
-from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendRoot
DEBUG = 0
@@ -30,6 +30,10 @@
HTTP_CREATED = 201
HTTP_ACCEPTED = 202
HTTP_NO_CONTENT = 204
+
+
+xroot = XendRoot.instance()
+
class XendError(RuntimeError):
"""Error class for 'expected errors' when talking to xend.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendRoot.py Thu Sep 22 17:42:01 2005
@@ -87,7 +87,7 @@
dom0_min_mem_default = '0'
- dom0_cpus_default = '0'
+ dom0_vcpus_default = '0'
components = {}
@@ -332,8 +332,8 @@
def get_dom0_min_mem(self):
return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
- def get_dom0_cpus(self):
- return self.get_config_int('dom0-cpus', self.dom0_cpus_default)
+ def get_dom0_vcpus(self):
+ return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
def instance():
"""Get an instance of XendRoot.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/encode.py
--- a/tools/python/xen/xend/encode.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/encode.py Thu Sep 22 17:42:01 2005
@@ -26,7 +26,6 @@
from StringIO import StringIO
import urllib
-import httplib
import random
import md5
@@ -104,7 +103,7 @@
val = ({}, None)
if d is None: return val
multipart = 0
- for (k, v) in data_values(d):
+ for (_, v) in data_values(d):
if encode_isfile(v):
multipart = 1
break
@@ -156,7 +155,7 @@
def mime_boundary():
random.seed()
m = md5.new()
- for i in range(0, 10):
+ for _ in range(0, 10):
c = chr(random.randint(1, 255))
m.update(c)
b = m.hexdigest()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/image.py Thu Sep 22 17:42:01 2005
@@ -13,34 +13,29 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
+
import os, string
import re
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+import xen.lowlevel.xc
from xen.xend import sxp
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
-from xen.xend.xenstore import DBVar
-from xen.xend.xenstore.xstransact import xstransact
from xen.xend.server import channel
-"""Flag for a block device backend domain."""
-SIF_BLK_BE_DOMAIN = (1<<4)
-
-"""Flag for a net device backend domain."""
-SIF_NET_BE_DOMAIN = (1<<5)
-
-"""Flag for a TPM device backend domain."""
-SIF_TPM_BE_DOMAIN = (1<<7)
+
+xc = xen.lowlevel.xc.new()
+
+
+MAX_GUEST_CMDLINE = 1024
class ImageHandler:
"""Abstract base class for image handlers.
- initDomain() is called to initialise the domain memory.
-
createImage() is called to configure and build the domain from its
kernel image and ramdisk etc.
@@ -88,49 +83,57 @@
findImageHandlerClass = classmethod(findImageHandlerClass)
- def create(cls, vm, image):
+ def create(cls, vm, imageConfig, deviceConfig):
"""Create an image handler for a vm.
- @param vm vm
- @param image image config
@return ImageHandler instance
"""
- imageClass = cls.findImageHandlerClass(image)
- return imageClass(vm, image)
+ imageClass = cls.findImageHandlerClass(imageConfig)
+ return imageClass(vm, imageConfig, deviceConfig)
create = classmethod(create)
#======================================================================
# Instance vars and methods.
- db = None
ostype = None
- config = None
kernel = None
ramdisk = None
cmdline = None
+
flags = 0
- __exports__ = [
- DBVar('ostype', ty='str'),
- DBVar('config', ty='sxpr'),
- DBVar('kernel', ty='str'),
- DBVar('ramdisk', ty='str'),
- DBVar('cmdline', ty='str'),
- DBVar('flags', ty='int'),
- ]
-
- def __init__(self, vm, config):
+ def __init__(self, vm, imageConfig, deviceConfig):
self.vm = vm
- self.db = vm.db.addChild('/image')
- self.config = config
-
- def exportToDB(self, save=False, sync=False):
- self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
-
- def importFromDB(self):
- self.db.importFromDB(self, fields=self.__exports__)
+ self.configure(imageConfig, deviceConfig)
+
+ def configure(self, imageConfig, _):
+ """Config actions common to all unix-like domains."""
+
+ self.kernel = sxp.child_value(imageConfig, "kernel")
+ self.cmdline = ""
+ ip = sxp.child_value(imageConfig, "ip", None)
+ if ip:
+ self.cmdline += " ip=" + ip
+ root = sxp.child_value(imageConfig, "root")
+ if root:
+ self.cmdline += " root=" + root
+ args = sxp.child_value(imageConfig, "args")
+ if args:
+ self.cmdline += " " + args
+ self.ramdisk = sxp.child_value(imageConfig, "ramdisk", '')
+
+ self.vm.storeVm(("image/ostype", self.ostype),
+ ("image/kernel", self.kernel),
+ ("image/cmdline", self.cmdline),
+ ("image/ramdisk", self.ramdisk))
+
+
+    def handleBootloading(self):
+ self.unlink(self.kernel)
+ self.unlink(self.ramdisk)
+
def unlink(self, f):
if not f: return
@@ -139,94 +142,39 @@
except OSError, ex:
log.warning("error removing bootloader file '%s': %s", f, ex)
- def initDomain(self, dom, memory, ssidref, cpu, cpu_weight):
- """Initial domain create.
-
- @return domain id
- """
-
- mem_kb = self.getDomainMemory(memory)
- if not self.vm.restore:
- dom = xc.domain_create(dom = dom or 0, ssidref = ssidref)
- # if bootloader, unlink here. But should go after buildDomain() ?
- if self.vm.bootloader:
- self.unlink(self.kernel)
- self.unlink(self.ramdisk)
- if dom <= 0:
- raise VmError('Creating domain failed: name=%s' % self.vm.name)
- log.debug("initDomain: cpu=%d mem_kb=%d ssidref=%d dom=%d", cpu,
mem_kb, ssidref, dom)
- xc.domain_setcpuweight(dom, cpu_weight)
- xc.domain_setmaxmem(dom, mem_kb)
-
- try:
- # Give the domain some memory below 4GB
- lmem_kb = 0
- if lmem_kb > 0:
- xc.domain_memory_increase_reservation(dom,
min(lmem_kb,mem_kb), 0, 32)
- if mem_kb > lmem_kb:
- xc.domain_memory_increase_reservation(dom, mem_kb-lmem_kb, 0,
0)
- except:
- xc.domain_destroy(dom)
- raise
-
- if cpu != -1:
- xc.domain_pincpu(dom, 0, 1<<int(cpu))
- return dom
def createImage(self):
"""Entry point to create domain memory image.
Override in subclass if needed.
"""
- self.configure()
self.createDomain()
- def configure(self):
- """Config actions common to all unix-like domains."""
- self.kernel = sxp.child_value(self.config, "kernel")
- self.cmdline = ""
- ip = sxp.child_value(self.config, "ip", None)
- if ip:
- self.cmdline += " ip=" + ip
- root = sxp.child_value(self.config, "root")
- if root:
- self.cmdline += " root=" + root
- args = sxp.child_value(self.config, "args")
- if args:
- self.cmdline += " " + args
- self.ramdisk = sxp.child_value(self.config, "ramdisk", '')
-
def createDomain(self):
"""Build the domain boot image.
"""
# Set params and call buildDomain().
- self.flags = 0
- if self.vm.netif_backend: self.flags |= SIF_NET_BE_DOMAIN
- if self.vm.blkif_backend: self.flags |= SIF_BLK_BE_DOMAIN
- if self.vm.tpmif_backend: self.flags |= SIF_TPM_BE_DOMAIN
-
- if self.vm.recreate or self.vm.restore:
- return
+ self.flags = self.vm.getBackendFlags()
+
if not os.path.isfile(self.kernel):
raise VmError('Kernel image does not exist: %s' % self.kernel)
if self.ramdisk and not os.path.isfile(self.ramdisk):
raise VmError('Kernel ramdisk does not exist: %s' % self.ramdisk)
- if len(self.cmdline) >= 256:
- log.warning('kernel cmdline too long, domain %d',
self.vm.getDomain())
+ if len(self.cmdline) >= MAX_GUEST_CMDLINE:
+ log.warning('kernel cmdline too long, domain %d',
+ self.vm.getDomid())
log.info("buildDomain os=%s dom=%d vcpus=%d", self.ostype,
- self.vm.getDomain(), self.vm.vcpus)
+ self.vm.getDomid(), self.vm.getVCpuCount())
err = self.buildDomain()
if err != 0:
raise VmError('Building domain failed: ostype=%s dom=%d err=%d'
- % (self.ostype, self.vm.getDomain(), err))
-
- def getDomainMemory(self, mem_mb):
- """Memory (in KB) the domain will need for mem_mb (in MB)."""
- if os.uname()[4] == 'ia64':
- """Append extra system pages, like xenstore and console"""
- return (mem_mb * 1024 + 3 * 16)
- else:
- return mem_mb * 1024
+ % (self.ostype, self.vm.getDomid(), err))
+
+ def getDomainMemory(self, mem):
+ """@return The memory required, in KiB, by the domain to store the
+ given amount, also in KiB. This is normally just mem, but VMX domains
+ have overheads to account for."""
+ return mem
def buildDomain(self):
"""Build the domain. Define in subclass."""
@@ -262,23 +210,23 @@
else:
console_evtchn = 0
- log.debug("dom = %d", self.vm.getDomain())
+ log.debug("dom = %d", self.vm.getDomid())
log.debug("image = %s", self.kernel)
log.debug("store_evtchn = %d", store_evtchn)
log.debug("console_evtchn = %d", console_evtchn)
log.debug("cmdline = %s", self.cmdline)
log.debug("ramdisk = %s", self.ramdisk)
log.debug("flags = %d", self.flags)
- log.debug("vcpus = %d", self.vm.vcpus)
-
- ret = xc.linux_build(dom = self.vm.getDomain(),
+ log.debug("vcpus = %d", self.vm.getVCpuCount())
+
+ ret = xc.linux_build(dom = self.vm.getDomid(),
image = self.kernel,
store_evtchn = store_evtchn,
console_evtchn = console_evtchn,
cmdline = self.cmdline,
ramdisk = self.ramdisk,
flags = self.flags,
- vcpus = self.vm.vcpus)
+ vcpus = self.vm.getVCpuCount())
if isinstance(ret, dict):
self.set_vminfo(ret)
return 0
@@ -286,49 +234,72 @@
class VmxImageHandler(ImageHandler):
- __exports__ = ImageHandler.__exports__ + [
- DBVar('memmap', ty='str'),
- DBVar('memmap_value', ty='sxpr'),
- # device channel?
- ]
-
ostype = "vmx"
- memmap = None
- memmap_value = []
- device_channel = None
- pid = 0
+
+ def configure(self, imageConfig, deviceConfig):
+ ImageHandler.configure(self, imageConfig, deviceConfig)
+
+ self.memmap = sxp.child_value(imageConfig, 'memmap')
+ self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
+ self.device_model = sxp.child_value(imageConfig, 'device_model')
+ if not self.device_model:
+ raise VmError("vmx: missing device model")
+ self.display = sxp.child_value(imageConfig, 'display')
+
+ self.vm.storeVm(("image/memmap", self.memmap),
+ ("image/dmargs", " ".join(self.dmargs)),
+ ("image/device-model", self.device_model),
+ ("image/display", self.display))
+
+ self.device_channel = None
+ self.pid = 0
+ self.memmap_value = []
+
+ self.dmargs += self.configVNC(imageConfig)
+
+
def createImage(self):
"""Create a VM for the VMX environment.
"""
- self.configure()
self.parseMemmap()
self.createDomain()
def buildDomain(self):
# Create an event channel
- self.device_channel = channel.eventChannel(0, self.vm.getDomain())
+ self.device_channel = channel.eventChannel(0, self.vm.getDomid())
log.info("VMX device model port: %d", self.device_channel.port2)
if self.vm.store_channel:
store_evtchn = self.vm.store_channel.port2
else:
store_evtchn = 0
- ret = xc.vmx_build(dom = self.vm.getDomain(),
- image = self.kernel,
- control_evtchn = self.device_channel.port2,
- store_evtchn = store_evtchn,
- memsize = self.vm.memory,
- memmap = self.memmap_value,
- cmdline = self.cmdline,
- ramdisk = self.ramdisk,
- flags = self.flags,
- vcpus = self.vm.vcpus)
+
+ log.debug("dom = %d", self.vm.getDomid())
+ log.debug("image = %s", self.kernel)
+ log.debug("control_evtchn = %d", self.device_channel.port2)
+ log.debug("store_evtchn = %d", store_evtchn)
+ log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024)
+ log.debug("memmap = %s", self.memmap_value)
+ log.debug("cmdline = %s", self.cmdline)
+ log.debug("ramdisk = %s", self.ramdisk)
+ log.debug("flags = %d", self.flags)
+ log.debug("vcpus = %d", self.vm.getVCpuCount())
+
+ ret = xc.vmx_build(dom = self.vm.getDomid(),
+ image = self.kernel,
+ control_evtchn = self.device_channel.port2,
+ store_evtchn = store_evtchn,
+ memsize = self.vm.getMemoryTarget() / 1024,
+ memmap = self.memmap_value,
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags,
+ vcpus = self.vm.getVCpuCount())
if isinstance(ret, dict):
self.set_vminfo(ret)
return 0
return ret
def parseMemmap(self):
- self.memmap = sxp.child_value(self.vm.config, "memmap")
if self.memmap is None:
return
memmap = sxp.parse(open(self.memmap))[0]
@@ -337,12 +308,12 @@
# Return a list of cmd line args to the device models based on the
# xm config file
- def parseDeviceModelArgs(self):
- dmargs = [ 'cdrom', 'boot', 'fda', 'fdb',
- 'localtime', 'serial', 'stdvga', 'isa' ]
- ret = []
- for a in dmargs:
- v = sxp.child_value(self.vm.config, a)
+ def parseDeviceModelArgs(self, imageConfig, deviceConfig):
+ dmargs = [ 'cdrom', 'boot', 'fda', 'fdb',
+ 'localtime', 'serial', 'stdvga', 'isa', 'vcpus' ]
+ ret = []
+ for a in dmargs:
+ v = sxp.child_value(imageConfig, a)
# python doesn't allow '-' in variable names
if a == 'stdvga': a = 'std-vga'
@@ -351,20 +322,17 @@
if a in ['localtime', 'std-vga', 'isa']:
if v != None: v = int(v)
- log.debug("args: %s, val: %s" % (a,v))
- if v:
- ret.append("-%s" % a)
- ret.append("%s" % v)
+ log.debug("args: %s, val: %s" % (a,v))
+ if v:
+ ret.append("-%s" % a)
+ ret.append("%s" % v)
# Handle disk/network related options
- devices = sxp.children(self.vm.config, 'device')
- for device in devices:
- name = sxp.name(sxp.child0(device))
+ for (name, info) in deviceConfig:
if name == 'vbd':
- vbdinfo = sxp.child(device, 'vbd')
- uname = sxp.child_value(vbdinfo, 'uname')
- typedev = sxp.child_value(vbdinfo, 'dev')
- (vbdtype, vbdparam) = string.split(uname, ':', 1)
+ uname = sxp.child_value(info, 'uname')
+ typedev = sxp.child_value(info, 'dev')
+ (_, vbdparam) = string.split(uname, ':', 1)
if re.match('^ioemu:', typedev):
(emtype, vbddev) = string.split(typedev, ':', 1)
else:
@@ -378,61 +346,59 @@
ret.append("-%s" % vbddev)
ret.append("%s" % vbdparam)
if name == 'vif':
- vifinfo = sxp.child(device, 'vif')
- mac = sxp.child_value(vifinfo, 'mac')
+ mac = sxp.child_value(info, 'mac')
ret.append("-macaddr")
ret.append("%s" % mac)
if name == 'vtpm':
- vtpminfo = sxp.child(device, 'vtpm')
- instance = sxp.child_value(vtpminfo, 'instance')
+ instance = sxp.child_value(info, 'instance')
ret.append("-instance")
ret.append("%s" % instance)
-
- # Handle graphics library related options
- vnc = sxp.child_value(self.vm.config, 'vnc')
- sdl = sxp.child_value(self.vm.config, 'sdl')
- nographic = sxp.child_value(self.vm.config, 'nographic')
- if nographic:
- ret.append('-nographic')
- return ret
-
- if vnc and sdl:
- ret = ret + ['-vnc-and-sdl', '-k', 'en-us']
- elif vnc:
- ret = ret + ['-vnc', '-k', 'en-us']
- if vnc:
- vncport = int(self.vm.getDomain()) + 5900
- ret = ret + ['-vncport', '%d' % vncport]
- return ret
-
+ return ret
+
+ def configVNC(self, config):
+ # Handle graphics library related options
+ vnc = sxp.child_value(config, 'vnc')
+ sdl = sxp.child_value(config, 'sdl')
+ ret = []
+ nographic = sxp.child_value(config, 'nographic')
+ if nographic:
+ ret.append('-nographic')
+ return ret
+
+ if vnc and sdl:
+ ret = ret + ['-vnc-and-sdl', '-k', 'en-us']
+ elif vnc:
+ ret = ret + ['-vnc', '-k', 'en-us']
+ if vnc:
+ vncport = int(self.vm.getDomid()) + 5900
+ ret = ret + ['-vncport', '%d' % vncport]
+ return ret
+
def createDeviceModel(self):
- device_model = sxp.child_value(self.vm.config, 'device_model')
- if not device_model:
- raise VmError("vmx: missing device model")
+ if self.pid:
+ return
# Execute device model.
#todo: Error handling
# XXX RN: note that the order of args matter!
- args = [device_model]
+ args = [self.device_model]
vnc = self.vncParams()
if len(vnc):
args = args + vnc
- args = args + ([ "-d", "%d" % self.vm.getDomain(),
+ args = args + ([ "-d", "%d" % self.vm.getDomid(),
"-p", "%d" % self.device_channel.port1,
- "-m", "%s" % self.vm.memory ])
- args = args + self.parseDeviceModelArgs()
+ "-m", "%s" % (self.vm.getMemoryTarget() / 1024)])
+ args = args + self.dmargs
env = dict(os.environ)
- env['DISPLAY'] = sxp.child_value(self.vm.config, 'display')
- log.info("spawning device models: %s %s", device_model, args)
- self.pid = os.spawnve(os.P_NOWAIT, device_model, args, env)
+ env['DISPLAY'] = self.display
+ log.info("spawning device models: %s %s", self.device_model, args)
+ self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
log.info("device model pid: %d", self.pid)
- return self.pid
def vncParams(self):
# see if a vncviewer was specified
# XXX RN: bit of a hack. should unify this, maybe stick in config space
vncconnect=[]
- image = self.config
- args = sxp.child_value(image, "args")
+ args = self.cmdline
if args:
arg_list = string.split(args)
for arg in arg_list:
@@ -446,15 +412,16 @@
channel.eventChannelClose(self.device_channel)
import signal
if not self.pid:
- self.pid = self.vm.device_model_pid
+ return
os.kill(self.pid, signal.SIGKILL)
- (pid, status) = os.waitpid(self.pid, 0)
+ os.waitpid(self.pid, 0)
self.pid = 0
- def getDomainMemory(self, mem_mb):
+ def getDomainMemory(self, mem):
+ """@see ImageHandler.getDomainMemory"""
# for ioreq_t and xenstore
static_pages = 2
- return (mem_mb * 1024) + self.getPageTableSize(mem_mb) + 4 * static_pages
+ return mem + self.getPageTableSize(mem / 1024) + 4 * static_pages
def getPageTableSize(self, mem_mb):
"""Return the size of memory needed for 1:1 page tables for physical
@@ -466,8 +433,9 @@
# 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
if os.uname()[4] == 'x86_64':
return (5 + ((mem_mb + 1) >> 1)) * 4
- elif os.uname()[4] == 'ia64':
- # XEN/IA64 has p2m table allocated on demand, so only return guest firmware size here.
- return 16 * 1024
+ elif os.uname()[4] == 'ia64':
+ # XEN/IA64 has p2m table allocated on demand, so only return
+ # guest firmware size here.
+ return 16 * 1024
else:
return (1 + ((mem_mb + 3) >> 2)) * 4
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/scheduler.py
--- a/tools/python/xen/xend/scheduler.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/scheduler.py Thu Sep 22 17:42:01 2005
@@ -13,11 +13,12 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
import threading
-def later(delay, fn, args=(), kwargs={}):
+def later(delay, fn, *args, **kwargs):
"""Schedule a function to be called later.
@param delay: delay in seconds
@@ -29,7 +30,7 @@
timer.start()
return timer
-def now(fn, args=(), kwargs={}):
+def now(fn, *args, **kwargs):
"""Schedule a function to be called now.
@param fn: function
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Thu Sep 22 17:42:01 2005
@@ -25,7 +25,6 @@
from xen.xend.XendLogging import log
from xen.xend import XendRoot; xroot = XendRoot.instance()
-import controller
import event
import relocate
from params import *
@@ -137,13 +136,6 @@
else:
return 0
- def onSIGCHLD(self, signum, frame):
- if self.child > 0:
- try:
- pid, sts = os.waitpid(self.child, os.WNOHANG)
- except os.error, ex:
- pass
-
def fork_pid(self, pidfile):
"""Fork and write the pid of the child to 'pidfile'.
@@ -200,15 +192,29 @@
# Trying to run an already-running service is a success.
return 0
- signal.signal(signal.SIGCHLD, self.onSIGCHLD)
+ ret = 0
+
+ # we use a pipe to communicate between the parent and the child process
+ # this way we know when the child has actually initialized itself so
+ # we can avoid a race condition during startup
+
+ r,w = os.pipe()
if self.fork_pid(XEND_PID_FILE):
- #Parent. Sleep to give child time to start.
- time.sleep(1)
+ os.close(w)
+ r = os.fdopen(r, 'r')
+ s = r.read()
+ r.close()
+ if not len(s):
+ ret = 1
+ else:
+ ret = int(s)
else:
+ os.close(r)
# Child
self.tracing(trace)
- self.run()
- return 0
+ self.run(os.fdopen(w, 'w'))
+
+ return ret
def tracing(self, traceon):
"""Turn tracing on or off.
@@ -290,20 +296,21 @@
def stop(self):
return self.cleanup(kill=True)
- def run(self):
- _enforce_dom0_cpus()
+ def run(self, status):
try:
log.info("Xend Daemon started")
event.listenEvent(self)
relocate.listenRelocation()
servers = SrvServer.create()
self.daemonize()
- servers.start()
+ servers.start(status)
except Exception, ex:
print >>sys.stderr, 'Exception starting xend:', ex
if XEND_DEBUG:
traceback.print_exc()
log.exception("Exception starting xend (%s)" % ex)
+ status.write('1')
+ status.close()
self.exit(1)
def exit(self, rc=0):
@@ -314,32 +321,6 @@
#sys.exit(rc)
os._exit(rc)
-def _enforce_dom0_cpus():
- dn = xroot.get_dom0_cpus()
-
- for d in glob.glob("/sys/devices/system/cpu/cpu*"):
- cpu = int(os.path.basename(d)[3:])
- if (dn == 0) or (cpu < dn):
- v = "1"
- else:
- v = "0"
- try:
- f = open("%s/online" %d, "r+")
- c = f.read(1)
- if (c != v):
- if v == "0":
- log.info("dom0 is trying to give back cpu %d", cpu)
- else:
- log.info("dom0 is trying to take cpu %d", cpu)
- f.seek(0)
- f.write(v)
- f.close()
- log.info("dom0 successfully enforced cpu %d", cpu)
- else:
- f.close()
- except:
- pass
-
def instance():
global inst
try:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvDomainDir.py
--- a/tools/python/xen/xend/server/SrvDomainDir.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvDomainDir.py Thu Sep 22 17:42:01 2005
@@ -85,7 +85,7 @@
def _op_create_cb(self, dominfo, configstring, req):
"""Callback to handle domain creation.
"""
- dom = dominfo.name
+ dom = dominfo.getName()
domurl = "%s/%s" % (req.prePathURL(), dom)
req.setResponseCode(http.CREATED, "created")
req.setHeader("Location", domurl)
@@ -112,7 +112,7 @@
fn = FormFn(self.xd.domain_restore,
[['file', 'str']])
dominfo = fn(req.args)
- dom = dominfo.name
+ dom = dominfo.getName()
domurl = "%s/%s" % (req.prePathURL(), dom)
req.setResponseCode(http.CREATED)
req.setHeader("Location", domurl)
@@ -152,12 +152,12 @@
domains = self.xd.list_sorted()
req.write('<ul>')
for d in domains:
- req.write('<li><a href="%s%s"> Domain %s</a>'
- % (url, d.name, d.name))
- req.write('id=%s' % d.domid)
- req.write('memory=%d'% d.memory)
- req.write('ssidref=%d'% d.ssidref)
- req.write('</li>')
+ req.write('<li><a href="%s%s"> Domain %s</a>'
+ % (url, d.getName(), d.getName()))
+ req.write('id=%s' % d.getDomain())
+ req.write('memory=%d'% d.getMemoryTarget())
+ req.write('ssidref=%d'% d.getSsidref())
+ req.write('</li>')
req.write('</ul>')
def form(self, req):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvNode.py
--- a/tools/python/xen/xend/server/SrvNode.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvNode.py Thu Sep 22 17:42:01 2005
@@ -15,7 +15,6 @@
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
#============================================================================
-import os
from xen.web.SrvDir import SrvDir
from xen.xend import sxp
@@ -32,15 +31,15 @@
self.add('dmesg', 'SrvDmesg')
self.add('log', 'SrvXendLog')
- def op_shutdown(self, op, req):
+ def op_shutdown(self, _1, _2):
val = self.xn.shutdown()
return val
- def op_reboot(self, op, req):
+ def op_reboot(self, _1, _2):
val = self.xn.reboot()
return val
- def op_cpu_bvt_slice_set(self, op, req):
+ def op_cpu_bvt_slice_set(self, _, req):
fn = FormFn(self.xn.cpu_bvt_slice_set,
[['ctx_allow', 'int']])
val = fn(req.args, {})
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvServer.py Thu Sep 22 17:42:01 2005
@@ -44,12 +44,17 @@
from xen.web.httpserver import HttpServer, UnixHttpServer
-from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendRoot
from xen.xend import Vifctl
from xen.xend.XendLogging import log
from xen.web.SrvDir import SrvDir
+import time
from SrvRoot import SrvRoot
+
+
+xroot = XendRoot.instance()
+
class XendServers:
@@ -59,13 +64,32 @@
def add(self, server):
self.servers.append(server)
- def start(self):
+ def start(self, status):
Vifctl.network('start')
threads = []
for server in self.servers:
thread = Thread(target=server.run)
thread.start()
threads.append(thread)
+
+
+ # check for when all threads have initialized themselves and then
+ # close the status pipe
+
+ threads_left = True
+ while threads_left:
+ threads_left = False
+
+ for server in self.servers:
+ if not server.ready:
+ threads_left = True
+ break
+
+ if threads_left:
+ time.sleep(.5)
+
+ status.write('0')
+ status.close()
for t in threads:
t.join()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/blkif.py Thu Sep 22 17:42:01 2005
@@ -13,322 +13,47 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
-"""Support for virtual block devices.
-"""
+
+import re
import string
from xen.util import blkif
-from xen.xend.XendError import XendError, VmError
-from xen.xend.XendRoot import get_component
-from xen.xend.XendLogging import log
from xen.xend import sxp
-from xen.xend import Blkctl
-from xen.xend.xenstore import DBVar
-from xen.xend.server.controller import Dev, DevController
+from xen.xend.server.DevController import DevController
-class BlkifBackend:
- """ Handler for the 'back-end' channel to a block device driver domain
- on behalf of a front-end domain.
- Must be connected using connect() before it can be used.
- """
- def __init__(self, controller, id, dom, recreate=False):
- self.controller = controller
- self.id = id
- self.frontendDomain = self.controller.getDomain()
- self.backendDomain = dom
- self.destroyed = False
- self.connected = False
- self.status = None
-
- def init(self, recreate=False, reboot=False):
- self.destroyed = False
- self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
- self.frontendDomain = self.controller.getDomain()
-
- def __str__(self):
- return ('<BlkifBackend frontend=%d backend=%d id=%d>'
- % (self.frontendDomain,
- self.backendDomain,
- self.id))
-
- def getId(self):
- return self.id
-
- def connect(self, recreate=False):
- """Connect to the blkif control interface.
-
- @param recreate: true if after xend restart
- """
- log.debug("Connecting blkif %s", str(self))
- if recreate or self.connected:
- self.connected = True
- pass
-
- def destroy(self, change=False, reboot=False):
- """Disconnect from the blkif control interface and destroy it.
- """
- self.destroyed = True
- # For change true need to notify front-end, or back-end will do it?
-
- def connectInterface(self, val):
- self.status = BLKIF_INTERFACE_STATUS_CONNECTED
-
- def interfaceDisconnected(self):
- self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
-
-class BlkDev(Dev):
- """Info record for a block device.
- """
-
- __exports__ = Dev.__exports__ + [
- DBVar('dev', ty='str'),
- DBVar('vdev', ty='int'),
- DBVar('mode', ty='str'),
- DBVar('viftype', ty='str'),
- DBVar('params', ty='str'),
- DBVar('node', ty='str'),
- DBVar('device', ty='long'),
- DBVar('dev_handle', ty='long'),
- DBVar('start_sector', ty='long'),
- DBVar('nr_sectors', ty='long'),
- ]
-
- def __init__(self, controller, id, config, recreate=False):
- Dev.__init__(self, controller, id, config, recreate=recreate)
- self.dev = None
- self.uname = None
- self.vdev = None
- self.mode = None
- self.type = None
- self.params = None
- self.node = None
- self.device = None
- self.dev_handle = 0
- self.start_sector = None
- self.nr_sectors = None
-
- self.frontendDomain = self.getDomain()
- self.backendDomain = None
- self.backendId = 0
- self.configure(self.config, recreate=recreate)
-
- def exportToDB(self, save=False):
- Dev.exportToDB(self, save=save)
- backend = self.getBackend()
-
- def init(self, recreate=False, reboot=False):
- self.frontendDomain = self.getDomain()
- backend = self.getBackend()
- self.backendId = backend.domid
-
- def configure(self, config, change=False, recreate=False):
- if change:
- raise XendError("cannot reconfigure vbd")
- self.config = config
- self.uname = sxp.child_value(config, 'uname')
- if not self.uname:
- raise VmError('vbd: Missing uname')
- # Split into type and type-specific params (which are passed to the
- # type-specific control script).
- (self.type, self.params) = string.split(self.uname, ':', 1)
- self.dev = sxp.child_value(config, 'dev')
- if not self.dev:
- raise VmError('vbd: Missing dev')
- self.mode = sxp.child_value(config, 'mode', 'r')
-
- self.vdev = blkif.blkdev_name_to_number(self.dev)
- if not self.vdev:
- raise VmError('vbd: Device not found: %s' % self.dev)
-
- try:
- xd = get_component('xen.xend.XendDomain')
- self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).domid
- except:
- raise XendError('invalid backend domain')
-
- return self.config
-
- def attach(self, recreate=False, change=False):
- if recreate:
- pass
- else:
- node = Blkctl.block('bind', self.type, self.params)
- self.setNode(node)
- self.attachBackend()
- if change:
- self.interfaceChanged()
-
- def unbind(self):
- if self.node is None: return
- log.debug("Unbinding vbd (type %s) from %s"
- % (self.type, self.node))
- Blkctl.block('unbind', self.type, self.node)
-
- def setNode(self, node):
-
- # NOTE:
- # This clause is testing code for storage system experiments.
- # Add a new disk type that will just pass an opaque id in the
- # dev_handle and use an experimental device type.
- # Please contact andrew.warfield@xxxxxxxxxxxx with any concerns.
- if self.type == 'parallax':
- self.node = node
- self.device = 61440 # (240,0)
- self.dev_handle = long(self.params)
- self.nr_sectors = long(0)
- return
- # done.
-
- mounted_mode = self.check_mounted(node)
- if not '!' in self.mode and mounted_mode:
- if mounted_mode == "w":
- raise VmError("vbd: Segment %s is in writable use" %
- self.uname)
- elif 'w' in self.mode:
- raise VmError("vbd: Segment %s is in read-only use" %
- self.uname)
-
- segment = blkif.blkdev_segment(node)
- if not segment:
- raise VmError("vbd: Segment not found: uname=%s" % self.uname)
- self.node = node
- self.device = segment['device']
- self.start_sector = segment['start_sector']
- self.nr_sectors = segment['nr_sectors']
-
- def check_mounted(self, name):
- mode = blkif.mount_mode(name)
- xd = get_component('xen.xend.XendDomain')
- for vm in xd.list():
- ctrl = vm.getDeviceController(self.getType(), error=False)
- if (not ctrl): continue
- for dev in ctrl.getDevices():
- if dev is self: continue
- if dev.type == 'phy' and name == blkif.expand_dev_name(dev.params):
- mode = dev.mode
- if 'w' in mode:
- return 'w'
- if mode and 'r' in mode:
- return 'r'
- return None
-
- def readonly(self):
- return 'w' not in self.mode
-
- def sxpr(self):
- val = ['vbd',
- ['id', self.id],
- ['vdev', self.vdev],
- ['device', self.device],
- ['mode', self.mode]]
- if self.dev:
- val.append(['dev', self.dev])
- if self.uname:
- val.append(['uname', self.uname])
- if self.node:
- val.append(['node', self.node])
- return val
-
- def getBackend(self):
- return self.controller.getBackend(self.backendDomain)
-
- def refresh(self):
- log.debug("Refreshing vbd domain=%d id=%s", self.frontendDomain,
- self.id)
- self.interfaceChanged()
-
- def destroy(self, change=False, reboot=False):
- """Destroy the device. If 'change' is true notify the front-end interface.
-
- @param change: change flag
- """
- self.destroyed = True
- log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain,
- self.id)
- if change:
- self.interfaceChanged()
- self.unbind()
-
- def interfaceChanged(self):
- """Tell the back-end to notify the front-end that a device has been
- added or removed.
- """
- self.getBackend().interfaceChanged()
-
- def attachBackend(self):
- """Attach the device to its controller.
-
- """
- self.getBackend().connect()
-
class BlkifController(DevController):
"""Block device interface controller. Handles all block devices
for a domain.
"""
- def __init__(self, vm, recreate=False):
+ def __init__(self, vm):
"""Create a block device controller.
"""
- DevController.__init__(self, vm, recreate=recreate)
- self.backends = {}
- self.backendId = 0
+ DevController.__init__(self, vm)
- def initController(self, recreate=False, reboot=False):
- self.destroyed = False
- if reboot:
- self.rebootBackends()
- self.rebootDevices()
- def sxpr(self):
- val = ['blkif', ['dom', self.getDomain()]]
- return val
+ def getDeviceDetails(self, config):
+ """@see DevController.getDeviceDetails"""
+
+ typedev = sxp.child_value(config, 'dev')
+ if re.match('^ioemu:', typedev):
+ return (0,{},{})
- def rebootBackends(self):
- for backend in self.backends.values():
- backend.init(reboot=True)
+ devid = blkif.blkdev_name_to_number(sxp.child_value(config, 'dev'))
- def getBackendById(self, id):
- return self.backends.get(id)
+ (typ, params) = string.split(sxp.child_value(config, 'uname'), ':', 1)
+ back = { 'type' : typ,
+ 'params' : params
+ }
- def getBackendByDomain(self, dom):
- for backend in self.backends.values():
- if backend.backendDomain == dom:
- return backend
- return None
+ if 'r' == sxp.child_value(config, 'mode', 'r'):
+ back['read-only'] = "" # existence indicates read-only
- def getBackend(self, dom):
- backend = self.getBackendByDomain(dom)
- if backend: return backend
- backend = BlkifBackend(self, self.backendId, dom)
- self.backendId += 1
- self.backends[backend.getId()] = backend
- backend.init()
- return backend
+ front = { 'virtual-device' : "%i" % devid }
- def newDevice(self, id, config, recreate=False):
- """Create a device..
-
- @param id: device id
- @param config: device configuration
- @param recreate: if true it's being recreated (after xend restart)
- @type recreate: bool
- @return: device
- @rtype: BlkDev
- """
- return BlkDev(self, id, config, recreate=recreate)
-
- def destroyController(self, reboot=False):
- """Destroy the controller and all devices.
- """
- self.destroyed = True
- log.debug("Destroying blkif domain=%d", self.getDomain())
- self.destroyDevices(reboot=reboot)
- self.destroyBackends(reboot=reboot)
-
- def destroyBackends(self, reboot=False):
- for backend in self.backends.values():
- backend.destroy(reboot=reboot)
+ return (devid, back, front)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/channel.py
--- a/tools/python/xen/xend/server/channel.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/channel.py Thu Sep 22 17:42:01 2005
@@ -43,33 +43,6 @@
interdomain = classmethod(interdomain)
- def restoreFromDB(cls, db, dom1, dom2, port1=0, port2=0):
- """Create an event channel using db info if available.
- Inverse to saveToDB().
-
- @param db db
- @param dom1
- @param dom2
- @param port1
- @param port2
- """
- try:
- dom1 = int(db['dom1'].getData())
- except: pass
- try:
- dom2 = int(db['dom2'].getData())
- except: pass
- try:
- port1 = int(db['port1'].getData())
- except: pass
- try:
- port2 = int(db['port2'].getData())
- except: pass
- evtchn = cls.interdomain(dom1, dom2, port1=port1, port2=port2)
- return evtchn
-
- restoreFromDB = classmethod(restoreFromDB)
-
def __init__(self, dom1, dom2, d):
d['dom1'] = dom1
d['dom2'] = dom2
@@ -92,18 +65,6 @@
print 'EventChannel>close>', self
evtchn_close(self.dom1, self.port1)
evtchn_close(self.dom2, self.port2)
-
- def saveToDB(self, db, save=False):
- """Save the event channel to the db so it can be restored later,
- using restoreFromDB() on the class.
-
- @param db db
- """
- db['dom1'] = str(self.dom1)
- db['dom2'] = str(self.dom2)
- db['port1'] = str(self.port1)
- db['port2'] = str(self.port2)
- db.saveDB(save=save)
def sxpr(self):
return ['event-channel',
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/event.py
--- a/tools/python/xen/xend/server/event.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/event.py Thu Sep 22 17:42:01 2005
@@ -174,11 +174,6 @@
else:
logging.removeLogStderr()
- def op_debug_controller(self, name, v):
- mode = v[1]
- import controller
- controller.DEBUG = (mode == 'on')
-
def op_domain_ls(self, name, v):
xd = xroot.get_component("xen.xend.XendDomain")
return xd.list_names()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/netif.py Thu Sep 22 17:42:01 2005
@@ -13,396 +13,58 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
+
"""Support for virtual network interfaces.
"""
-import random
-
-from xen.util.mac import macFromString, macToString
+import os
from xen.xend import sxp
-from xen.xend import Vifctl
-from xen.xend.XendError import XendError, VmError
-from xen.xend.XendLogging import log
-from xen.xend import XendVnet
-from xen.xend.XendRoot import get_component
-from xen.xend.xenstore import DBVar
-from xen.xend.server.controller import Dev, DevController
+from xen.xend.server.DevController import DevController
-class NetDev(Dev):
- """A network device.
- """
- # State:
- # inherited +
- # ./config
- # ./mac
- # ./be_mac
- # ./bridge
- # ./script
- # ./ipaddr ?
- #
- # ./credit
- # ./period
- #
- # ./vifctl: up/down?
- # ./vifname
- #
- #
- # Poss should have no backend state here - except for ref to backend's own tree
- # for the device? And a status - the one we want.
- # ./back/dom
- # ./back/devid - id for back-end (netif_handle) - same as front/devid
- # ./back/id - backend id (if more than one b/e per domain)
- # ./back/status
- # ./back/tx_shmem_frame - actually these belong in back-end state
- # ./back/rx_shmem_frame
- #
- # ./front/dom
- # ./front/devid
- # ./front/status - need 2: one for requested, one for actual? Or drive from dev status
- # and this is front status only.
- # ./front/tx_shmem_frame
- # ./front/rx_shmem_frame
- #
- # ./evtchn/front - here or in front/back?
- # ./evtchn/back
- # ./evtchn/status ?
- # At present created by dev: but should be created unbound by front/back
- # separately and then bound (by back)?
-
- __exports__ = Dev.__exports__ + [
- DBVar('config', ty='sxpr'),
- DBVar('mac', ty='mac'),
- DBVar('be_mac', ty='mac'),
- DBVar('bridge', ty='str'),
- DBVar('script', ty='str'),
- DBVar('credit', ty='int'),
- DBVar('period', ty='int'),
- DBVar('vifname', ty='str'),
- ]
-
- def __init__(self, controller, id, config, recreate=False):
- Dev.__init__(self, controller, id, config, recreate=recreate)
- self.vif = int(self.id)
- self.status = None
- self.frontendDomain = self.getDomain()
- self.backendDomain = None
- self.credit = None
- self.period = None
- self.mac = None
- self.be_mac = None
- self.bridge = None
- self.script = None
- self.ipaddr = None
- self.mtu = None
- self.vifname = None
- self.configure(self.config, recreate=recreate)
-
- def exportToDB(self, save=False):
- Dev.exportToDB(self, save=save)
-
- def init(self, recreate=False, reboot=False):
- self.destroyed = False
- self.status = NETIF_INTERFACE_STATUS_DISCONNECTED
- self.frontendDomain = self.getDomain()
-
- def _get_config_mac(self, config):
- vmac = sxp.child_value(config, 'mac')
- if not vmac: return None
- try:
- mac = macFromString(vmac)
- except:
- raise XendError("invalid mac: %s" % vmac)
- return mac
-
- def _get_config_be_mac(self, config):
- vmac = sxp.child_value(config, 'be_mac')
- if not vmac: return None
- try:
- mac = macFromString(vmac)
- except:
- raise XendError("invalid backend mac: %s" % vmac)
- return mac
-
- def _get_config_ipaddr(self, config):
- ips = sxp.children(config, elt='ip')
- if ips:
- val = []
- for ipaddr in ips:
- val.append(sxp.child0(ipaddr))
- else:
- val = None
- return val
-
- def _get_config_mtu(self, config):
- mtu = sxp.child_value(config, 'mtu')
- if not mtu: return None
- try:
- mtu = int(mtu)
- except:
- raise XendError("invalid mtu: %s" & mtu)
- return mtu
-
- def configure(self, config, change=False, recreate=False):
- if change:
- return self.reconfigure(config)
- self.config = config
- self.mac = None
- self.be_mac = None
- self.bridge = None
- self.script = None
- self.ipaddr = []
- self.vifname = None
-
- self.vifname = sxp.child_value(config, 'vifname')
- if self.vifname is None:
- self.vifname = self.default_vifname()
- if len(self.vifname) > 15:
- raise XendError('invalid vifname: too long: ' + self.vifname)
- mac = self._get_config_mac(config)
- if mac is None:
- raise XendError("invalid mac")
- self.mac = mac
- self.be_mac = self._get_config_be_mac(config)
- self.bridge = sxp.child_value(config, 'bridge')
- self.script = sxp.child_value(config, 'script')
- self.ipaddr = self._get_config_ipaddr(config) or []
- self.mtu = self._get_config_mtu(config)
- self._config_credit_limit(config)
-
- try:
- if recreate:
- self.backendDomain = int(sxp.child_value(config, 'backend',
'0'))
- else:
- #todo: Code below will fail on xend restart when backend is
not domain 0.
- xd = get_component('xen.xend.XendDomain')
- self.backendDomain =
xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).domid
- except:
- raise XendError('invalid backend domain')
- return self.config
-
- def reconfigure(self, config):
- """Reconfigure the interface with new values.
- Not all configuration parameters can be changed:
- bridge, script and ip addresses can,
- backend and mac cannot.
-
- To leave a parameter unchanged, omit it from the changes.
-
- @param config configuration changes
- @return updated interface configuration
- @raise XendError on errors
- """
- changes = {}
- mac = self._get_config_mac(config)
- be_mac = self._get_config_be_mac(config)
- bridge = sxp.child_value(config, 'bridge')
- script = sxp.child_value(config, 'script')
- ipaddr = self._get_config_ipaddr(config)
- mtu = self._get_config_mtu(config)
-
- xd = get_component('xen.xend.XendDomain')
- backendDomain = xd.domain_lookup_by_name(sxp.child_value(config,
'backend', '0')).domid
-
- if (mac is not None) and (mac != self.mac):
- raise XendError("cannot change mac")
- if (be_mac is not None) and (be_mac != self.be_mac):
- raise XendError("cannot change backend mac")
- if (backendDomain is not None) and (backendDomain !=
self.backendDomain):
- raise XendError("cannot change backend")
- if (bridge is not None) and (bridge != self.bridge):
- changes['bridge'] = bridge
- if (script is not None) and (script != self.script):
- changes['script'] = script
- if (ipaddr is not None) and (ipaddr != self.ipaddr):
- changes['ipaddr'] = ipaddr
- if (mtu is not None) and (mtu != self.mtu):
- changes['mtu'] = mtu
-
- if changes:
- self.vifctl("down")
- for (k, v) in changes.items():
- setattr(self, k, v)
- self.config = sxp.merge(config, self.config)
- self.vifctl("up")
-
- self._config_credit_limit(config, change=True)
- return self.config
-
- def _config_credit_limit(self, config, change=False):
- period = sxp.child_value(config, 'period')
- credit = sxp.child_value(config, 'credit')
- if period and credit:
- try:
- period = int(period)
- credit = int(credit)
- except ex:
- raise XendError('vif: invalid credit limit')
- if change:
- self.setCreditLimit(credit, period)
- self.config = sxp.merge([sxp.name(self.config),
- ['credit', credit],
- ['period', period]],
- self.config)
- else:
- self.period = period
- self.credit = credit
- elif period or credit:
- raise XendError('vif: invalid credit limit')
-
- def sxpr(self):
- vif = str(self.vif)
- mac = self.get_mac()
- val = ['vif',
- ['id', self.id],
- ['vif', vif],
- ['mac', mac],
- ['vifname', self.vifname],
- ]
-
- if self.be_mac:
- val.append(['be_mac', self.get_be_mac()])
- if self.bridge:
- val.append(['bridge', self.bridge])
- if self.script:
- val.append(['script', self.script])
- for ip in self.ipaddr:
- val.append(['ip', ip])
- if self.credit:
- val.append(['credit', self.credit])
- if self.period:
- val.append(['period', self.period])
- return val
-
- def get_vifname(self):
- """Get the virtual interface device name.
- """
- return self.vifname
-
- def default_vifname(self):
- return "vif%d.%d" % (self.frontendDomain, self.vif)
-
- def get_mac(self):
- """Get the MAC address as a string.
- """
- return macToString(self.mac)
-
- def get_be_mac(self):
- """Get the backend MAC address as a string.
- """
- return macToString(self.be_mac)
-
- def vifctl_params(self, vmname=None):
- """Get the parameters to pass to vifctl.
- """
- dom = self.frontendDomain
- if vmname is None:
- xd = get_component('xen.xend.XendDomain')
- try:
- vm = xd.domain_lookup(dom)
- vmname = vm.name
- except:
- vmname = 'Domain-%d' % dom
- return { 'domain': vmname,
- 'vif' : self.get_vifname(),
- 'mac' : self.get_mac(),
- 'bridge': self.bridge,
- 'script': self.script,
- 'ipaddr': self.ipaddr, }
-
- def vifctl(self, op, vmname=None):
- """Bring the device up or down.
- The vmname is needed when bringing a device up for a new domain because
- the domain is not yet in the table so we can't look its name up.
-
- @param op: operation name (up, down)
- @param vmname: vmname
- """
- if op == 'up':
- Vifctl.set_vif_name(self.default_vifname(), self.vifname)
- Vifctl.vifctl(op, **self.vifctl_params(vmname=vmname))
- vnet = XendVnet.instance().vnet_of_bridge(self.bridge)
- if vnet:
- vnet.vifctl(op, self.get_vifname(), self.get_mac())
-
- def attach(self, recreate=False, change=False):
- if recreate:
- pass
- else:
- if self.credit and self.period:
- #self.send_be_creditlimit(self.credit, self.period)
- pass
- self.vifctl('up', vmname=self.getDomainName())
-
- def destroy(self, change=False, reboot=False):
- """Destroy the device's resources and disconnect from the back-end
- device controller. If 'change' is true notify the front-end interface.
-
- @param change: change flag
- """
- self.destroyed = True
- self.status = NETIF_INTERFACE_STATUS_CLOSED
- log.debug("Destroying vif domain=%d vif=%d", self.frontendDomain,
self.vif)
- self.vifctl('down')
- if change:
- self.reportStatus()
-
- def setCreditLimit(self, credit, period):
- #todo: these params should be in sxpr and vif config.
- self.credit = credit
- self.period = period
-
- def getCredit(self):
- return self.credit
-
- def getPeriod(self):
- return self.period
-
- def interfaceChanged(self):
- """Notify the front-end that a device has been added or removed.
- """
- pass
-
class NetifController(DevController):
"""Network interface controller. Handles all network devices for a domain.
"""
- def __init__(self, vm, recreate=False):
- DevController.__init__(self, vm, recreate=recreate)
+ def __init__(self, vm):
+ DevController.__init__(self, vm)
- def initController(self, recreate=False, reboot=False):
- self.destroyed = False
- if reboot:
- self.rebootDevices()
- def destroyController(self, reboot=False):
- """Destroy the controller and all devices.
- """
- self.destroyed = True
- log.debug("Destroying netif domain=%d", self.getDomain())
- self.destroyDevices(reboot=reboot)
+ def getDeviceDetails(self, config):
+ """@see DevController.getDeviceDetails"""
- def sxpr(self):
- val = ['netif', ['dom', self.getDomain()]]
- return val
-
- def newDevice(self, id, config, recreate=False):
- """Create a network device.
+ from xen.xend import XendRoot
+ xroot = XendRoot.instance()
- @param id: interface id
- @param config: device configuration
- @param recreate: recreate flag (true after xend restart)
- """
- return NetDev(self, id, config, recreate=recreate)
+ def _get_config_ipaddr(config):
+ val = []
+ for ipaddr in sxp.children(config, elt='ip'):
+ val.append(sxp.child0(ipaddr))
+ return val
- def limitDevice(self, vif, credit, period):
- if vif not in self.devices:
- raise XendError('device does not exist for credit limit: vif'
- + str(self.getDomain()) + '.' + str(vif))
-
- dev = self.devices[vif]
- return dev.setCreditLimit(credit, period)
+ script = os.path.join(xroot.network_script_dir,
+ sxp.child_value(config, 'script',
+ xroot.get_vif_script()))
+ bridge = sxp.child_value(config, 'bridge',
+ xroot.get_vif_bridge())
+ mac = sxp.child_value(config, 'mac')
+ ipaddr = _get_config_ipaddr(config)
+
+ devid = self.allocateDeviceID()
+
+ back = { 'script' : script,
+ 'mac' : mac,
+ 'bridge' : bridge,
+ 'handle' : "%i" % devid }
+ if ipaddr:
+ back['ip'] = ' '.join(ipaddr)
+
+ front = { 'handle' : "%i" % devid,
+ 'mac' : mac }
+
+ return (devid, back, front)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/pciif.py Thu Sep 22 17:42:01 2005
@@ -13,16 +13,22 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#============================================================================
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
#============================================================================
+
import types
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+import xen.lowlevel.xc;
from xen.xend import sxp
from xen.xend.XendError import VmError
-from controller import Dev, DevController
+from xen.xend.server.DevController import DevController
+
+
+xc = xen.lowlevel.xc.new()
+
def parse_pci(val):
"""Parse a pci field.
@@ -36,41 +42,41 @@
v = val
return v
-class PciDev(Dev):
- def __init__(self, controller, id, config, recreate=False):
- Dev.__init__(self, controller, id, config, recreate=recreate)
- bus = sxp.child_value(self.config, 'bus')
- if not bus:
- raise VmError('pci: Missing bus')
- dev = sxp.child_value(self.config, 'dev')
- if not dev:
- raise VmError('pci: Missing dev')
- func = sxp.child_value(self.config, 'func')
- if not func:
- raise VmError('pci: Missing func')
- try:
- bus = parse_pci(bus)
- dev = parse_pci(dev)
- func = parse_pci(func)
- except:
- raise VmError('pci: invalid parameter')
+class PciController(DevController):
- def attach(self, recreate=False, change=False):
- rc = xc.physdev_pci_access_modify(dom = self.getDomain(),
+ def __init__(self, vm):
+ DevController.__init__(self, vm)
+
+
+ def getDeviceDetails(self, config):
+ """@see DevController.getDeviceDetails"""
+
+ def get_param(field):
+ try:
+ val = sxp.child_value(config, field)
+
+ if not val:
+ raise VmError('pci: Missing %s config setting' % field)
+
+ return parse_pci(val)
+ except:
+ raise VmError('pci: Invalid config setting %s: %s' %
+ (field, val))
+
+ bus = get_param('bus')
+ dev = get_param('dev')
+ func = get_param('func')
+
+ rc = xc.physdev_pci_access_modify(dom = self.getDomid(),
bus = bus,
dev = dev,
func = func,
enable = True)
if rc < 0:
#todo non-fatal
- raise VmError('pci: Failed to configure device: bus=%s dev=%s
func=%s' %
- (bus, dev, func))
+ raise VmError(
+ 'pci: Failed to configure device: bus=%s dev=%s func=%s' %
+ (bus, dev, func))
- def destroy(self, change=False, reboot=False):
- pass
-
-class PciController(DevController):
-
- def newDevice(self, id, config, recreate=False):
- return PciDev(self, id, config, recreate=recreate)
+ return (dev, {}, {})
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/tpmif.py
--- a/tools/python/xen/xend/server/tpmif.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/tpmif.py Thu Sep 22 17:42:01 2005
@@ -1,45 +1,47 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
# Copyright (C) 2005 IBM Corporation
-# Authort: Stefan Berger, stefanb@xxxxxxxxxx
-# Derived from netif.py:
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+# Author: Stefan Berger, stefanb@xxxxxxxxxx
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
"""Support for virtual TPM interfaces.
"""
-import random
+from xen.xend import sxp
+from xen.xend.XendLogging import log
-from xen.xend import sxp
-from xen.xend.XendError import XendError, VmError
-from xen.xend.XendLogging import log
-from xen.xend.XendRoot import get_component
-from xen.xend.xenstore import DBVar
+from xen.xend.server.DevController import DevController
-from xen.xend.server.controller import Dev, DevController
class TPMifController(DevController):
"""TPM interface controller. Handles all TPM devices for a domain.
"""
- def __init__(self, vm, recreate=False):
- DevController.__init__(self, vm, recreate=recreate)
+ def __init__(self, vm):
+ DevController.__init__(self, vm)
- def initController(self, recreate=False, reboot=False):
- self.destroyed = False
- def destroyController(self, reboot=False):
- """Destroy the controller and all devices.
- """
- self.destroyed = True
- self.destroyDevices(reboot=reboot)
+ def getDeviceDetails(self, config):
+ """@see DevController.getDeviceDetails"""
+
+ devid = int(sxp.child_value(config, 'instance', '0'))
+ log.debug("The domain has a TPM with instance %d." % devid)
- def sxpr(self):
- val = ['tpmif', ['dom', self.getDomain()]]
- return val
+ back = { 'instance' : "%i" % devid }
+ front = { 'handle' : "%i" % devid }
- def newDevice(self, id, config, recreate=False):
- """Create a TPM device.
-
- @param id: interface id
- @param config: device configuration
- @param recreate: recreate flag (true after xend restart)
- """
- return None
+ return (devid, back, front)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/usbif.py
--- a/tools/python/xen/xend/server/usbif.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/usbif.py Thu Sep 22 17:42:01 2005
@@ -1,185 +1,42 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
# Copyright (C) 2004 Intel Research Cambridge
# Copyright (C) 2004 Mark Williamson <mark.williamson@xxxxxxxxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+
"""Support for virtual USB hubs.
"""
-from xen.xend import sxp
-from xen.xend.XendLogging import log
-from xen.xend.XendError import XendError
-from xen.xend.xenstore import DBVar
+from xen.xend.server.DevController import DevController
-from xen.xend.server.controller import Dev, DevController
-
-class UsbBackend:
- """Handler for the 'back-end' channel to a USB device driver domain
- on behalf of a front-end domain.
- """
- def __init__(self, controller, id, dom):
- self.controller = controller
- self.id = id
- self.destroyed = False
- self.connected = False
- self.connecting = False
- self.frontendDomain = self.controller.getDomain()
- self.backendDomain = dom
-
- def init(self, recreate=False, reboot=False):
- pass
-
- def __str__(self):
- return ('<UsbifBackend frontend=%d backend=%d id=%d>'
- % (self.frontendDomain,
- self.backendDomain,
- self.id))
-
- def connect(self, recreate=False):
- """Connect the controller to the usbif control interface.
-
- @param recreate: true if after xend restart
- """
- log.debug("Connecting usbif %s", str(self))
- if recreate or self.connected or self.connecting:
- pass
-
- def destroy(self, reboot=False):
- """Disconnect from the usbif control interface and destroy it.
- """
- self.destroyed = True
-
- def interfaceChanged(self):
- pass
-
-
-class UsbDev(Dev):
-
- __exports__ = Dev.__exports__ + [
- DBVar('port', ty='int'),
- DBVar('path', ty='str'),
- ]
-
- def __init__(self, controller, id, config, recreate=False):
- Dev.__init__(self, controller, id, config, recreate=recreate)
- self.port = id
- self.path = None
- self.frontendDomain = self.getDomain()
- self.backendDomain = 0
- self.configure(self.config, recreate=recreate)
-
- def init(self, recreate=False, reboot=False):
- self.destroyed = False
- self.frontendDomain = self.getDomain()
-
- def configure(self, config, change=False, recreate=False):
- if change:
- raise XendError("cannot reconfigure usb")
- #todo: FIXME: Use sxp access methods to get this value.
- # Must not use direct indexing.
- self.path = config[1][1]
-
- #todo: FIXME: Support configuring the backend domain.
-## try:
-## self.backendDomain = int(sxp.child_value(config, 'backend',
'0'))
-## except:
-## raise XendError('invalid backend domain')
-
- def attach(self, recreate=False, change=False):
- if recreate:
- pass
- else:
- self.attachBackend()
- if change:
- self.interfaceChanged()
-
- def sxpr(self):
- val = ['usb',
- ['id', self.id],
- ['port', self.port],
- ['path', self.path],
- ]
- return val
-
- def getBackend(self):
- return self.controller.getBackend(self.backendDomain)
-
- def destroy(self, change=False, reboot=False):
- """Destroy the device. If 'change' is true notify the front-end
interface.
-
- @param change: change flag
- """
- self.destroyed = True
- log.debug("Destroying usb domain=%d id=%s", self.frontendDomain,
self.id)
- if change:
- self.interfaceChanged()
-
- def interfaceChanged(self):
- """Tell the back-end to notify the front-end that a device has been
- added or removed.
- """
- self.getBackend().interfaceChanged()
-
- def attachBackend(self):
- """Attach the device to its controller.
-
- """
- self.getBackend().connect()
class UsbifController(DevController):
"""USB device interface controller. Handles all USB devices
for a domain.
"""
- def __init__(self, vm, recreate=False):
+ def __init__(self, vm):
"""Create a USB device controller.
"""
- DevController.__init__(self, vm, recreate=recreate)
- self.backends = {}
- self.backendId = 0
+ DevController.__init__(self, vm)
- def init(self, recreate=False, reboot=False):
- self.destroyed = False
- if reboot:
- self.rebootBackends()
- self.rebootDevices()
- def sxpr(self):
- val = ['usbif',
- ['dom', self.getDomain()]]
- return val
+ def getDeviceDetails(self, _):
+ """@see DevController.getDeviceDetails"""
- def newDevice(self, id, config, recreate=False):
- return UsbDev(self, id, config, recreate=recreate)
-
- def destroyController(self, reboot=False):
- """Destroy the controller and all devices.
- """
- self.destroyed = True
- log.debug("Destroying blkif domain=%d", self.getDomain())
- self.destroyDevices(reboot=reboot)
- self.destroyBackends(reboot=reboot)
-
- def rebootBackends(self):
- for backend in self.backends.values():
- backend.init(reboot=True)
-
- def getBackendById(self, id):
- return self.backends.get(id)
-
- def getBackendByDomain(self, dom):
- for backend in self.backends.values():
- if backend.backendDomain == dom:
- return backend
- return None
-
- def getBackend(self, dom):
- backend = self.getBackendByDomain(dom)
- if backend: return backend
- backend = UsbBackend(self, self.backendId, dom)
- self.backendId += 1
- self.backends[backend.getId()] = backend
- backend.init()
- return backend
-
- def destroyBackends(self, reboot=False):
- for backend in self.backends.values():
- backend.destroy(reboot=reboot)
+ return (self.allocateDeviceID(), {}, {})
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xsnode.py
--- a/tools/python/xen/xend/xenstore/xsnode.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xsnode.py Thu Sep 22 17:42:01 2005
@@ -244,7 +244,9 @@
if ex.args[0] == errno.ENOENT:
return False
else:
- raise
+ raise RuntimeError(ex.args[0],
+ ex.args[1] +
+ (', in exists(%s)' % (str(path))))
def mkdirs(self, path):
if self.exists(path):
@@ -255,7 +257,7 @@
if x == "": continue
p = os.path.join(p, x)
if not self.exists(p):
- self.getxs().write(p, "", create=True)
+ self.getxs().write(p, "")
def read(self, path):
try:
@@ -266,15 +268,17 @@
else:
raise
- def create(self, path, excl=False):
- self.write(path, "", create=True, excl=excl)
-
- def write(self, path, data, create=True, excl=False):
- self.mkdirs(path)
- try:
- self.getxs().write(path, data, create=create, excl=excl)
- except Exception, ex:
- raise
+ def create(self, path):
+ self.write(path, "")
+
+ def write(self, path, data):
+ try:
+ self.getxs().write(path, data)
+ except RuntimeError, ex:
+ raise RuntimeError(ex.args[0],
+ ex.args[1] +
+ (', while writing %s : %s' % (str(path),
+ str(data))))
def begin(self, path):
self.getxs().transaction_start(path)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xsobj.py
--- a/tools/python/xen/xend/xenstore/xsobj.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xsobj.py Thu Sep 22 17:42:01 2005
@@ -469,9 +469,6 @@
n = n._addChild(x)
return n
- def getDB(self):
- return self.__db__
-
def setDB(self, db):
if (db is not None) and not isinstance(db, XenNode):
raise ValueError("invalid db")
diff -r 97dbd9524a7e -r 06d84bf87159
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xstransact.py Thu Sep 22 17:42:01 2005
@@ -41,7 +41,11 @@
def _read(self, key):
path = "%s/%s" % (self.path, key)
- return xshandle().read(path)
+ try:
+ return xshandle().read(path)
+ except RuntimeError, ex:
+ raise RuntimeError(ex.args[0],
+ '%s, while reading %s' % (ex.args[1], path))
def read(self, *args):
if len(args) == 0:
@@ -53,13 +57,16 @@
ret.append(self._read(key))
return ret
- def _write(self, key, data, create=True, excl=False):
- path = "%s/%s" % (self.path, key)
- xshandle().write(path, data, create=create, excl=excl)
+ def _write(self, key, data):
+ path = "%s/%s" % (self.path, key)
+ try:
+ xshandle().write(path, data)
+ except RuntimeError, ex:
+ raise RuntimeError(ex.args[0],
+ ('%s, while writing %s : %s' %
+ (ex.args[1], path, str(data))))
def write(self, *args, **opts):
- create = opts.get('create') or True
- excl = opts.get('excl') or False
if len(args) == 0:
raise TypeError
if isinstance(args[0], dict):
@@ -67,15 +74,19 @@
if not isinstance(d, dict):
raise TypeError
for key in d.keys():
- self._write(key, d[key], create, excl)
+ try:
+ self._write(key, d[key])
+ except TypeError, msg:
+ raise TypeError('Writing %s: %s: %s' %
+ (key, str(d[key]), msg))
elif isinstance(args[0], list):
for l in args:
if not len(l) == 2:
raise TypeError
- self._write(l[0], l[1], create, excl)
+ self._write(l[0], l[1])
elif len(args) % 2 == 0:
for i in range(len(args) / 2):
- self._write(args[i * 2], args[i * 2 + 1], create, excl)
+ self._write(args[i * 2], args[i * 2 + 1])
else:
raise TypeError
@@ -84,10 +95,15 @@
return xshandle().rm(path)
def remove(self, *args):
- if len(args) == 0:
- raise TypeError
- for key in args:
- self._remove(key)
+ """If no arguments are given, remove this transaction's path.
+ Otherwise, treat each argument as a subpath to this transaction's
+ path, and remove each of those instead.
+ """
+ if len(args) == 0:
+ xshandle().rm(self.path)
+ else:
+ for key in args:
+ self._remove(key)
def _list(self, key):
path = "%s/%s" % (self.path, key)
@@ -114,10 +130,20 @@
defval = None
else:
(key, fn, defval) = tup
- try:
- val = fn(self._read(key))
- except TypeError:
+
+ val = self._read(key)
+ # If fn is str, then this will successfully convert None to
+ # 'None'. If it is int, then it will throw TypeError on None, or
+ # on any other non-integer value. We have to, therefore, both
+ # check explicitly for None, and catch TypeError. Either failure
+ # will result in defval being used instead.
+ if val is None:
val = defval
+ else:
+ try:
+ val = fn(val)
+ except TypeError:
+ val = defval
ret.append(val)
if len(ret) == 1:
return ret[0]
@@ -146,8 +172,8 @@
def Read(cls, path, *args):
while True:
- try:
- t = cls(path)
+ t = cls(path)
+ try:
v = t.read(*args)
t.commit()
return v
@@ -165,8 +191,8 @@
def Write(cls, path, *args, **opts):
while True:
- try:
- t = cls(path)
+ t = cls(path)
+ try:
t.write(*args, **opts)
t.commit()
return
@@ -183,9 +209,13 @@
Write = classmethod(Write)
def Remove(cls, path, *args):
- while True:
- try:
- t = cls(path)
+ """If only one argument is given (path), remove it. Otherwise, treat
+ each further argument as a subpath to the given path, and remove each
+ of those instead. This operation is performed inside a transaction.
+ """
+ while True:
+ t = cls(path)
+ try:
t.remove(*args)
t.commit()
return
@@ -203,8 +233,8 @@
def List(cls, path, *args):
while True:
- try:
- t = cls(path)
+ t = cls(path)
+ try:
v = t.list(*args)
t.commit()
return v
@@ -222,8 +252,8 @@
def Gather(cls, path, *args):
while True:
- try:
- t = cls(path)
+ t = cls(path)
+ try:
v = t.gather(*args)
t.commit()
return v
@@ -241,8 +271,8 @@
def Store(cls, path, *args):
while True:
- try:
- t = cls(path)
+ t = cls(path)
+ try:
v = t.store(*args)
t.commit()
return v
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xsutil.py
--- a/tools/python/xen/xend/xenstore/xsutil.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xsutil.py Thu Sep 22 17:42:01 2005
@@ -18,3 +18,6 @@
def IntroduceDomain(domid, page, port, path):
return xshandle().introduce_domain(domid, page, port, path)
+
+def GetDomainPath(domid):
+ return xshandle().get_domain_path(domid)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xswatch.py
--- a/tools/python/xen/xend/xenstore/xswatch.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xswatch.py Thu Sep 22 17:42:01 2005
@@ -1,4 +1,5 @@
# Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
+# Copyright (C) 2005 XenSource Ltd
# This file is subject to the terms and conditions of the GNU General
# Public License. See the file "COPYING" in the main directory of
@@ -15,7 +16,7 @@
xs = None
xslock = threading.Lock()
- def __init__(self, path, fn, args=(), kwargs={}):
+ def __init__(self, path, fn, *args, **kwargs):
self.fn = fn
self.args = args
self.kwargs = kwargs
@@ -46,11 +47,11 @@
cls.threadcond.release()
while True:
try:
- (ord, owr, oer) = select.select([ cls.xs ], [], [])
+ (fd, _1, _2) = select.select([ cls.xs ], [], [])
cls.xslock.acquire()
# reconfirm ready to read with lock
- (ord, owr, oer) = select.select([ cls.xs ], [], [], 0.001)
- if not cls.xs in ord:
+ (fd, _1, _2) = select.select([ cls.xs ], [], [], 0.001)
+ if not cls.xs in fd:
cls.xslock.release()
continue
we = cls.xs.read_watch()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xm/create.py Thu Sep 22 17:42:01 2005
@@ -109,7 +109,7 @@
The address of the vncviewer is passed to the domain on the kernel
command
line using 'VNC_SERVER=<host>:<port>'. The port used by vnc is 5500
+ DISPLAY.
A display value with a free port is chosen if possible.
- Only valid when vnc=1.
+ Only valid when vnc=1.
""")
gopts.var('name', val='NAME',
@@ -141,7 +141,7 @@
use="Domain memory in MB.")
gopts.var('ssidref', val='SSIDREF',
- fn=set_u32, default=-1,
+ fn=set_u32, default=0,
use="Security Identifier.")
gopts.var('maxmem', val='MEMORY',
@@ -342,7 +342,7 @@
else:
return s
-def configure_image(opts, config, vals):
+def configure_image(opts, vals):
"""Create the image config.
"""
config_image = [ vals.builder ]
@@ -359,8 +359,7 @@
config_image.append(['args', vals.extra])
if vals.vcpus:
config_image.append(['vcpus', vals.vcpus])
- config.append(['image', config_image ])
-
+ return config_image
def configure_disks(opts, config_devs, vals):
"""Create the config for disks (virtual block devices).
@@ -494,17 +493,17 @@
config_vfr.append(['vif', ['id', idx], ['ip', ip]])
config.append(config_vfr)
-def configure_vmx(opts, config_devs, vals):
+def configure_vmx(opts, config_image, vals):
"""Create the config for VMX devices.
"""
- args = [ 'memmap', 'device_model', 'cdrom',
- 'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga',
- 'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display']
+ args = [ 'memmap', 'device_model', 'vcpus', 'cdrom',
+ 'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga',
+ 'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display']
for a in args:
- if (vals.__dict__[a]):
- config_devs.append([a, vals.__dict__[a]])
-
-def run_bootloader(opts, config, vals):
+ if (vals.__dict__[a]):
+ config_image.append([a, vals.__dict__[a]])
+
+def run_bootloader(opts, vals):
if not os.access(vals.bootloader, os.X_OK):
opts.err("Bootloader isn't executable")
if len(vals.disk) < 1:
@@ -512,11 +511,8 @@
(uname, dev, mode, backend) = vals.disk[0]
file = blkif.blkdev_uname_to_file(uname)
- blcfg = bootloader(vals.bootloader, file, not vals.console_autoconnect,
- vals.vcpus, vals.blentry)
-
- config.append(['bootloader', vals.bootloader])
- config.append(blcfg)
+ return bootloader(vals.bootloader, file, not vals.console_autoconnect,
+ vals.vcpus, vals.blentry)
def make_config(opts, vals):
"""Create the domain configuration.
@@ -542,16 +538,19 @@
config.append(['restart', vals.restart])
if vals.bootloader:
- run_bootloader(opts, config, vals)
+ config.append(['bootloader', vals.bootloader])
+ config_image = run_bootloader(opts, vals)
else:
- configure_image(opts, config, vals)
+ config_image = configure_image(opts, vals)
+ configure_vmx(opts, config_image, vals)
+ config.append(['image', config_image ])
+
config_devs = []
configure_disks(opts, config_devs, vals)
configure_pci(opts, config_devs, vals)
configure_vifs(opts, config_devs, vals)
configure_usb(opts, config_devs, vals)
configure_vtpm(opts, config_devs, vals)
- configure_vmx(opts, config_devs, vals)
config += config_devs
return config
@@ -673,7 +672,7 @@
# Local port is field 3.
y = x.split()[3]
# Field is addr:port, split off the port.
- y = y.split(':')[1]
+ y = y.split(':')[-1]
r.append(int(y))
return r
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/Makefile
--- a/tools/vtpm/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/Makefile Thu Sep 22 17:42:01 2005
@@ -4,7 +4,7 @@
include $(XEN_ROOT)/tools/vtpm/Rules.mk
# Dir name for emulator (as dom0 tpm driver)
-TPM_EMULATOR_DIR = tpm_emulator-0.2
+TPM_EMULATOR_DIR = tpm_emulator
# Dir name for vtpm instance
VTPM_DIR = vtpm
@@ -13,7 +13,7 @@
all: build
-build: $(TPM_EMULATOR_TARFILE) extract patch build_sub
+build: $(TPM_EMULATOR_DIR) $(VTPM_DIR) build_sub
install: build
$(MAKE) -C $(TPM_EMULATOR_DIR) $@
@@ -26,36 +26,32 @@
if [ -d $(VTPM_DIR) ]; \
then $(MAKE) -C $(VTPM_DIR) clean; \
fi
+
+mrproper:
+ rm -f $(TPM_EMULATOR_TARFILE)
rm -rf $(TPM_EMULATOR_DIR)
rm -rf $(VTPM_DIR)
-
-mrproper: clean
- rm -f $(TPM_EMULATOR_TARFILE)
# Download Swiss emulator
$(TPM_EMULATOR_TARFILE):
wget http://download.berlios.de/tpm-emulator/$(TPM_EMULATOR_TARFILE)
# Create vtpm and TPM emulator dirs
-extract: $(TPM_EMULATOR_DIR)/README $(VTPM_DIR)/README
-
-$(TPM_EMULATOR_DIR)/README:
- -rm -rf $(TPM_EMULATOR_DIR)
- tar -xzf $(TPM_EMULATOR_TARFILE)
-
-$(VTPM_DIR)/README:
- -rm -rf $(VTPM_DIR)
- cp -r --preserve $(TPM_EMULATOR_DIR) $(VTPM_DIR)
-
# apply patches for 1) used as dom0 tpm driver 2) used as vtpm device instance
-patch: $(TPM_EMULATOR_DIR)/Makefile $(VTPM_DIR)/Makefile
-
-$(TPM_EMULATOR_DIR)/Makefile: tpm_emulator.patch
+$(TPM_EMULATOR_DIR): $(TPM_EMULATOR_TARFILE)
+ tar -xzf $(TPM_EMULATOR_TARFILE);
+ mv tpm_emulator-0.2 $(TPM_EMULATOR_DIR);
+
-cd $(TPM_EMULATOR_DIR); \
+ patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \
patch -p1 <../tpm_emulator.patch
-$(VTPM_DIR)/Makefile: vtpm.patch
+$(VTPM_DIR): $(TPM_EMULATOR_TARFILE)
+ tar -xzf $(TPM_EMULATOR_TARFILE);
+ mv tpm_emulator-0.2 $(VTPM_DIR);
+
-cd $(VTPM_DIR); \
+ patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \
patch -p1 <../vtpm.patch
build_sub:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/README
--- a/tools/vtpm/README Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/README Thu Sep 22 17:42:01 2005
@@ -23,6 +23,7 @@
- xen-unstable
- IBM frontend/backend vtpm driver patch
- vtpm_managerd
+- GNU MP Big number library (GMP)
vtpmd Flow (for vtpm_manager. vtpmd never run by default)
============================
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/tpm_emulator.patch
--- a/tools/vtpm/tpm_emulator.patch Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/tpm_emulator.patch Thu Sep 22 17:42:01 2005
@@ -1,12 +1,12 @@
-diff -uprN orig/tpm_emulator-0.2/AUTHORS tpm_emulator-0.2/AUTHORS
---- orig/tpm_emulator-0.2/AUTHORS 2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/AUTHORS 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/AUTHORS tpm_emulator/AUTHORS
+--- orig/tpm_emulator-0.2-x86_64/AUTHORS 2005-08-15 00:58:57.000000000
-0700
++++ tpm_emulator/AUTHORS 2005-09-14 20:27:22.000000000 -0700
@@ -1 +1,2 @@
Mario Strasser <mast@xxxxxxx>
+INTEL Corp <>
-diff -uprN orig/tpm_emulator-0.2/ChangeLog tpm_emulator-0.2/ChangeLog
---- orig/tpm_emulator-0.2/ChangeLog 2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/ChangeLog 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/ChangeLog tpm_emulator/ChangeLog
+--- orig/tpm_emulator-0.2-x86_64/ChangeLog 2005-08-15 00:58:57.000000000
-0700
++++ tpm_emulator/ChangeLog 2005-09-14 20:27:22.000000000 -0700
@@ -1,3 +1,7 @@
+2005-08-16: INTEL Corp
+ * Set default permissions to PCRs
@@ -15,10 +15,29 @@
2005-08-15 Mario Strasser <mast@xxxxxxx>
* all: some typos corrected
* tpm_integrity.c: bug in TPM_Extend fixed
-diff -uprN orig/tpm_emulator-0.2/Makefile tpm_emulator-0.2/Makefile
---- orig/tpm_emulator-0.2/Makefile 2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/Makefile 2005-08-17 10:55:52.000000000 -0700
-@@ -1,15 +1,19 @@
+diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.h
tpm_emulator/linux_module.h
+--- orig/tpm_emulator-0.2-x86_64/linux_module.h 2005-09-15
19:21:14.844078720 -0700
++++ tpm_emulator/linux_module.h 2005-09-14 20:27:22.000000000 -0700
+@@ -1,5 +1,6 @@
+ /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
+ * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
++ * Copyright (C) 2005 INTEL Corp.
+ *
+ * This module is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+@@ -35,7 +36,7 @@
+ #include "tpm_version.h"
+
+ #define TPM_DEVICE_MINOR 224
+-#define TPM_DEVICE_NAME "tpm"
++#define TPM_DEVICE_NAME "tpm0"
+ #define TPM_MODULE_NAME "tpm_emulator"
+
+ /* debug and log output functions */
+diff -uprN orig/tpm_emulator-0.2-x86_64/Makefile tpm_emulator/Makefile
+--- orig/tpm_emulator-0.2-x86_64/Makefile 2005-09-15 19:21:14.845078568
-0700
++++ tpm_emulator/Makefile 2005-09-14 20:27:22.000000000 -0700
+@@ -1,16 +1,20 @@
# Software-Based Trusted Platform Module (TPM) Emulator for Linux
# Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>
+# Copyright (C) 2005 INTEL Corp.
@@ -33,6 +52,7 @@
-KERNEL_BUILD := /lib/modules/$(KERNEL_RELEASE)/build
+KERNEL_BUILD := $(XEN_ROOT)/linux-2.6.12-xen0
MOD_SUBDIR := misc
+ COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/)
# module settings
-MODULE_NAME := tpm_emulator
@@ -40,7 +60,7 @@
VERSION_MAJOR := 0
VERSION_MINOR := 2
VERSION_BUILD := $(shell date +"%s")
-@@ -27,11 +30,9 @@ DIRS := . crypto tpm
+@@ -34,11 +38,9 @@ DIRS := . crypto tpm
SRCS := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
OBJS := $(patsubst %.c, %.o, $(SRCS))
SRCS += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
@@ -54,7 +74,7 @@
EXTRA_CFLAGS += -I$(src) -I$(src)/crypto -I$(src)/tpm
-@@ -42,23 +43,17 @@ all: $(src)/crypto/gmp.h $(src)/crypto/l
+@@ -49,23 +51,17 @@ all: $(src)/crypto/gmp.h $(src)/crypto/l
@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
install:
@@ -84,9 +104,9 @@
$(src)/crypto/libgmp.a:
test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB)
$(src)/crypto/libgmp.a
-diff -uprN orig/tpm_emulator-0.2/README tpm_emulator-0.2/README
---- orig/tpm_emulator-0.2/README 2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/README 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/README tpm_emulator/README
+--- orig/tpm_emulator-0.2-x86_64/README 2005-08-15 00:58:57.000000000
-0700
++++ tpm_emulator/README 2005-09-14 20:27:22.000000000 -0700
@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
Copyright
--------------------------------------------------------------------------
@@ -97,28 +117,9 @@
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-diff -uprN orig/tpm_emulator-0.2/linux_module.h tpm_emulator-0.2/linux_module.h
---- orig/tpm_emulator-0.2/linux_module.h 2005-08-17 10:58:36.000000000
-0700
-+++ tpm_emulator-0.2/linux_module.h 2005-08-17 10:55:52.000000000 -0700
-@@ -1,5 +1,6 @@
- /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
- * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
-+ * Copyright (C) 2005 INTEL Corp.
- *
- * This module is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published
-@@ -33,7 +34,7 @@
- #include "tpm_version.h"
-
- #define TPM_DEVICE_MINOR 224
--#define TPM_DEVICE_NAME "tpm"
-+#define TPM_DEVICE_NAME "tpm0"
- #define TPM_MODULE_NAME "tpm_emulator"
-
- /* debug and log output functions */
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c tpm_emulator-0.2/tpm/tpm_data.c
---- orig/tpm_emulator-0.2/tpm/tpm_data.c 2005-08-17 10:58:36.000000000
-0700
-+++ tpm_emulator-0.2/tpm/tpm_data.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c
tpm_emulator/tpm/tpm_data.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c 2005-09-15
19:21:14.847078264 -0700
++++ tpm_emulator/tpm/tpm_data.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -139,13 +140,3 @@
tpmData.permanent.data.pcrAttrib[i].pcrReset = TRUE;
}
/* set tick type */
-diff -uprN orig/tpm_emulator-0.2/tpm_version.h tpm_emulator-0.2/tpm_version.h
---- orig/tpm_emulator-0.2/tpm_version.h 2005-08-17 10:58:36.000000000
-0700
-+++ tpm_emulator-0.2/tpm_version.h 2005-08-17 10:55:53.000000000 -0700
-@@ -2,5 +2,5 @@
- #define _TPM_VERSION_H_
- #define VERSION_MAJOR 0
- #define VERSION_MINOR 2
--#define VERSION_BUILD 1123950310
-+#define VERSION_BUILD 1124301353
- #endif /* _TPM_VERSION_H_ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/vtpm.patch
--- a/tools/vtpm/vtpm.patch Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/vtpm.patch Thu Sep 22 17:42:01 2005
@@ -1,12 +1,12 @@
-diff -uprN orig/tpm_emulator-0.2/AUTHORS vtpm/AUTHORS
---- orig/tpm_emulator-0.2/AUTHORS 2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/AUTHORS 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/AUTHORS vtpm/AUTHORS
+--- orig/tpm_emulator-0.2-x86_64/AUTHORS 2005-08-15 00:58:57.000000000
-0700
++++ vtpm/AUTHORS 2005-09-14 20:27:22.000000000 -0700
@@ -1 +1,2 @@
Mario Strasser <mast@xxxxxxx>
+INTEL Corp <>
-diff -uprN orig/tpm_emulator-0.2/ChangeLog vtpm/ChangeLog
---- orig/tpm_emulator-0.2/ChangeLog 2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/ChangeLog 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/ChangeLog vtpm/ChangeLog
+--- orig/tpm_emulator-0.2-x86_64/ChangeLog 2005-08-15 00:58:57.000000000
-0700
++++ vtpm/ChangeLog 2005-09-14 20:27:22.000000000 -0700
@@ -1,3 +1,7 @@
+2005-08-16 Intel Corp
+ Moved module out of kernel to run as a ring 3 app
@@ -15,115 +15,9 @@
2005-08-15 Mario Strasser <mast@xxxxxxx>
* all: some typos corrected
* tpm_integrity.c: bug in TPM_Extend fixed
-diff -uprN orig/tpm_emulator-0.2/Makefile vtpm/Makefile
---- orig/tpm_emulator-0.2/Makefile 2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/Makefile 2005-08-17 10:55:52.000000000 -0700
-@@ -1,21 +1,29 @@
- # Software-Based Trusted Platform Module (TPM) Emulator for Linux
- # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>
-+# Copyright (C) 2005 INTEL Corp.
- #
- # $Id: Makefile 10 2005-04-26 20:59:50Z mast $
-
--# kernel settings
--KERNEL_RELEASE := $(shell uname -r)
--KERNEL_BUILD := /lib/modules/$(KERNEL_RELEASE)/build
--MOD_SUBDIR := misc
--
- # module settings
--MODULE_NAME := tpm_emulator
-+BIN := vtpmd
- VERSION_MAJOR := 0
- VERSION_MINOR := 2
- VERSION_BUILD := $(shell date +"%s")
-
--# enable/disable DEBUG messages
--EXTRA_CFLAGS += -DDEBUG -g
-+# Installation program and options
-+INSTALL = install
-+INSTALL_PROG = $(INSTALL) -m0755
-+INSTALL_DIR = $(INSTALL) -d -m0755
-+
-+# Xen tools installation directory
-+TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
-+
-+CC := gcc
-+CFLAGS += -g -Wall $(INCLUDE) -DDEBUG
-+CFLAGS += -I. -Itpm
-+
-+# Is the simulator running in it's own vm?
-+#CFLAGS += -DVTPM_MULTI_VM
-
- # GNU MP configuration
- GMP_LIB := /usr/lib/libgmp.a
-@@ -27,38 +35,31 @@ DIRS := . crypto tpm
- SRCS := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
- OBJS := $(patsubst %.c, %.o, $(SRCS))
- SRCS += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
--DISTSRC := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS)
--DISTDIR := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR)
-
--obj-m := $(MODULE_NAME).o
--$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
-+obj-m := $(BIN)
-+$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
-
- EXTRA_CFLAGS += -I$(src) -I$(src)/crypto -I$(src)/tpm
-
- # do not print "Entering directory ..."
- MAKEFLAGS += --no-print-directory
-
--all: $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version
-- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
-+all: $(BIN)
-+
-+$(BIN): $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS)
$(OBJS)
-+ $(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN)
-+
-+%.o: %.c
-+ $(CC) $(CFLAGS) -c $< -o $@
-
- install:
-- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install
-- test -d /var/tpm || mkdir /var/tpm
-- test -c /dev/tpm || mknod /dev/tpm c 10 224
-- chmod 666 /dev/tpm
-- depmod -a
-+ $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR)
-
- clean:
-- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean
-- rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a
-+ rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS)
-
--dist: $(DISTSRC)
-- rm -rf $(DISTDIR)
-- mkdir $(DISTDIR)
-- cp --parents $(DISTSRC) $(DISTDIR)/
-- rm -f $(DISTDIR)/crypto/gmp.h
-- tar -chzf $(DISTDIR).tar.gz $(DISTDIR)
-- rm -rf $(DISTDIR)
-+mrproper: clean
-+ rm -f $(BIN)
-
- $(src)/crypto/libgmp.a:
- test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB)
$(src)/crypto/libgmp.a
-diff -uprN orig/tpm_emulator-0.2/README vtpm/README
---- orig/tpm_emulator-0.2/README 2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/README 2005-08-17 10:55:52.000000000 -0700
-@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
- Copyright
- --------------------------------------------------------------------------
- Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> and Swiss Federal
--Institute of Technology (ETH) Zurich.
-+ Institute of Technology (ETH) Zurich.
-+Copyright (C) 2005 INTEL Corp
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
-diff -uprN orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c
vtpm/crypto/gmp_kernel_wrapper.c
---- orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c 2005-08-17
10:58:36.000000000 -0700
-+++ vtpm/crypto/gmp_kernel_wrapper.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c
vtpm/crypto/gmp_kernel_wrapper.c
+--- orig/tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c 2005-09-15
19:21:42.508873032 -0700
++++ vtpm/crypto/gmp_kernel_wrapper.c 2005-09-15 19:25:37.319176440 -0700
@@ -1,5 +1,6 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -154,9 +48,9 @@
{
- void *ret = (void*)kmalloc(size, GFP_KERNEL);
- if (!ret) panic(KERN_CRIT TPM_MODULE_NAME
-- "GMP: cannot allocate memory (size=%u)\n", size);
+- "GMP: cannot allocate memory (size=%Zu)\n", size);
+ void *ret = (void*)malloc(size);
-+ if (!ret) error("GMP: cannot allocate memory (size=%u)\n", size);
++ if (!ret) error("GMP: cannot allocate memory (size=%Zu)\n", size);
return ret;
}
@@ -165,9 +59,10 @@
{
- void *ret = (void*)kmalloc(new_size, GFP_KERNEL);
- if (!ret) panic(KERN_CRIT TPM_MODULE_NAME "GMP: Cannot reallocate memory "
+- "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
+ void *ret = (void*)malloc(new_size);
+ if (!ret) error("GMP: Cannot reallocate memory "
- "(old_size=%u new_size=%u)\n", old_size, new_size);
++ "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
memcpy(ret, oldptr, old_size);
- kfree(oldptr);
+ free(oldptr);
@@ -183,9 +78,9 @@
}
}
-diff -uprN orig/tpm_emulator-0.2/crypto/rsa.c vtpm/crypto/rsa.c
---- orig/tpm_emulator-0.2/crypto/rsa.c 2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/crypto/rsa.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/crypto/rsa.c vtpm/crypto/rsa.c
+--- orig/tpm_emulator-0.2-x86_64/crypto/rsa.c 2005-08-15 00:58:57.000000000
-0700
++++ vtpm/crypto/rsa.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,5 +1,6 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -211,8 +106,8 @@
sha1_final(&ctx, &msg[1]);
if (memcmp(&msg[1], &msg[1 + SHA1_DIGEST_LENGTH],
SHA1_DIGEST_LENGTH) != 0) return -1;
-diff -uprN orig/tpm_emulator-0.2/linux_module.c vtpm/linux_module.c
---- orig/tpm_emulator-0.2/linux_module.c 2005-08-17 10:58:36.000000000
-0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.c vtpm/linux_module.c
+--- orig/tpm_emulator-0.2-x86_64/linux_module.c 2005-09-15
19:22:40.343080896 -0700
+++ vtpm/linux_module.c 1969-12-31 16:00:00.000000000 -0800
@@ -1,163 +0,0 @@
-/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
@@ -283,7 +178,7 @@
-
-static ssize_t tpm_read(struct file *file, char *buf, size_t count, loff_t
*ppos)
-{
-- debug("%s(%d)", __FUNCTION__, count);
+- debug("%s(%Zu)", __FUNCTION__, count);
- down(&tpm_mutex);
- if (tpm_response.data != NULL) {
- count = min(count, (size_t)tpm_response.size - (size_t)*ppos);
@@ -298,7 +193,7 @@
-
-static ssize_t tpm_write(struct file *file, const char *buf, size_t count,
loff_t *ppos)
-{
-- debug("%s(%d)", __FUNCTION__, count);
+- debug("%s(%Zu)", __FUNCTION__, count);
- down(&tpm_mutex);
- *ppos = 0;
- if (tpm_response.data != NULL) kfree(tpm_response.data);
@@ -378,9 +273,9 @@
- return (ticks > 0) ? ticks : 1;
-}
-
-diff -uprN orig/tpm_emulator-0.2/linux_module.h vtpm/linux_module.h
---- orig/tpm_emulator-0.2/linux_module.h 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/linux_module.h 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.h vtpm/linux_module.h
+--- orig/tpm_emulator-0.2-x86_64/linux_module.h 2005-09-15
19:21:14.844078720 -0700
++++ vtpm/linux_module.h 2005-09-14 20:27:22.000000000 -0700
@@ -1,5 +1,6 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -416,17 +311,20 @@
+/* module settings */
+#define min(A,B) ((A)<(B)?(A):(B))
+ #ifndef STR
#define STR(s) __STR__(s)
#define __STR__(s) #s
- #include "tpm_version.h"
-@@ -39,32 +45,35 @@
+@@ -39,34 +45,38 @@
+ #define TPM_MODULE_NAME "tpm_emulator"
+
/* debug and log output functions */
++extern int dmi_id;
#ifdef DEBUG
-#define debug(fmt, ...) printk(KERN_DEBUG "%s %s:%d: Debug: " fmt "\n", \
- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
-+#define debug(fmt, ...) printf("%s:%d: Debug: " fmt "\n", \
-+ __FILE__, __LINE__, ## __VA_ARGS__)
++#define debug(fmt, ...) printf("TPMD[%d]: %s:%d: Debug: " fmt "\n", \
++ dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
#else
#define debug(fmt, ...)
#endif
@@ -436,12 +334,12 @@
- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
-#define alert(fmt, ...) printk(KERN_ALERT "%s %s:%d: Alert: " fmt "\n", \
- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
-+#define info(fmt, ...) printf("%s:%d: Info: " fmt "\n", \
-+ __FILE__, __LINE__, ## __VA_ARGS__)
-+#define error(fmt, ...) printf("%s:%d: Error: " fmt "\n", \
-+ __FILE__, __LINE__, ## __VA_ARGS__)
-+#define alert(fmt, ...) printf("%s:%d: Alert: " fmt "\n", \
-+ __FILE__, __LINE__, ## __VA_ARGS__)
++#define info(fmt, ...) printf("TPMD[%d]: %s:%d: Info: " fmt "\n", \
++ dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
++#define error(fmt, ...) printf("TPMD[%d]: %s:%d: Error: " fmt "\n", \
++ dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
++#define alert(fmt, ...) printf("TPMD[%d]: %s:%d: Alert: " fmt "\n", \
++ dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
/* memory allocation */
@@ -465,7 +363,7 @@
static inline void tpm_get_random_bytes(void *buf, int nbytes)
{
get_random_bytes(buf, nbytes);
-@@ -84,9 +93,9 @@ uint64_t tpm_get_ticks(void);
+@@ -86,9 +96,9 @@ uint64_t tpm_get_ticks(void);
#define CPU_TO_LE16(x) __cpu_to_le16(x)
#define BE64_TO_CPU(x) __be64_to_cpu(x)
@@ -477,9 +375,116 @@
#define BE16_TO_CPU(x) __be16_to_cpu(x)
#define LE16_TO_CPU(x) __le16_to_cpu(x)
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c
---- orig/tpm_emulator-0.2/tpm/tpm_audit.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_audit.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/Makefile vtpm/Makefile
+--- orig/tpm_emulator-0.2-x86_64/Makefile 2005-09-15 19:21:14.845078568
-0700
++++ vtpm/Makefile 2005-09-14 20:27:22.000000000 -0700
+@@ -1,22 +1,31 @@
+ # Software-Based Trusted Platform Module (TPM) Emulator for Linux
+ # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>
++# Copyright (C) 2005 INTEL Corp.
+ #
+ # $Id: Makefile 10 2005-04-26 20:59:50Z mast $
+
+-# kernel settings
+-KERNEL_RELEASE := $(shell uname -r)
+-KERNEL_BUILD := /lib/modules/$(KERNEL_RELEASE)/build
+-MOD_SUBDIR := misc
+ COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/)
+
+ # module settings
+-MODULE_NAME := tpm_emulator
++BIN := vtpmd
+ VERSION_MAJOR := 0
+ VERSION_MINOR := 2
+ VERSION_BUILD := $(shell date +"%s")
+
+-# enable/disable DEBUG messages
+-EXTRA_CFLAGS += -DDEBUG -g
++# Installation program and options
++INSTALL = install
++INSTALL_PROG = $(INSTALL) -m0755
++INSTALL_DIR = $(INSTALL) -d -m0755
++
++# Xen tools installation directory
++TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
++
++CC := gcc
++CFLAGS += -g -Wall $(INCLUDE) -DDEBUG
++CFLAGS += -I. -Itpm
++
++# Is the simulator running in it's own vm?
++#CFLAGS += -DVTPM_MULTI_VM
+
+ ifeq ($(COMPILE_ARCH),x86_64)
+ LIBDIR = lib64
+@@ -34,38 +43,31 @@ DIRS := . crypto tpm
+ SRCS := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
+ OBJS := $(patsubst %.c, %.o, $(SRCS))
+ SRCS += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
+-DISTSRC := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS)
+-DISTDIR := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR)
+
+-obj-m := $(MODULE_NAME).o
+-$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
++obj-m := $(BIN)
++$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
+
+ EXTRA_CFLAGS += -I$(src) -I$(src)/crypto -I$(src)/tpm
+
+ # do not print "Entering directory ..."
+ MAKEFLAGS += --no-print-directory
+
+-all: $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version
+- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
++all: $(BIN)
++
++$(BIN): $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS)
$(OBJS)
++ $(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN)
++
++%.o: %.c
++ $(CC) $(CFLAGS) -c $< -o $@
+
+ install:
+- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install
+- test -d /var/tpm || mkdir /var/tpm
+- test -c /dev/tpm || mknod /dev/tpm c 10 224
+- chmod 666 /dev/tpm
+- depmod -a
++ $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR)
+
+ clean:
+- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean
+- rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a
++ rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS)
+
+-dist: $(DISTSRC)
+- rm -rf $(DISTDIR)
+- mkdir $(DISTDIR)
+- cp --parents $(DISTSRC) $(DISTDIR)/
+- rm -f $(DISTDIR)/crypto/gmp.h
+- tar -chzf $(DISTDIR).tar.gz $(DISTDIR)
+- rm -rf $(DISTDIR)
++mrproper: clean
++ rm -f $(BIN) tpm_version.h
+
+ $(src)/crypto/libgmp.a:
+ test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB)
$(src)/crypto/libgmp.a
+diff -uprN orig/tpm_emulator-0.2-x86_64/README vtpm/README
+--- orig/tpm_emulator-0.2-x86_64/README 2005-08-15 00:58:57.000000000
-0700
++++ vtpm/README 2005-09-14 20:27:22.000000000 -0700
+@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
+ Copyright
+ --------------------------------------------------------------------------
+ Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> and Swiss Federal
+-Institute of Technology (ETH) Zurich.
++ Institute of Technology (ETH) Zurich.
++Copyright (C) 2005 INTEL Corp
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_audit.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_audit.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -542,9 +547,9 @@
return TPM_SUCCESS;
}
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_authorization.c
vtpm/tpm/tpm_authorization.c
---- orig/tpm_emulator-0.2/tpm/tpm_authorization.c 2005-08-17
10:58:36.000000000 -0700
-+++ vtpm/tpm/tpm_authorization.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_authorization.c
vtpm/tpm/tpm_authorization.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_authorization.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_authorization.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -568,9 +573,9 @@
}
-
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_capability.c vtpm/tpm/tpm_capability.c
---- orig/tpm_emulator-0.2/tpm/tpm_capability.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_capability.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_capability.c
vtpm/tpm/tpm_capability.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_capability.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_capability.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -593,9 +598,9 @@
}
}
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c
vtpm/tpm/tpm_cmd_handler.c
---- orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c 2005-08-17
10:58:36.000000000 -0700
-+++ vtpm/tpm/tpm_cmd_handler.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_cmd_handler.c
vtpm/tpm/tpm_cmd_handler.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_cmd_handler.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_cmd_handler.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -658,9 +663,9 @@
return 0;
}
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c
---- orig/tpm_emulator-0.2/tpm/tpm_crypto.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_crypto.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c 2005-09-15
19:21:14.846078416 -0700
++++ vtpm/tpm/tpm_crypto.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -678,14 +683,14 @@
memcpy(&buf[30], areaToSign, areaToSignSize);
if (rsa_sign(&key->key, RSA_SSA_PKCS1_SHA1,
buf, areaToSignSize + 30, *sig)) {
-@@ -379,4 +380,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
+@@ -383,4 +384,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
}
return TPM_SUCCESS;
}
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c vtpm/tpm/tpm_data.c
---- orig/tpm_emulator-0.2/tpm/tpm_data.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_data.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c vtpm/tpm/tpm_data.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c 2005-09-15
19:21:14.847078264 -0700
++++ vtpm/tpm/tpm_data.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1005,7 +1010,7 @@
}
#else
-@@ -231,7 +431,6 @@ int tpm_restore_permanent_data(void)
+@@ -232,7 +432,6 @@ int tpm_restore_permanent_data(void)
int tpm_erase_permanent_data(void)
{
@@ -1014,9 +1019,9 @@
return res;
}
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_deprecated.c vtpm/tpm/tpm_deprecated.c
---- orig/tpm_emulator-0.2/tpm/tpm_deprecated.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_deprecated.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_deprecated.c
vtpm/tpm/tpm_deprecated.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_deprecated.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_deprecated.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1043,9 +1048,9 @@
authContextSize, &contextBlob);
if (res != TPM_SUCCESS) return res;
len = *authContextSize;
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_emulator.h vtpm/tpm/tpm_emulator.h
---- orig/tpm_emulator-0.2/tpm/tpm_emulator.h 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_emulator.h 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_emulator.h
vtpm/tpm/tpm_emulator.h
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_emulator.h 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_emulator.h 2005-09-14 20:27:22.000000000 -0700
@@ -1,5 +1,6 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1063,9 +1068,9 @@
/**
* tpm_emulator_init - initialises and starts the TPM emulator
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_integrity.c vtpm/tpm/tpm_integrity.c
---- orig/tpm_emulator-0.2/tpm/tpm_integrity.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_integrity.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_integrity.c
vtpm/tpm/tpm_integrity.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_integrity.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_integrity.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1079,9 +1084,9 @@
return TPM_SUCCESS;
}
-
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_structures.h vtpm/tpm/tpm_structures.h
---- orig/tpm_emulator-0.2/tpm/tpm_structures.h 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_structures.h 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_structures.h
vtpm/tpm/tpm_structures.h
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_structures.h 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_structures.h 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1099,9 +1104,9 @@
#include "crypto/rsa.h"
/*
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_testing.c vtpm/tpm/tpm_testing.c
---- orig/tpm_emulator-0.2/tpm/tpm_testing.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_testing.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_testing.c
vtpm/tpm/tpm_testing.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_testing.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_testing.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1217,9 +1222,9 @@
rsa_private_key_t priv_key;
rsa_public_key_t pub_key;
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c
---- orig/tpm_emulator-0.2/tpm/tpm_ticks.c 2005-08-17 10:58:36.000000000
-0700
-+++ vtpm/tpm/tpm_ticks.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_ticks.c 2005-08-15
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_ticks.c 2005-09-14 20:27:22.000000000 -0700
@@ -1,6 +1,7 @@
/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
* Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1302,9 +1307,9 @@
}
-diff -uprN orig/tpm_emulator-0.2/tpm/vtpm_manager.h vtpm/tpm/vtpm_manager.h
---- orig/tpm_emulator-0.2/tpm/vtpm_manager.h 1969-12-31 16:00:00.000000000
-0800
-+++ vtpm/tpm/vtpm_manager.h 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/vtpm_manager.h
vtpm/tpm/vtpm_manager.h
+--- orig/tpm_emulator-0.2-x86_64/tpm/vtpm_manager.h 1969-12-31
16:00:00.000000000 -0800
++++ vtpm/tpm/vtpm_manager.h 2005-09-14 20:27:22.000000000 -0700
@@ -0,0 +1,126 @@
+// ===================================================================
+//
@@ -1432,9 +1437,9 @@
+*********************************************************************/
+
+#endif //_VTPM_MANAGER_H_
-diff -uprN orig/tpm_emulator-0.2/tpmd.c vtpm/tpmd.c
---- orig/tpm_emulator-0.2/tpmd.c 1969-12-31 16:00:00.000000000 -0800
-+++ vtpm/tpmd.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpmd.c vtpm/tpmd.c
+--- orig/tpm_emulator-0.2-x86_64/tpmd.c 1969-12-31 16:00:00.000000000
-0800
++++ vtpm/tpmd.c 2005-09-15 19:28:55.783005352 -0700
@@ -0,0 +1,207 @@
+/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
+ * Copyright (C) 2005 INTEL Corp
@@ -1468,9 +1473,9 @@
+#else
+ #define GUEST_RX_FIFO_D "/var/vtpm/fifos/guest-to-%d.fifo"
+ #define GUEST_TX_FIFO "/var/vtpm/fifos/guest-from-all.fifo"
++#endif
+
+ int dmi_id;
-+#endif
+
+#define BUFFER_SIZE 2048
+
@@ -1506,7 +1511,7 @@
+{
+ uint8_t in[BUFFER_SIZE], *out, *addressed_out;
+ uint32_t out_size;
-+ int in_size, written ;
++ int in_size, written;
+ int i, guest_id=-1;
+
+ int vtpm_tx_fh=-1, vtpm_rx_fh=-1;
@@ -1602,7 +1607,7 @@
+ written = write(vtpm_tx_fh, ctrl_msg, sizeof(ctrl_msg));
+
+ if (written != sizeof(ctrl_msg)) {
-+ printf("ERROR: Part of response not written %d/%d.\n", written,
sizeof(ctrl_msg));
++ printf("ERROR: Part of response not written %d/%Zu.\n", written,
sizeof(ctrl_msg));
+ } else {
+ printf("Send Ctrl Message confermation\n");
+ }
@@ -1623,7 +1628,7 @@
+ printf("%x ", addressed_out[i]);
+ printf("\n");
+ } else {
-+ printf("Sent[%d]: ", out_size + sizeof(uint32_t));
++ printf("Sent[%Zu]: ", out_size + sizeof(uint32_t));
+ for (i=0; i< out_size+ sizeof(uint32_t); i++)
+ printf("%x ", addressed_out[i]);
+ printf("\n");
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/README
--- a/tools/vtpm_manager/README Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/README Thu Sep 22 17:42:01 2005
@@ -51,14 +51,24 @@
DUMMY_BACKEND -> vtpm_manager listens on /tmp/in.fifo and
/tmp/out.fifo rather than backend
-MANUAL_DM_LAUNCH -> User must manually launch & kill VTPMs
+MANUAL_DM_LAUNCH -> Must manually launch & kill VTPMs
-USE_FIXED_SRK_AUTH -> Do not randomly generate a random SRK & Owner
auth
+WELL_KNOWN_SRK_AUTH -> Rather than randomly generating the password
for the SRK,
+ use a well known value. This is necessary for
sharing use
+ of the SRK across applications. Such as VTPM
and Dom0
+ measurement software.
+
+WELL_KNOWN_OWNER_AUTH -> Rather than randomly generating the password
for the owner,
+ use a well known value. This is useful for
debugging and for
+ poor bios which do not support clearing TPM if
OwnerAuth is
+ lost. However this has no protection from
malicious app
+ issuing a TPM_OwnerClear to wipe the TPM
Requirements
============
- xen-unstable
-- IBM frontend/backend vtpm driver patch
+- vtpm frontend/backend driver patch
+- OpenSSL Library
Single-VM Flow
============================
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/Rules.mk Thu Sep 22 17:42:01 2005
@@ -57,7 +57,8 @@
#CFLAGS += -DMANUAL_DM_LAUNCH
# Fixed SRK
-CFLAGS += -DUSE_FIXED_SRK_AUTH
+CFLAGS += -DWELL_KNOWN_SRK_AUTH
+#CFLAGS += -DWELL_KNOWN_OWNER_AUTH
# TPM Hardware Device or TPM Simulator
#CFLAGS += -DTPM_HWDEV
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/crypto/Makefile
--- a/tools/vtpm_manager/crypto/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/crypto/Makefile Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
rm -f *.a *.so *.o *.rpm $(DEP_FILES)
mrproper: clean
+ rm -f *~
$(BIN): $(OBJS)
$(AR) rcs $(BIN) $(OBJS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/Makefile
--- a/tools/vtpm_manager/manager/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/Makefile Thu Sep 22 17:42:01 2005
@@ -17,7 +17,7 @@
rm -f *.a *.so *.o *.rpm $(DEP_FILES)
mrproper: clean
- rm -f $(BIN)
+ rm -f $(BIN) *~
$(BIN): $(OBJS)
$(CC) $(LDFLAGS) $^ $(LIBS) -o $@
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/dmictl.c
--- a/tools/vtpm_manager/manager/dmictl.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/dmictl.c Thu Sep 22 17:42:01 2005
@@ -1,339 +1,344 @@
-// ===================================================================
-//
-// Copyright (c) 2005, Intel Corp.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Intel Corporation nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-// ===================================================================
-//
-// dmictl.c
-//
-// Functions for creating and destroying DMIs
-//
-// ==================================================================
-
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-
-#ifndef VTPM_MUTLI_VM
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <fcntl.h>
- #include <signal.h>
- #include <wait.h>
-#endif
-
-#include "vtpmpriv.h"
-#include "bsg.h"
-#include "buffer.h"
-#include "log.h"
-#include "hashtable.h"
-#include "hashtable_itr.h"
-
-#define TPM_EMULATOR_PATH "/usr/bin/vtpmd"
-
-TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) {
- TPM_RESULT status = TPM_FAIL;
-
- if (dmi_res == NULL)
- return TPM_SUCCESS;
-
- status = TCS_CloseContext(dmi_res->TCSContext);
- free ( dmi_res->NVMLocation );
- dmi_res->connected = FALSE;
-
-#ifndef VTPM_MULTI_VM
- free(dmi_res->guest_tx_fname);
- free(dmi_res->vtpm_tx_fname);
-
- close(dmi_res->guest_tx_fh); dmi_res->guest_tx_fh = -1;
- close(dmi_res->vtpm_tx_fh); dmi_res->vtpm_tx_fh = -1;
-
-
- #ifndef MANUAL_DM_LAUNCH
- if (dmi_res->dmi_id != VTPM_CTL_DM) {
- if (dmi_res->dmi_pid != 0) {
- vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n", dmi_res->dmi_pid);
- if ((kill(dmi_res->dmi_pid, SIGKILL) !=0) ||
- (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid)){
- vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi on pid %d.\n",
dmi_res->dmi_pid);
- status = TPM_FAIL;
- }
- } else
- vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi because it's pid was
0.\n");
- }
- #endif
-#endif
-
- return status;
-}
-
-TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) {
-
- VTPM_DMI_RESOURCE *new_dmi=NULL;
- TPM_RESULT status=TPM_FAIL;
- BYTE type;
- UINT32 dmi_id, domain_id, *dmi_id_key;
- int fh;
-
-#ifndef VTPM_MUTLI_VM
- char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL
- struct stat file_info;
-#endif
-
- if (param_buf == NULL) { // Assume creation of Dom 0 control
- type = 0;
- domain_id = VTPM_CTL_DM;
- dmi_id = VTPM_CTL_DM;
- } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) {
- vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n",
buffer_len(param_buf));
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- } else {
- BSG_UnpackList( param_buf->bytes, 3,
- BSG_TYPE_BYTE, &type,
- BSG_TYPE_UINT32, &domain_id,
- BSG_TYPE_UINT32, &dmi_id);
- }
-
- new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
&dmi_id);
- if (new_dmi == NULL) {
- vtpmloginfo(VTPM_LOG_VTPM, "Creating new DMI instance %d attached on
domain %d.\n", dmi_id, domain_id);
- // Brand New DMI. Initialize the persistent pieces
- if ((new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE)))
== NULL) {
- status = TPM_RESOURCES;
- goto abort_egress;
- }
- memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE));
- new_dmi->dmi_id = dmi_id;
- new_dmi->connected = FALSE;
-
- if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) {
- status = TPM_RESOURCES;
- goto abort_egress;
- }
- *dmi_id_key = new_dmi->dmi_id;
-
- // install into map
- if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){
- free(new_dmi);
- free(dmi_id_key);
- status = TPM_FAIL;
- goto egress;
- }
-
- } else
- vtpmloginfo(VTPM_LOG_VTPM, "Re-attaching DMI instance %d on domain %d
.\n", dmi_id, domain_id);
-
- if (new_dmi->connected) {
- vtpmlogerror(VTPM_LOG_VTPM, "Attempt to re-attach, currently attached
instance %d. Ignoring\n", dmi_id);
- status = TPM_BAD_PARAMETER;
- goto egress;
- }
-
- // Initialize the Non-persistent pieces
- new_dmi->dmi_domain_id = domain_id;
- new_dmi->NVMLocation = NULL;
-
- new_dmi->TCSContext = 0;
- TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) );
-
- new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE));
- sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id);
-
- // Measure DMI
- // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value
- /*
- fh = open(TPM_EMULATOR_PATH, O_RDONLY);
- stat_ret = fstat(fh, &file_stat);
- if (stat_ret == 0)
- dmi_size = file_stat.st_size;
- else {
- vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n");
- status = TPM_IOERROR;
- goto abort_egress;
- }
- dmi_buffer
- */
- memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST));
-
-#ifndef VTPM_MULTI_VM
- if (dmi_id != VTPM_CTL_DM) {
- // Create a pair of fifo pipes
- if( (new_dmi->guest_tx_fname = (char *) malloc(11 +
strlen(GUEST_TX_FIFO))) == NULL){
- status = TPM_RESOURCES;
- goto abort_egress;
- }
- sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t)
dmi_id);
-
- if ((new_dmi->vtpm_tx_fname = (char *) malloc(11 +
strlen(VTPM_TX_FIFO))) == NULL) {
- status = TPM_RESOURCES;
- goto abort_egress;
- }
- sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t)
dmi_id);
-
- new_dmi->guest_tx_fh = -1;
- new_dmi->vtpm_tx_fh= -1;
-
- if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) {
- if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){
- status = TPM_FAIL;
- goto abort_egress;
- }
- }
-
- if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) {
- if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) {
- status = TPM_FAIL;
- goto abort_egress;
- }
- }
-
- // Launch DMI
- sprintf(dmi_id_str, "%d", (int) dmi_id);
-#ifdef MANUAL_DM_LAUNCH
- vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n",
dmi_id_str);
- new_dmi->dmi_pid = 0;
-#else
- pid_t pid = fork();
-
- if (pid == -1) {
- vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch
vtpm\n");
- status = TPM_RESOURCES;
- goto abort_egress;
- } else if (pid == 0) {
- if ( stat(new_dmi->NVMLocation, &file_info) == -1)
- execl (TPM_EMULATOR_PATH, "vtmpd", "clear",
dmi_id_str, NULL);
- else
- execl (TPM_EMULATOR_PATH, "vtpmd", "save",
dmi_id_str, NULL);
-
- // Returning from these at all is an error.
- vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch
vtpm\n");
- } else {
- new_dmi->dmi_pid = pid;
- vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid);
- }
-#endif // MANUAL_DM_LAUNCH
- }
-#else // VTPM_MUTLI_VM
- // FIXME: Measure DMI through call to Measurement agent in platform.
-#endif
-
- vtpm_globals->DMI_table_dirty = TRUE;
- new_dmi->connected = TRUE;
- status=TPM_SUCCESS;
- goto egress;
-
- abort_egress:
- close_dmi( new_dmi );
-
- egress:
- return status;
-}
-
-TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) {
-
- TPM_RESULT status=TPM_FAIL;
- VTPM_DMI_RESOURCE *dmi_res=NULL;
- UINT32 dmi_id;
-
- if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
- vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.");
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
-
- BSG_UnpackList( param_buf->bytes, 1,
- BSG_TYPE_UINT32, &dmi_id);
-
- vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id);
-
- dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
&dmi_id);
- if (dmi_res == NULL ) {
- vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n");
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
-
- if (!dmi_res->connected) {
- vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n");
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
-
- // Close Dmi
- TPMTRYRETURN(close_dmi( dmi_res ));
-
- status=TPM_SUCCESS;
- goto egress;
-
- abort_egress:
- egress:
-
- return status;
-}
-
-TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) {
-
- TPM_RESULT status=TPM_FAIL;
- VTPM_DMI_RESOURCE *dmi_res=NULL;
- UINT32 dmi_id;
-
- if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
- vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n");
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
-
- BSG_UnpackList( param_buf->bytes, 1,
- BSG_TYPE_UINT32, &dmi_id);
-
- vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id);
-
- dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map,
&dmi_id);
- if (dmi_res == NULL) {
- vtpmlogerror(VTPM_LOG_VTPM, "Closing non-existent DMI.\n");
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
-
- //TODO: Automatically delete file dmi_res->NVMLocation
-
- // Close DMI first
- TPMTRYRETURN(close_dmi( dmi_res ));
- free ( dmi_res );
-
- status=TPM_SUCCESS;
- goto egress;
-
- abort_egress:
- egress:
-
- return status;
-}
+// ===================================================================
+//
+// Copyright (c) 2005, Intel Corp.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Intel Corporation nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+// ===================================================================
+//
+// dmictl.c
+//
+// Functions for creating and destroying DMIs
+//
+// ==================================================================
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifndef VTPM_MUTLI_VM
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <signal.h>
+ #include <wait.h>
+#endif
+
+#include "vtpmpriv.h"
+#include "bsg.h"
+#include "buffer.h"
+#include "log.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+
+#define TPM_EMULATOR_PATH "/usr/bin/vtpmd"
+
+TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) {
+ TPM_RESULT status = TPM_FAIL;
+
+ if (dmi_res == NULL)
+ return TPM_SUCCESS;
+
+ status = TCS_CloseContext(dmi_res->TCSContext);
+ free ( dmi_res->NVMLocation );
+ dmi_res->connected = FALSE;
+
+#ifndef VTPM_MULTI_VM
+ free(dmi_res->guest_tx_fname);
+ free(dmi_res->vtpm_tx_fname);
+
+ close(dmi_res->guest_tx_fh); dmi_res->guest_tx_fh = -1;
+ close(dmi_res->vtpm_tx_fh); dmi_res->vtpm_tx_fh = -1;
+
+ #ifndef MANUAL_DM_LAUNCH
+ if (dmi_res->dmi_id != VTPM_CTL_DM) {
+ if (dmi_res->dmi_pid != 0) {
+ vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n", dmi_res->dmi_pid);
+ if (kill(dmi_res->dmi_pid, SIGKILL) !=0) {
+ vtpmloginfo(VTPM_LOG_VTPM, "DMI on pid %d is already dead.\n",
dmi_res->dmi_pid);
+ } else if (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid) {
+ vtpmlogerror(VTPM_LOG_VTPM, "DMI on pid %d failed to stop.\n",
dmi_res->dmi_pid);
+ status = TPM_FAIL;
+ }
+ } else {
+ vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi because it's pid was
0.\n");
+ status = TPM_FAIL;
+ }
+ }
+ #endif
+#endif
+
+ return status;
+}
+
+TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) {
+
+ VTPM_DMI_RESOURCE *new_dmi=NULL;
+ TPM_RESULT status=TPM_FAIL;
+ BYTE type;
+ UINT32 dmi_id, domain_id, *dmi_id_key;
+
+#ifndef VTPM_MULTI_VM
+ int fh;
+ char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL
+ struct stat file_info;
+#endif
+
+ if (param_buf == NULL) { // Assume creation of Dom 0 control
+ type = 0;
+ domain_id = VTPM_CTL_DM;
+ dmi_id = VTPM_CTL_DM;
+ } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) {
+ vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n",
buffer_len(param_buf));
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ } else {
+ BSG_UnpackList( param_buf->bytes, 3,
+ BSG_TYPE_BYTE, &type,
+ BSG_TYPE_UINT32, &domain_id,
+ BSG_TYPE_UINT32, &dmi_id);
+ }
+
+ new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
&dmi_id);
+ if (new_dmi == NULL) {
+ vtpmloginfo(VTPM_LOG_VTPM, "Creating new DMI instance %d attached on
domain %d.\n", dmi_id, domain_id);
+ // Brand New DMI. Initialize the persistent pieces
+ if ((new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE)))
== NULL) {
+ status = TPM_RESOURCES;
+ goto abort_egress;
+ }
+ memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE));
+ new_dmi->dmi_id = dmi_id;
+ new_dmi->connected = FALSE;
+
+ if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) {
+ status = TPM_RESOURCES;
+ goto abort_egress;
+ }
+ *dmi_id_key = new_dmi->dmi_id;
+
+ // install into map
+ if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){
+ free(new_dmi);
+ free(dmi_id_key);
+ status = TPM_FAIL;
+ goto egress;
+ }
+
+ } else
+ vtpmloginfo(VTPM_LOG_VTPM, "Re-attaching DMI instance %d on domain %d
.\n", dmi_id, domain_id);
+
+ if (new_dmi->connected) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Attempt to re-attach, currently attached
instance %d. Ignoring\n", dmi_id);
+ status = TPM_BAD_PARAMETER;
+ goto egress;
+ }
+
+ // Initialize the Non-persistent pieces
+ new_dmi->dmi_domain_id = domain_id;
+ new_dmi->NVMLocation = NULL;
+
+ new_dmi->TCSContext = 0;
+ TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) );
+
+ new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE));
+ sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id);
+
+ // Measure DMI
+ // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value
+ /*
+ fh = open(TPM_EMULATOR_PATH, O_RDONLY);
+ stat_ret = fstat(fh, &file_stat);
+ if (stat_ret == 0)
+ dmi_size = file_stat.st_size;
+ else {
+ vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n");
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+ dmi_buffer
+ */
+ memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST));
+
+#ifndef VTPM_MULTI_VM
+ if (dmi_id != VTPM_CTL_DM) {
+ // Create a pair of fifo pipes
+ if( (new_dmi->guest_tx_fname = (char *) malloc(11 +
strlen(GUEST_TX_FIFO))) == NULL){
+ status = TPM_RESOURCES;
+ goto abort_egress;
+ }
+ sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t) dmi_id);
+
+ if ((new_dmi->vtpm_tx_fname = (char *) malloc(11 + strlen(VTPM_TX_FIFO)))
== NULL) {
+ status = TPM_RESOURCES;
+ goto abort_egress;
+ }
+ sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t) dmi_id);
+
+ new_dmi->guest_tx_fh = -1;
+ new_dmi->vtpm_tx_fh= -1;
+
+ if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) {
+ if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to create dmi fifo.\n");
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+ }
+
+ if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) {
+ if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to create dmi fifo.\n");
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+ }
+
+ // Launch DMI
+ sprintf(dmi_id_str, "%d", (int) dmi_id);
+#ifdef MANUAL_DM_LAUNCH
+ vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n",
dmi_id_str);
+ new_dmi->dmi_pid = 0;
+#else
+ pid_t pid = fork();
+
+ if (pid == -1) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch vtpm\n");
+ status = TPM_RESOURCES;
+ goto abort_egress;
+ } else if (pid == 0) {
+ if ( stat(new_dmi->NVMLocation, &file_info) == -1)
+ execl (TPM_EMULATOR_PATH, "vtmpd", "clear", dmi_id_str, NULL);
+ else
+ execl (TPM_EMULATOR_PATH, "vtpmd", "save", dmi_id_str, NULL);
+
+ // Returning from these at all is an error.
+ vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch vtpm\n");
+ } else {
+ new_dmi->dmi_pid = pid;
+ vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid);
+ }
+#endif // MANUAL_DM_LAUNCH
+ }
+#else // VTPM_MUTLI_VM
+ // FIXME: Measure DMI through call to Measurement agent in platform.
+#endif
+
+ vtpm_globals->DMI_table_dirty = TRUE;
+ new_dmi->connected = TRUE;
+ status=TPM_SUCCESS;
+ goto egress;
+
+ abort_egress:
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to create DMI id=%d due to status=%s.
Cleaning.\n", dmi_id, tpm_get_error_name(status));
+ close_dmi( new_dmi );
+
+ egress:
+ return status;
+}
+
+TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) {
+
+ TPM_RESULT status=TPM_FAIL;
+ VTPM_DMI_RESOURCE *dmi_res=NULL;
+ UINT32 dmi_id;
+
+ if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.");
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
+
+ BSG_UnpackList( param_buf->bytes, 1,
+ BSG_TYPE_UINT32, &dmi_id);
+
+ vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id);
+
+ dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
&dmi_id);
+ if (dmi_res == NULL ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n");
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
+
+ if (!dmi_res->connected) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n");
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
+
+ // Close Dmi
+ TPMTRYRETURN(close_dmi( dmi_res ));
+
+ status=TPM_SUCCESS;
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ return status;
+}
+
+TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) {
+
+ TPM_RESULT status=TPM_FAIL;
+ VTPM_DMI_RESOURCE *dmi_res=NULL;
+ UINT32 dmi_id;
+
+ if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n");
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
+
+ BSG_UnpackList( param_buf->bytes, 1,
+ BSG_TYPE_UINT32, &dmi_id);
+
+ vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id);
+
+ dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map,
&dmi_id);
+ if (dmi_res == NULL) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Closing non-existent DMI.\n");
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
+
+ //TODO: Automatically delete file dmi_res->NVMLocation
+
+ // Close DMI first
+ TPMTRYRETURN(close_dmi( dmi_res ));
+ free ( dmi_res );
+
+ status=TPM_SUCCESS;
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ return status;
+}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/securestorage.c
--- a/tools/vtpm_manager/manager/securestorage.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/securestorage.c Thu Sep 22 17:42:01 2005
@@ -1,401 +1,401 @@
-// ===================================================================
-//
-// Copyright (c) 2005, Intel Corp.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Intel Corporation nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-// ===================================================================
-//
-// securestorage.c
-//
-// Functions regarding securely storing DMI secrets.
-//
-// ==================================================================
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "tcg.h"
-#include "vtpm_manager.h"
-#include "vtpmpriv.h"
-#include "vtsp.h"
-#include "bsg.h"
-#include "crypto.h"
-#include "hashtable.h"
-#include "hashtable_itr.h"
-#include "buffer.h"
-#include "log.h"
-
-TPM_RESULT VTPM_Handle_Save_NVM(VTPM_DMI_RESOURCE *myDMI,
- const buffer_t *inbuf,
- buffer_t *outbuf) {
-
- TPM_RESULT status = TPM_SUCCESS;
- symkey_t symkey;
- buffer_t state_cipher = NULL_BUF,
- symkey_cipher = NULL_BUF;
- int fh;
- long bytes_written;
- BYTE *sealed_NVM=NULL;
- UINT32 sealed_NVM_size, i;
- struct pack_constbuf_t symkey_cipher32, state_cipher32;
-
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Save_NVMing[%d]: 0x", buffer_len(inbuf));
- for (i=0; i< buffer_len(inbuf); i++)
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
- // Generate a sym key and encrypt state with it
- TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_genkey (&symkey) );
- TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_encrypt (&symkey, inbuf,
&state_cipher) );
-
- // Encrypt symmetric key
- TPMTRYRETURN( VTSP_Bind( &vtpm_globals->storageKey,
- &symkey.key,
- &symkey_cipher) );
-
- // Create output blob: symkey_size + symkey_cipher + state_cipher_size +
state_cipher
-
- symkey_cipher32.size = buffer_len(&symkey_cipher);
- symkey_cipher32.data = symkey_cipher.bytes;
-
- state_cipher32.size = buffer_len(&state_cipher);
- state_cipher32.data = state_cipher.bytes;
-
- sealed_NVM = (BYTE *) malloc( 2 * sizeof(UINT32) + symkey_cipher32.size +
state_cipher32.size);
-
- sealed_NVM_size = BSG_PackList(sealed_NVM, 2,
- BSG_TPM_SIZE32_DATA, &symkey_cipher32,
- BSG_TPM_SIZE32_DATA, &state_cipher32);
-
- // Mark DMI Table so new save state info will get pushed to disk on return.
- vtpm_globals->DMI_table_dirty = TRUE;
-
- // Write sealed blob off disk from NVMLocation
- // TODO: How to properly return from these. Do we care if we return failure
- // after writing the file? We can't get the old one back.
- // TODO: Backup old file and try and recover that way.
- fh = open(myDMI->NVMLocation, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
- if ( (bytes_written = write(fh, sealed_NVM, sealed_NVM_size) ) != (long)
sealed_NVM_size) {
- vtpmlogerror(VTPM_LOG_VTPM, "We just overwrote a DMI_NVM and failed to
finish. %ld/%ld bytes.\n", bytes_written, (long)sealed_NVM_size);
- status = TPM_IOERROR;
- goto abort_egress;
- }
- close(fh);
-
- Crypto_SHA1Full (sealed_NVM, sealed_NVM_size, (BYTE *)
&myDMI->NVM_measurement);
-
- vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of
E(NVM)\n", buffer_len(&symkey_cipher), buffer_len(&state_cipher));
- goto egress;
-
- abort_egress:
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
-
- egress:
-
- buffer_free ( &state_cipher);
- buffer_free ( &symkey_cipher);
- free(sealed_NVM);
- Crypto_symcrypto_freekey (&symkey);
-
- return status;
-}
-
-
-/* inbuf = null outbuf = sealed blob size, sealed blob.*/
-TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI,
- const buffer_t *inbuf,
- buffer_t *outbuf) {
-
- TPM_RESULT status = TPM_SUCCESS;
- symkey_t symkey;
- buffer_t state_cipher = NULL_BUF,
- symkey_clear = NULL_BUF,
- symkey_cipher = NULL_BUF;
- struct pack_buf_t symkey_cipher32, state_cipher32;
-
- UINT32 sealed_NVM_size;
- BYTE *sealed_NVM = NULL;
- long fh_size;
- int fh, stat_ret, i;
- struct stat file_stat;
- TPM_DIGEST sealedNVMHash;
-
- memset(&symkey, 0, sizeof(symkey_t));
-
- if (myDMI->NVMLocation == NULL) {
- vtpmlogerror(VTPM_LOG_VTPM, "Unable to load NVM because the file name
NULL.\n");
- status = TPM_AUTHFAIL;
- goto abort_egress;
- }
-
- //Read sealed blob off disk from NVMLocation
- fh = open(myDMI->NVMLocation, O_RDONLY);
- stat_ret = fstat(fh, &file_stat);
- if (stat_ret == 0)
- fh_size = file_stat.st_size;
- else {
- status = TPM_IOERROR;
- goto abort_egress;
- }
-
- sealed_NVM = (BYTE *) malloc(fh_size);
- if (read(fh, sealed_NVM, fh_size) != fh_size) {
- status = TPM_IOERROR;
- goto abort_egress;
- }
- close(fh);
-
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Load_NVMing[%ld]: 0x", fh_size);
- for (i=0; i< fh_size; i++)
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_NVM[i]);
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
- sealed_NVM_size = BSG_UnpackList(sealed_NVM, 2,
- BSG_TPM_SIZE32_DATA, &symkey_cipher32,
- BSG_TPM_SIZE32_DATA, &state_cipher32);
-
- TPMTRYRETURN( buffer_init_convert (&symkey_cipher,
- symkey_cipher32.size,
- symkey_cipher32.data) );
-
- TPMTRYRETURN( buffer_init_convert (&state_cipher,
- state_cipher32.size,
- state_cipher32.data) );
-
- Crypto_SHA1Full(sealed_NVM, sealed_NVM_size, (BYTE *) &sealedNVMHash);
-
- // Verify measurement of sealed blob.
- if (memcmp(&sealedNVMHash, &myDMI->NVM_measurement, sizeof(TPM_DIGEST)) ) {
- vtpmlogerror(VTPM_LOG_VTPM, "VTPM LoadNVM NVM measurement check
failed.\n");
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Correct hash: ");
- for (i=0; i< sizeof(TPM_DIGEST); i++)
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ",
((BYTE*)&myDMI->NVM_measurement)[i]);
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Measured hash: ");
- for (i=0; i< sizeof(TPM_DIGEST); i++)
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&sealedNVMHash)[i]);
- vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
- status = TPM_AUTHFAIL;
- goto abort_egress;
- }
-
- // Decrypt Symmetric Key
- TPMTRYRETURN( VTSP_Unbind( myDMI->TCSContext,
- vtpm_globals->storageKeyHandle,
- &symkey_cipher,
- (const
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
- &symkey_clear,
- &(vtpm_globals->keyAuth) ) );
-
- // create symmetric key using saved bits
- Crypto_symcrypto_initkey (&symkey, &symkey_clear);
-
- // Decrypt State
- TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &state_cipher,
outbuf) );
-
- goto egress;
-
- abort_egress:
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
-
- egress:
-
- buffer_free ( &state_cipher);
- buffer_free ( &symkey_clear);
- buffer_free ( &symkey_cipher);
- free( sealed_NVM );
- Crypto_symcrypto_freekey (&symkey);
-
- return status;
-}
-
-TPM_RESULT VTPM_SaveService(void) {
- TPM_RESULT status=TPM_SUCCESS;
- int fh, dmis=-1;
-
- BYTE *flat_global;
- int flat_global_size, bytes_written;
- UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
- struct pack_buf_t storage_key_pack = {storageKeySize,
vtpm_globals->storageKeyWrap.bytes};
-
- struct hashtable_itr *dmi_itr;
- VTPM_DMI_RESOURCE *dmi_res;
-
- UINT32 flat_global_full_size;
-
- // Global Values needing to be saved
- flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
- sizeof(UINT32) + // storagekeysize
- storageKeySize + // storage key
- hashtable_count(vtpm_globals->dmi_map) * // num DMIS
- (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
-
-
- flat_global = (BYTE *) malloc( flat_global_full_size);
-
- flat_global_size = BSG_PackList(flat_global, 4,
- BSG_TPM_AUTHDATA,
&vtpm_globals->owner_usage_auth,
- BSG_TPM_AUTHDATA,
&vtpm_globals->srk_usage_auth,
- BSG_TPM_SECRET,
&vtpm_globals->storage_key_usage_auth,
- BSG_TPM_SIZE32_DATA, &storage_key_pack);
-
- // Per DMI values to be saved
- if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-
- dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
- do {
- dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
- dmis++;
-
- // No need to save dmi0.
- if (dmi_res->dmi_id == 0)
- continue;
-
-
- flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
- BSG_TYPE_UINT32, &dmi_res->dmi_id,
- BSG_TPM_DIGEST,
&dmi_res->NVM_measurement,
- BSG_TPM_DIGEST,
&dmi_res->DMI_measurement);
-
- } while (hashtable_iterator_advance(dmi_itr));
- }
-
- //FIXME: Once we have a way to protect a TPM key, we should use it to
- // encrypt this blob. BUT, unless there is a way to ensure the key is
- // not used by other apps, this encryption is useless.
- fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
- if (fh == -1) {
- vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n",
STATE_FILE);
- status = TPM_IOERROR;
- goto abort_egress;
- }
-
- if ( (bytes_written = write(fh, flat_global, flat_global_size)) !=
flat_global_size ) {
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes
written.\n", bytes_written, flat_global_size);
- status = TPM_IOERROR;
- goto abort_egress;
- }
- vtpm_globals->DMI_table_dirty = FALSE;
-
- goto egress;
-
- abort_egress:
- egress:
-
- free(flat_global);
- close(fh);
-
- vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis =
%d)\n", (int) status, dmis);
- return status;
-}
-
-TPM_RESULT VTPM_LoadService(void) {
-
- TPM_RESULT status=TPM_SUCCESS;
- int fh, stat_ret, dmis=0;
- long fh_size = 0, step_size;
- BYTE *flat_global=NULL;
- struct pack_buf_t storage_key_pack;
- UINT32 *dmi_id_key;
-
- VTPM_DMI_RESOURCE *dmi_res;
- struct stat file_stat;
-
- fh = open(STATE_FILE, O_RDONLY );
- stat_ret = fstat(fh, &file_stat);
- if (stat_ret == 0)
- fh_size = file_stat.st_size;
- else {
- status = TPM_IOERROR;
- goto abort_egress;
- }
-
- flat_global = (BYTE *) malloc(fh_size);
-
- if ((long) read(fh, flat_global, fh_size) != fh_size ) {
- status = TPM_IOERROR;
- goto abort_egress;
- }
-
- // Global Values needing to be saved
- step_size = BSG_UnpackList( flat_global, 4,
- BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
- BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
- BSG_TPM_SECRET,
&vtpm_globals->storage_key_usage_auth,
- BSG_TPM_SIZE32_DATA, &storage_key_pack);
-
- TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
- TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap,
storage_key_pack.size, storage_key_pack.data) );
-
- // Per DMI values to be saved
- while ( step_size < fh_size ){
- if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
- vtpmlogerror(VTPM_LOG_VTPM, "Encountered %ld extra bytes at end of
manager state.\n", fh_size-step_size);
- step_size = fh_size;
- } else {
- dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
- dmis++;
-
- dmi_res->connected = FALSE;
-
- step_size += BSG_UnpackList(flat_global + step_size, 3,
- BSG_TYPE_UINT32, &dmi_res->dmi_id,
- BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
- BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-
- // install into map
- dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
- *dmi_id_key = dmi_res->dmi_id;
- if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
- status = TPM_FAIL;
- goto abort_egress;
- }
-
- }
-
- }
-
- goto egress;
-
- abort_egress:
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data\n");
- egress:
-
- if (flat_global)
- free(flat_global);
- close(fh);
-
- vtpmloginfo(VTPM_LOG_VTPM, "Previously saved state reloaded (status = %d,
dmis = %d).\n", (int) status, dmis);
- return status;
-}
+// ===================================================================
+//
+// Copyright (c) 2005, Intel Corp.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Intel Corporation nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+// ===================================================================
+//
+// securestorage.c
+//
+// Functions regarding securely storing DMI secrets.
+//
+// ==================================================================
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "tcg.h"
+#include "vtpm_manager.h"
+#include "vtpmpriv.h"
+#include "vtsp.h"
+#include "bsg.h"
+#include "crypto.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+#include "buffer.h"
+#include "log.h"
+
+TPM_RESULT VTPM_Handle_Save_NVM(VTPM_DMI_RESOURCE *myDMI,
+ const buffer_t *inbuf,
+ buffer_t *outbuf) {
+
+ TPM_RESULT status = TPM_SUCCESS;
+ symkey_t symkey;
+ buffer_t state_cipher = NULL_BUF,
+ symkey_cipher = NULL_BUF;
+ int fh;
+ long bytes_written;
+ BYTE *sealed_NVM=NULL;
+ UINT32 sealed_NVM_size, i;
+ struct pack_constbuf_t symkey_cipher32, state_cipher32;
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Save_NVMing[%d]: 0x", buffer_len(inbuf));
+ for (i=0; i< buffer_len(inbuf); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+ // Generate a sym key and encrypt state with it
+ TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_genkey (&symkey) );
+ TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_encrypt (&symkey, inbuf,
&state_cipher) );
+
+ // Encrypt symmetric key
+ TPMTRYRETURN( VTSP_Bind( &vtpm_globals->storageKey,
+ &symkey.key,
+ &symkey_cipher) );
+
+ // Create output blob: symkey_size + symkey_cipher + state_cipher_size +
state_cipher
+
+ symkey_cipher32.size = buffer_len(&symkey_cipher);
+ symkey_cipher32.data = symkey_cipher.bytes;
+
+ state_cipher32.size = buffer_len(&state_cipher);
+ state_cipher32.data = state_cipher.bytes;
+
+ sealed_NVM = (BYTE *) malloc( 2 * sizeof(UINT32) + symkey_cipher32.size +
state_cipher32.size);
+
+ sealed_NVM_size = BSG_PackList(sealed_NVM, 2,
+ BSG_TPM_SIZE32_DATA, &symkey_cipher32,
+ BSG_TPM_SIZE32_DATA, &state_cipher32);
+
+ // Mark DMI Table so new save state info will get pushed to disk on return.
+ vtpm_globals->DMI_table_dirty = TRUE;
+
+ // Write sealed blob off disk from NVMLocation
+ // TODO: How to properly return from these. Do we care if we return failure
+ // after writing the file? We can't get the old one back.
+ // TODO: Backup old file and try and recover that way.
+ fh = open(myDMI->NVMLocation, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
+ if ( (bytes_written = write(fh, sealed_NVM, sealed_NVM_size) ) != (long)
sealed_NVM_size) {
+ vtpmlogerror(VTPM_LOG_VTPM, "We just overwrote a DMI_NVM and failed to
finish. %ld/%ld bytes.\n", bytes_written, (long)sealed_NVM_size);
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+ close(fh);
+
+ Crypto_SHA1Full (sealed_NVM, sealed_NVM_size, (BYTE *)
&myDMI->NVM_measurement);
+
+ vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of
E(NVM)\n", buffer_len(&symkey_cipher), buffer_len(&state_cipher));
+ goto egress;
+
+ abort_egress:
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
+
+ egress:
+
+ buffer_free ( &state_cipher);
+ buffer_free ( &symkey_cipher);
+ free(sealed_NVM);
+ Crypto_symcrypto_freekey (&symkey);
+
+ return status;
+}
+
+
+/* inbuf = null outbuf = sealed blob size, sealed blob.*/
+TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI,
+ const buffer_t *inbuf,
+ buffer_t *outbuf) {
+
+ TPM_RESULT status = TPM_SUCCESS;
+ symkey_t symkey;
+ buffer_t state_cipher = NULL_BUF,
+ symkey_clear = NULL_BUF,
+ symkey_cipher = NULL_BUF;
+ struct pack_buf_t symkey_cipher32, state_cipher32;
+
+ UINT32 sealed_NVM_size;
+ BYTE *sealed_NVM = NULL;
+ long fh_size;
+ int fh, stat_ret, i;
+ struct stat file_stat;
+ TPM_DIGEST sealedNVMHash;
+
+ memset(&symkey, 0, sizeof(symkey_t));
+
+ if (myDMI->NVMLocation == NULL) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Unable to load NVM because the file name
NULL.\n");
+ status = TPM_AUTHFAIL;
+ goto abort_egress;
+ }
+
+ //Read sealed blob off disk from NVMLocation
+ fh = open(myDMI->NVMLocation, O_RDONLY);
+ stat_ret = fstat(fh, &file_stat);
+ if (stat_ret == 0)
+ fh_size = file_stat.st_size;
+ else {
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+
+ sealed_NVM = (BYTE *) malloc(fh_size);
+ if (read(fh, sealed_NVM, fh_size) != fh_size) {
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+ close(fh);
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Load_NVMing[%ld]: 0x", fh_size);
+ for (i=0; i< fh_size; i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_NVM[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+ sealed_NVM_size = BSG_UnpackList(sealed_NVM, 2,
+ BSG_TPM_SIZE32_DATA, &symkey_cipher32,
+ BSG_TPM_SIZE32_DATA, &state_cipher32);
+
+ TPMTRYRETURN( buffer_init_convert (&symkey_cipher,
+ symkey_cipher32.size,
+ symkey_cipher32.data) );
+
+ TPMTRYRETURN( buffer_init_convert (&state_cipher,
+ state_cipher32.size,
+ state_cipher32.data) );
+
+ Crypto_SHA1Full(sealed_NVM, sealed_NVM_size, (BYTE *) &sealedNVMHash);
+
+ // Verify measurement of sealed blob.
+ if (memcmp(&sealedNVMHash, &myDMI->NVM_measurement, sizeof(TPM_DIGEST)) ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "VTPM LoadNVM NVM measurement check
failed.\n");
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Correct hash: ");
+ for (i=0; i< sizeof(TPM_DIGEST); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ",
((BYTE*)&myDMI->NVM_measurement)[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Measured hash: ");
+ for (i=0; i< sizeof(TPM_DIGEST); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&sealedNVMHash)[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+ status = TPM_AUTHFAIL;
+ goto abort_egress;
+ }
+
+ // Decrypt Symmetric Key
+ TPMTRYRETURN( VTSP_Unbind( myDMI->TCSContext,
+ vtpm_globals->storageKeyHandle,
+ &symkey_cipher,
+ (const
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
+ &symkey_clear,
+ &(vtpm_globals->keyAuth) ) );
+
+ // create symmetric key using saved bits
+ Crypto_symcrypto_initkey (&symkey, &symkey_clear);
+
+ // Decrypt State
+ TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &state_cipher,
outbuf) );
+
+ goto egress;
+
+ abort_egress:
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
+
+ egress:
+
+ buffer_free ( &state_cipher);
+ buffer_free ( &symkey_clear);
+ buffer_free ( &symkey_cipher);
+ free( sealed_NVM );
+ Crypto_symcrypto_freekey (&symkey);
+
+ return status;
+}
+
+TPM_RESULT VTPM_SaveService(void) {
+ TPM_RESULT status=TPM_SUCCESS;
+ int fh, dmis=-1;
+
+ BYTE *flat_global;
+ int flat_global_size, bytes_written;
+ UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
+ struct pack_buf_t storage_key_pack = {storageKeySize,
vtpm_globals->storageKeyWrap.bytes};
+
+ struct hashtable_itr *dmi_itr;
+ VTPM_DMI_RESOURCE *dmi_res;
+
+ UINT32 flat_global_full_size;
+
+ // Global Values needing to be saved
+ flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
+ sizeof(UINT32) + // storagekeysize
+ storageKeySize + // storage key
+ hashtable_count(vtpm_globals->dmi_map) * // num DMIS
+ (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
+
+
+ flat_global = (BYTE *) malloc( flat_global_full_size);
+
+ flat_global_size = BSG_PackList(flat_global, 4,
+ BSG_TPM_AUTHDATA,
&vtpm_globals->owner_usage_auth,
+ BSG_TPM_AUTHDATA,
&vtpm_globals->srk_usage_auth,
+ BSG_TPM_SECRET,
&vtpm_globals->storage_key_usage_auth,
+ BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
+ // Per DMI values to be saved
+ if (hashtable_count(vtpm_globals->dmi_map) > 0) {
+
+ dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
+ do {
+ dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
+ dmis++;
+
+ // No need to save dmi0.
+ if (dmi_res->dmi_id == 0)
+ continue;
+
+
+ flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
+ BSG_TYPE_UINT32, &dmi_res->dmi_id,
+ BSG_TPM_DIGEST,
&dmi_res->NVM_measurement,
+ BSG_TPM_DIGEST,
&dmi_res->DMI_measurement);
+
+ } while (hashtable_iterator_advance(dmi_itr));
+ }
+
+ //FIXME: Once we have a way to protect a TPM key, we should use it to
+ // encrypt this blob. BUT, unless there is a way to ensure the key is
+ // not used by other apps, this encryption is useless.
+ fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
+ if (fh == -1) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n",
STATE_FILE);
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+
+ if ( (bytes_written = write(fh, flat_global, flat_global_size)) !=
flat_global_size ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes
written.\n", bytes_written, flat_global_size);
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+ vtpm_globals->DMI_table_dirty = FALSE;
+
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ free(flat_global);
+ close(fh);
+
+ vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis =
%d)\n", (int) status, dmis);
+ return status;
+}
+
+TPM_RESULT VTPM_LoadService(void) {
+
+ TPM_RESULT status=TPM_SUCCESS;
+ int fh, stat_ret, dmis=0;
+ long fh_size = 0, step_size;
+ BYTE *flat_global=NULL;
+ struct pack_buf_t storage_key_pack;
+ UINT32 *dmi_id_key;
+
+ VTPM_DMI_RESOURCE *dmi_res;
+ struct stat file_stat;
+
+ fh = open(STATE_FILE, O_RDONLY );
+ stat_ret = fstat(fh, &file_stat);
+ if (stat_ret == 0)
+ fh_size = file_stat.st_size;
+ else {
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+
+ flat_global = (BYTE *) malloc(fh_size);
+
+ if ((long) read(fh, flat_global, fh_size) != fh_size ) {
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+
+ // Global Values needing to be saved
+ step_size = BSG_UnpackList( flat_global, 4,
+ BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+ BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
+ BSG_TPM_SECRET,
&vtpm_globals->storage_key_usage_auth,
+ BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
+ TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
+ TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap,
storage_key_pack.size, storage_key_pack.data) );
+
+ // Per DMI values to be saved
+ while ( step_size < fh_size ){
+ if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Encountered %ld extra bytes at end of
manager state.\n", fh_size-step_size);
+ step_size = fh_size;
+ } else {
+ dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
+ dmis++;
+
+ dmi_res->connected = FALSE;
+
+ step_size += BSG_UnpackList(flat_global + step_size, 3,
+ BSG_TYPE_UINT32, &dmi_res->dmi_id,
+ BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+ BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
+ // install into map
+ dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
+ *dmi_id_key = dmi_res->dmi_id;
+ if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
+ status = TPM_FAIL;
+ goto abort_egress;
+ }
+
+ }
+
+ }
+
+ vtpmloginfo(VTPM_LOG_VTPM, "Loaded saved state (dmis = %d).\n", dmis);
+ goto egress;
+
+ abort_egress:
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to load service data with error = %s\n",
tpm_get_error_name(status));
+ egress:
+
+ if (flat_global)
+ free(flat_global);
+ close(fh);
+
+ return status;
+}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/vtpm_manager.c
--- a/tools/vtpm_manager/manager/vtpm_manager.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/vtpm_manager.c Thu Sep 22 17:42:01 2005
@@ -1,735 +1,811 @@
-// ===================================================================
-//
-// Copyright (c) 2005, Intel Corp.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Intel Corporation nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-// ===================================================================
-//
-// vtpm_manager.c
-//
-// This file will house the main logic of the VTPM Manager
-//
-// ==================================================================
-
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <string.h>
-
-#ifndef VTPM_MULTI_VM
-#include <pthread.h>
-#include <errno.h>
-#include <aio.h>
-#include <time.h>
-#endif
-
-#include "vtpm_manager.h"
-#include "vtpmpriv.h"
-#include "vtsp.h"
-#include "bsg.h"
-#include "hashtable.h"
-#include "hashtable_itr.h"
-
-#include "log.h"
-#include "buffer.h"
-
-VTPM_GLOBALS *vtpm_globals=NULL;
-
-#ifdef VTPM_MULTI_VM
- #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, fmt,
##args );
- #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module,
fmt, ##args );
- #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, fmt,
##args );
-#else
- #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, "[%d]: "
fmt, threadType, ##args );
- #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module,
fmt, ##args );
- #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, "[%d]:
" fmt, threadType, ##args );
-#endif
-
-// --------------------------- Static Auths --------------------------
-#ifdef USE_FIXED_SRK_AUTH
-
-static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff};
-
-static BYTE FIXED_EK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff};
-
-#endif
-
-// -------------------------- Hash table functions --------------------
-
-static unsigned int hashfunc32(void *ky) {
- return (* (UINT32 *) ky);
-}
-
-static int equals32(void *k1, void *k2) {
- return (*(UINT32 *) k1 == *(UINT32 *) k2);
-}
-
-// --------------------------- Functions ------------------------------
-
-TPM_RESULT VTPM_Create_Service(){
-
- TPM_RESULT status = TPM_SUCCESS;
-
- // Generate Auth's for SRK & Owner
-#ifdef USE_FIXED_SRK_AUTH
- memcpy(vtpm_globals->owner_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
- memcpy(vtpm_globals->srk_usage_auth, FIXED_EK_AUTH, sizeof(TPM_AUTHDATA));
-#else
- Crypto_GetRandom(vtpm_globals->owner_usage_auth, sizeof(TPM_AUTHDATA) );
- Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );
-#endif
-
- // Take Owership of TPM
- CRYPTO_INFO ek_cryptoInfo;
-
- vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
- status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
-
- // If we can read PubEK then there is no owner and we should take it.
- if (status == TPM_SUCCESS) {
- TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
- (const
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
- (const
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
- &ek_cryptoInfo,
- &vtpm_globals->keyAuth));
-
- TPMTRYRETURN(VTSP_DisablePubekRead(vtpm_globals->manager_tcs_handle,
- (const
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
- &vtpm_globals->keyAuth));
- }
-
- // Generate storage key's auth
- Crypto_GetRandom( &vtpm_globals->storage_key_usage_auth,
- sizeof(TPM_AUTHDATA) );
-
- TCS_AUTH osap;
- TPM_AUTHDATA sharedsecret;
-
- TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
- TPM_ET_SRK,
- 0,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
- &sharedsecret,
- &osap) );
-
- TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
- TPM_KEY_BIND,
- (const
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
- TPM_SRK_KEYHANDLE,
- (const TPM_AUTHDATA*)&sharedsecret,
- &vtpm_globals->storageKeyWrap,
- &osap) );
-
- vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-
- goto egress;
-
- abort_egress:
- exit(1);
-
- egress:
- vtpmloginfo(VTPM_LOG_VTPM, "New VTPM Service initialized (Status = %d).\n",
status);
- return status;
-
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-#ifdef VTPM_MULTI_VM
-int VTPM_Service_Handler(){
-#else
-void *VTPM_Service_Handler(void *threadTypePtr){
-#endif
- TPM_RESULT status = TPM_FAIL; // Should never return
- UINT32 dmi, in_param_size, cmd_size, out_param_size,
out_message_size, out_message_size_full, dmi_cmd_size;
- BYTE *cmd_header, *in_param, *out_message, *dmi_cmd;
- buffer_t *command_buf=NULL, *result_buf=NULL;
- TPM_TAG tag;
- TPM_COMMAND_CODE ord;
- VTPM_DMI_RESOURCE *dmi_res;
- int size_read, size_write, i;
-
-#ifndef VTPM_MULTI_VM
- int threadType = *(int *) threadTypePtr;
-
- // async io structures
- struct aiocb dmi_aio;
- struct aiocb *dmi_aio_a[1];
- dmi_aio_a[0] = &dmi_aio;
-#endif
-
-#ifdef DUMMY_BACKEND
- int dummy_rx;
-#endif
-
- // TODO: Reinsert ifdefs to enable support for MULTI-VM
-
- cmd_header = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV);
- command_buf = (buffer_t *) malloc(sizeof(buffer_t));
- result_buf = (buffer_t *) malloc(sizeof(buffer_t));
-
-#ifndef VTPM_MULTI_VM
- TPM_RESULT *ret_value = (TPM_RESULT *) malloc(sizeof(TPM_RESULT));
-#endif
-
- int *tx_fh, *rx_fh;
-
-#ifdef VTPM_MULTI_VM
- rx_fh = &vtpm_globals->be_fh;
-#else
- if (threadType == BE_LISTENER_THREAD) {
-#ifdef DUMMY_BACKEND
- dummy_rx = -1;
- rx_fh = &dummy_rx;
-#else
- rx_fh = &vtpm_globals->be_fh;
-#endif
- } else { // DMI_LISTENER_THREAD
- rx_fh = &vtpm_globals->vtpm_rx_fh;
- }
-#endif
-
-#ifndef VTPM_MULTI_VM
- int fh;
- if (threadType == BE_LISTENER_THREAD) {
- tx_fh = &vtpm_globals->be_fh;
- if ( (fh = open(GUEST_RX_FIFO, O_RDWR)) == -1) {
- if ( mkfifo(GUEST_RX_FIFO, S_IWUSR | S_IRUSR ) ){
- *ret_value = TPM_FAIL;
- pthread_exit(ret_value);
- }
- } else
- close(fh);
-
- } else { // else DMI_LISTENER_THREAD
- // tx_fh will be set once the DMI is identified
- // But we need to make sure the read pip is created.
- if ( (fh = open(VTPM_RX_FIFO, O_RDWR)) == -1) {
- if ( mkfifo(VTPM_RX_FIFO, S_IWUSR | S_IRUSR ) ){
- *ret_value = TPM_FAIL;
- pthread_exit(ret_value);
- }
- } else
- close(fh);
-
- }
-#endif
-
- while(1) {
-
- if (threadType == BE_LISTENER_THREAD) {
- vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for Guest requests & ctrl
messages.\n");
- } else
- vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n");
-
-
- if (*rx_fh < 0) {
- if (threadType == BE_LISTENER_THREAD)
-#ifdef DUMMY_BACKEND
- *rx_fh = open("/tmp/in.fifo", O_RDWR);
-#else
- *rx_fh = open(VTPM_BE_DEV, O_RDWR);
-#endif
- else // DMI Listener
- *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
-
- }
-
- if (*rx_fh < 0) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
-#ifdef VTPM_MULTI_VM
- return TPM_IOERROR;
-#else
- *ret_value = TPM_IOERROR;
- pthread_exit(ret_value);
-#endif
- }
-
- size_read = read(*rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
- if (size_read > 0) {
- vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV[%d}: 0x", size_read);
- for (i=0; i<size_read; i++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ",
cmd_header[i]);
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't read from BE. Aborting... \n");
- close(*rx_fh);
- *rx_fh = -1;
- goto abort_command;
- }
-
- if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
- vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "\n");
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command shorter than normal header
(%d bytes). Aborting...\n", size_read);
- goto abort_command;
- }
-
- BSG_UnpackList(cmd_header, 4,
- BSG_TYPE_UINT32, &dmi,
- BSG_TPM_TAG, &tag,
- BSG_TYPE_UINT32, &in_param_size,
- BSG_TPM_COMMAND_CODE, &ord );
-
- // Note that in_param_size is in the client's context
- cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
- if (cmd_size > 0) {
- in_param = (BYTE *) malloc(cmd_size);
- size_read = read( *rx_fh, in_param, cmd_size);
- if (size_read > 0) {
- for (i=0; i<size_read; i++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
-
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE. Aborting...
\n");
- close(*rx_fh);
- *rx_fh = -1;
- goto abort_command;
- }
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
- if (size_read < (int) cmd_size) {
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) is shorter than
header indicates(%d). Aborting...\n", size_read, cmd_size);
- goto abort_command;
- }
- } else {
- in_param = NULL;
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
- }
-
- if ((threadType != BE_LISTENER_THREAD) && (dmi == 0)) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to access dom0 commands from
DMI interface. Aborting...\n");
- goto abort_command;
- }
-
- dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
&dmi);
- if (dmi_res == NULL) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to non-existent DMI
in domain: %d. Aborting...\n", dmi);
- goto abort_command;
- }
- if (!dmi_res->connected) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to disconnected DMI
in domain: %d. Aborting...\n", dmi);
- goto abort_command;
- }
-
- if (threadType != BE_LISTENER_THREAD)
- tx_fh = &dmi_res->vtpm_tx_fh;
- // else we set this before the while loop since it doesn't change.
-
- if ( (buffer_init_convert(command_buf, cmd_size, in_param) != TPM_SUCCESS)
||
- (buffer_init(result_buf, 0, 0) != TPM_SUCCESS) ) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers.
Aborting...\n");
- goto abort_command;
- }
-
- // Dispatch it as either control or user request.
- if (tag == VTPM_TAG_REQ) {
- if (dmi_res->dmi_id == VTPM_CTL_DM){
- switch (ord) {
- case VTPM_ORD_OPEN:
- status = VTPM_Handle_New_DMI(command_buf);
- break;
-
- case VTPM_ORD_CLOSE:
- status = VTPM_Handle_Close_DMI(command_buf);
- break;
-
- case VTPM_ORD_DELETE:
- status = VTPM_Handle_Delete_DMI(command_buf);
- break;
- default:
- status = TPM_BAD_ORDINAL;
- } // switch
- } else {
-
- switch (ord) {
- case VTPM_ORD_SAVENVM:
- status= VTPM_Handle_Save_NVM(dmi_res,
- command_buf,
- result_buf);
- break;
- case VTPM_ORD_LOADNVM:
- status= VTPM_Handle_Load_NVM(dmi_res,
- command_buf,
- result_buf);
- break;
-
- case VTPM_ORD_TPMCOMMAND:
- status= VTPM_Handle_TPM_Command(dmi_res,
- command_buf,
- result_buf);
- break;
-
- default:
- status = TPM_BAD_ORDINAL;
- } // switch
- }
- } else { // This is not a VTPM Command at all
-
- if (threadType == BE_LISTENER_THREAD) {
- if (dmi == 0) {
- // This usually indicates a FE/BE driver.
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Illegal use of TPM command from
dom0\n");
- status = TPM_FAIL;
- } else {
- vtpmhandlerloginfo(VTPM_LOG_VTPM, "Forwarding command to DMI.\n");
-
- if (dmi_res->guest_tx_fh < 0)
- dmi_res->guest_tx_fh = open(dmi_res->guest_tx_fname, O_WRONLY |
O_NONBLOCK);
-
- if (dmi_res->guest_tx_fh < 0){
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound
fh to dmi.\n");
- status = TPM_IOERROR;
- goto abort_with_error;
- }
-
- //Note: Send message + dmi_id
- if (cmd_size) {
- dmi_cmd = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size);
- dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size;
- memcpy(dmi_cmd, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
- memcpy(dmi_cmd + VTPM_COMMAND_HEADER_SIZE_SRV, in_param, cmd_size);
- size_write = write(dmi_res->guest_tx_fh, dmi_cmd, dmi_cmd_size);
-
- if (size_write > 0) {
- vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT (DMI): 0x");
- for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size; i++) {
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", dmi_cmd[i]);
- }
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI.
Aborting... \n");
- close(dmi_res->guest_tx_fh);
- dmi_res->guest_tx_fh = -1;
- status = TPM_IOERROR;
- goto abort_with_error;
- }
- free(dmi_cmd);
- } else {
- dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV;
- size_write = write(dmi_res->guest_tx_fh, cmd_header,
VTPM_COMMAND_HEADER_SIZE_SRV );
- if (size_write > 0) {
- for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV; i++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ",
cmd_header[i]);
-
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI.
Aborting... \n");
- close(dmi_res->guest_tx_fh);
- dmi_res->guest_tx_fh = -1;
- status = TPM_IOERROR;
- goto abort_with_error;
- }
- }
-
- if (size_write != (int) dmi_cmd_size)
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Could not write entire command
to DMI (%d/%d)\n", size_write, dmi_cmd_size);
- buffer_free(command_buf);
-
- if (vtpm_globals->guest_rx_fh < 0)
- vtpm_globals->guest_rx_fh = open(GUEST_RX_FIFO, O_RDONLY);
-
- if (vtpm_globals->guest_rx_fh < 0){
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh to
dmi.\n");
- status = TPM_IOERROR;
- goto abort_with_error;
- }
-
- size_read = read( vtpm_globals->guest_rx_fh, cmd_header,
VTPM_COMMAND_HEADER_SIZE_SRV);
- if (size_read > 0) {
- vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV (DMI): 0x");
- for (i=0; i<size_read; i++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]);
-
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from DMI.
Aborting... \n");
- close(vtpm_globals->guest_rx_fh);
- vtpm_globals->guest_rx_fh = -1;
- status = TPM_IOERROR;
- goto abort_with_error;
- }
-
- if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
- //vtpmdeepsublog("\n");
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command from DMI shorter than
normal header. Aborting...\n");
- status = TPM_IOERROR;
- goto abort_with_error;
- }
-
- BSG_UnpackList(cmd_header, 4,
- BSG_TYPE_UINT32, &dmi,
- BSG_TPM_TAG, &tag,
- BSG_TYPE_UINT32, &in_param_size,
- BSG_TPM_COMMAND_CODE, &status );
-
- // Note that in_param_size is in the client's context
- cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
- if (cmd_size > 0) {
- in_param = (BYTE *) malloc(cmd_size);
- size_read = read( vtpm_globals->guest_rx_fh, in_param, cmd_size);
- if (size_read > 0) {
- for (i=0; i<size_read; i++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
-
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE.
Aborting... \n");
- close(vtpm_globals->guest_rx_fh);
- vtpm_globals->guest_rx_fh = -1;
- status = TPM_IOERROR;
- goto abort_with_error;
- }
- vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
-
- if (size_read < (int)cmd_size) {
- vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) from DMI is
shorter than header indicates(%d). Aborting...\n", size_read, cmd_size);
- status = TPM_IOERROR;
- goto abort_with_error;
- }
- } else {
- in_param = NULL;
- vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
- }
-
- if (buffer_init_convert(result_buf, cmd_size, in_param) !=
TPM_SUCCESS) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers.
Aborting...\n");
- status = TPM_FAIL;
- goto abort_with_error;
- }
-
- vtpmhandlerloginfo(VTPM_LOG_VTPM, "Sending DMI's response to
guest.\n");
- } // end else for if (dmi==0)
-
- } else { // This is a DMI lister thread. Thus this is from a DMI
-#ifdef VTPM_MULTI_VM
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct
access to TPM.\n");
- vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. dmi:%d, tag:%d,
size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord);
- for (UINT32 q=0; q<cmd_size; q++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[q]);
-
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
- status = TPM_FAIL;
-#else
-
-#endif
- } // end else for if BE Listener
- } // end else for is VTPM Command
-
- // Send response to Backend
- if (*tx_fh < 0) {
- if (threadType == BE_LISTENER_THREAD)
-#ifdef DUMMY_BACKEND
- *tx_fh = open("/tmp/out.fifo", O_RDWR);
-#else
- *tx_fh = open(VTPM_BE_DEV, O_RDWR);
-#endif
- else // DMI Listener
- *tx_fh = open(dmi_res->vtpm_tx_fname, O_WRONLY);
- }
-
- if (*tx_fh < 0) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound
fh.\n");
-#ifdef VTPM_MULTI_VM
- return TPM_IOERROR;
-#else
- *ret_value = TPM_IOERROR;
- pthread_exit(ret_value);
-#endif
- }
-
- abort_with_error:
- // Prepend VTPM header with destination DM stamped
- out_param_size = buffer_len(result_buf);
- out_message_size = VTPM_COMMAND_HEADER_SIZE_CLT + out_param_size;
- out_message_size_full = VTPM_COMMAND_HEADER_SIZE_SRV + out_param_size;
- out_message = (BYTE *) malloc (out_message_size_full);
-
- BSG_PackList(out_message, 4,
- BSG_TYPE_UINT32, (BYTE *) &dmi,
- BSG_TPM_TAG, (BYTE *) &tag,
- BSG_TYPE_UINT32, (BYTE *) &out_message_size,
- BSG_TPM_RESULT, (BYTE *) &status);
-
- if (buffer_len(result_buf) > 0)
- memcpy(out_message + VTPM_COMMAND_HEADER_SIZE_SRV, result_buf->bytes,
out_param_size);
-
-
- //Note: Send message + dmi_id
- size_write = write(*tx_fh, out_message, out_message_size_full );
- if (size_write > 0) {
- vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT: 0x");
- for (i=0; i < out_message_size_full; i++)
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", out_message[i]);
-
- vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
- } else {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to BE. Aborting...
\n");
- close(*tx_fh);
- *tx_fh = -1;
- goto abort_command;
- }
- free(out_message);
-
- if (size_write < (int)out_message_size_full) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to write full command to BE
(%d/%d)\n", size_write, out_message_size_full);
- goto abort_command;
- }
-
- abort_command:
- //free buffers
- bzero(cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
- //free(in_param); // This was converted to command_buf. No need to free
- if (command_buf != result_buf)
- buffer_free(result_buf);
-
- buffer_free(command_buf);
-
-#ifndef VTPM_MULTI_VM
- if (threadType != BE_LISTENER_THREAD) {
-#endif
- if ( (vtpm_globals->DMI_table_dirty) &&
- (VTPM_SaveService() != TPM_SUCCESS) ) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "ERROR: Unable to save manager
data.\n");
- }
-#ifndef VTPM_MULTI_VM
- }
-#endif
-
- } // End while(1)
-
-}
-
-
-///////////////////////////////////////////////////////////////////////////////
-TPM_RESULT VTPM_Init_Service() {
- TPM_RESULT status = TPM_FAIL;
- BYTE *randomsead;
- UINT32 randomsize;
-
- if ((vtpm_globals = (VTPM_GLOBALS *) malloc(sizeof(VTPM_GLOBALS))) == NULL){
- status = TPM_FAIL;
- goto abort_egress;
- }
- memset(vtpm_globals, 0, sizeof(VTPM_GLOBALS));
- vtpm_globals->be_fh = -1;
-
-#ifndef VTPM_MULTI_VM
- vtpm_globals->vtpm_rx_fh = -1;
- vtpm_globals->guest_rx_fh = -1;
-#endif
- if ((vtpm_globals->dmi_map = create_hashtable(10, hashfunc32, equals32)) ==
NULL){
- status = TPM_FAIL;
- goto abort_egress;
- }
-
- vtpm_globals->DMI_table_dirty = FALSE;
-
- // Create new TCS Object
- vtpm_globals->manager_tcs_handle = 0;
-
- TPMTRYRETURN(TCS_create());
-
- // Create TCS Context for service
- TPMTRYRETURN( TCS_OpenContext(&vtpm_globals->manager_tcs_handle ) );
-
- TPMTRYRETURN( TCSP_GetRandom(vtpm_globals->manager_tcs_handle,
-
&randomsize,
-
&randomsead));
-
- Crypto_Init(randomsead, randomsize);
- TPMTRYRETURN( TCS_FreeMemory (vtpm_globals->manager_tcs_handle,
randomsead));
-
- // Create OIAP session for service's authorized commands
- TPMTRYRETURN( VTSP_OIAP( vtpm_globals->manager_tcs_handle,
- &vtpm_globals->keyAuth) );
- vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-
- // If failed, create new Service.
- if (VTPM_LoadService() != TPM_SUCCESS)
- TPMTRYRETURN( VTPM_Create_Service() );
-
-
- //Load Storage Key
- TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
- TPM_SRK_KEYHANDLE,
- &vtpm_globals->storageKeyWrap,
- (const
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
- &vtpm_globals->storageKeyHandle,
- &vtpm_globals->keyAuth,
- &vtpm_globals->storageKey) );
-
- // Create entry for Dom0 for control messages
- TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
-
- // --------------------- Command handlers ---------------------------
-
- goto egress;
-
- abort_egress:
- egress:
-
- return(status);
-}
-
-void VTPM_Stop_Service() {
- VTPM_DMI_RESOURCE *dmi_res;
- struct hashtable_itr *dmi_itr;
-
- // Close all the TCS contexts. TCS should evict keys based on this
- if (hashtable_count(vtpm_globals->dmi_map) > 0) {
- dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
- do {
- dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
- if (dmi_res->connected)
- if (close_dmi( dmi_res ) != TPM_SUCCESS)
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to
close dmi %d properly.\n", dmi_res->dmi_id);
-
- } while (hashtable_iterator_advance(dmi_itr));
- free (dmi_itr);
- }
-
-
- TCS_CloseContext(vtpm_globals->manager_tcs_handle);
-
- if ( (vtpm_globals->DMI_table_dirty) &&
- (VTPM_SaveService() != TPM_SUCCESS) )
- vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
-
- hashtable_destroy(vtpm_globals->dmi_map, 1);
- free(vtpm_globals);
-
- close(vtpm_globals->be_fh);
- Crypto_Exit();
-
- vtpmloginfo(VTPM_LOG_VTPM, "VTPM Manager stopped.\n");
-}
+// ===================================================================
+//
+// Copyright (c) 2005, Intel Corp.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Intel Corporation nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+// ===================================================================
+//
+// vtpm_manager.c
+//
+// This file will house the main logic of the VTPM Manager
+//
+// ==================================================================
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#ifndef VTPM_MULTI_VM
+#include <pthread.h>
+#include <errno.h>
+#include <aio.h>
+#include <time.h>
+#endif
+
+#include "vtpm_manager.h"
+#include "vtpmpriv.h"
+#include "vtsp.h"
+#include "bsg.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+
+#include "log.h"
+#include "buffer.h"
+
+VTPM_GLOBALS *vtpm_globals=NULL;
+
+#ifdef VTPM_MULTI_VM
+ #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, fmt,
##args );
+ #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module,
fmt, ##args );
+ #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, fmt,
##args );
+#else
+ #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, "[%d]: "
fmt, threadType, ##args );
+ #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module,
fmt, ##args );
+ #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, "[%d]:
" fmt, threadType, ##args );
+#endif
+
+// --------------------------- Well Known Auths --------------------------
+#ifdef WELL_KNOWN_SRK_AUTH
+static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff};
+#endif
+
+#ifdef WELL_KNOWN_OWNER_AUTH
+static BYTE FIXED_OWNER_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff};
+#endif
+
+// -------------------------- Hash table functions --------------------
+
+static unsigned int hashfunc32(void *ky) {
+ return (* (UINT32 *) ky);
+}
+
+static int equals32(void *k1, void *k2) {
+ return (*(UINT32 *) k1 == *(UINT32 *) k2);
+}
+
+// --------------------------- Functions ------------------------------
+
+TPM_RESULT VTPM_Create_Service(){
+
+ TPM_RESULT status = TPM_SUCCESS;
+
+ // Generate Auth's for SRK & Owner
+#ifdef WELL_KNOWN_SRK_AUTH
+ memcpy(vtpm_globals->srk_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
+#else
+ Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );
+#endif
+
+#ifdef WELL_KNOWN_OWNER_AUTH
+ memcpy(vtpm_globals->owner_usage_auth, FIXED_OWNER_AUTH,
sizeof(TPM_AUTHDATA));
+#else
+ Crypto_GetRandom(vtpm_globals->owner_usage_auth, sizeof(TPM_AUTHDATA) );
+#endif
+
+ // Take Owership of TPM
+ CRYPTO_INFO ek_cryptoInfo;
+
+ vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
+ status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
+
+ // If we can read PubEK then there is no owner and we should take it.
+ if (status == TPM_SUCCESS) {
+ TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
+ (const
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
+ (const
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &ek_cryptoInfo,
+ &vtpm_globals->keyAuth));
+
+ TPMTRYRETURN(VTSP_DisablePubekRead(vtpm_globals->manager_tcs_handle,
+ (const
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
+ &vtpm_globals->keyAuth));
+ }
+
+ // Generate storage key's auth
+ Crypto_GetRandom( &vtpm_globals->storage_key_usage_auth,
+ sizeof(TPM_AUTHDATA) );
+
+ TCS_AUTH osap;
+ TPM_AUTHDATA sharedsecret;
+
+ TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
+ TPM_ET_SRK,
+ 0,
+ (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &sharedsecret,
+ &osap) );
+
+ TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
+ TPM_KEY_BIND,
+ (const
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
+ TPM_SRK_KEYHANDLE,
+ (const TPM_AUTHDATA*)&sharedsecret,
+ &vtpm_globals->storageKeyWrap,
+ &osap) );
+
+ vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
+
+ goto egress;
+
+ abort_egress:
+ exit(1);
+
+ egress:
+ vtpmloginfo(VTPM_LOG_VTPM, "Finished initialized new VTPM service (Status =
%d).\n", status);
+ return status;
+
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+#ifdef VTPM_MULTI_VM
+int VTPM_Service_Handler(){
+#else
+void *VTPM_Service_Handler(void *threadTypePtr){
+#endif
+ TPM_RESULT status = TPM_FAIL; // Should never return
+ UINT32 dmi, in_param_size, cmd_size, out_param_size,
out_message_size, out_message_size_full;
+ BYTE *cmd_header, *in_param, *out_message;
+ buffer_t *command_buf=NULL, *result_buf=NULL;
+ TPM_TAG tag;
+ TPM_COMMAND_CODE ord;
+ VTPM_DMI_RESOURCE *dmi_res;
+ int size_read, size_write, i;
+
+#ifndef VTPM_MULTI_VM
+ UINT32 dmi_cmd_size;
+ BYTE *dmi_cmd;
+ int threadType = *(int *) threadTypePtr;
+
+ // async io structures
+ struct aiocb dmi_aio;
+ struct aiocb *dmi_aio_a[1];
+ dmi_aio_a[0] = &dmi_aio;
+#endif
+
+#ifdef DUMMY_BACKEND
+ int dummy_rx;
+#endif
+
+ cmd_header = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV);
+ command_buf = (buffer_t *) malloc(sizeof(buffer_t));
+ result_buf = (buffer_t *) malloc(sizeof(buffer_t));
+
+#ifndef VTPM_MULTI_VM
+ TPM_RESULT *ret_value = (TPM_RESULT *) malloc(sizeof(TPM_RESULT));
+#endif
+
+ int *tx_fh, // Pointer to the filehandle this function will write to
+ *rx_fh; // Pointer to the filehandle this function will read from
+ // For a multi VM VTPM system, this function tx/rx with the BE
+ // via vtpm_globals->be_fh.
+ // For a single VM system, the BE_LISTENER_THREAD tx/rx with
theBE
+ // via vtpm_globals->be_fh, and the DMI_LISTENER_THREAD rx from
+ // vtpm_globals->vtpm_rx_fh and tx to dmi_res->vtpm_tx_fh
+
+ // Set rx_fh to point to the correct fh based on this mode.
+#ifdef VTPM_MULTI_VM
+ rx_fh = &vtpm_globals->be_fh;
+#else
+ if (threadType == BE_LISTENER_THREAD) {
+ #ifdef DUMMY_BACKEND
+ dummy_rx = -1;
+ rx_fh = &dummy_rx;
+ #else
+ rx_fh = &vtpm_globals->be_fh;
+ #endif
+ } else { // DMI_LISTENER_THREAD
+ rx_fh = &vtpm_globals->vtpm_rx_fh;
+ }
+#endif
+
+ // Set tx_fh to point to the correct fh based on this mode (If static)
+ // Create any fifos that these fh will use.
+#ifndef VTPM_MULTI_VM
+ int fh;
+ if (threadType == BE_LISTENER_THREAD) {
+ tx_fh = &vtpm_globals->be_fh;
+ if ( (fh = open(GUEST_RX_FIFO, O_RDWR)) == -1) {
+ if ( mkfifo(GUEST_RX_FIFO, S_IWUSR | S_IRUSR ) ){
+ vtpmlogerror(VTPM_LOG_VTPM, "Unable to create FIFO: %s.\n",
GUEST_RX_FIFO);
+ *ret_value = TPM_FAIL;
+ pthread_exit(ret_value);
+ }
+ } else
+ close(fh);
+
+ } else { // else DMI_LISTENER_THREAD
+ // tx_fh will be set once the DMI is identified
+ // But we need to make sure the read pip is created.
+ if ( (fh = open(VTPM_RX_FIFO, O_RDWR)) == -1) {
+ if ( mkfifo(VTPM_RX_FIFO, S_IWUSR | S_IRUSR ) ){
+ vtpmlogerror(VTPM_LOG_VTPM, "Unable to create FIFO: %s.\n",
VTPM_RX_FIFO);
+ *ret_value = TPM_FAIL;
+ pthread_exit(ret_value);
+ }
+ } else
+ close(fh);
+
+ }
+#else
+ tx_fh = &vtpm_globals->be_fh;
+#endif
+
+ ////////////////////////// Main Loop //////////////////////////////////
+ while(1) {
+
+#ifdef VTPM_MULTI_VM
+ vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n");
+#else
+ if (threadType == BE_LISTENER_THREAD) {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for Guest requests & ctrl
messages.\n");
+ } else
+ vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n");
+#endif
+
+ // Check status of rx_fh. If necessary attempt to re-open it.
+ if (*rx_fh < 0) {
+#ifdef VTPM_MULTI_VM
+ *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+#else
+ if (threadType == BE_LISTENER_THREAD)
+ #ifdef DUMMY_BACKEND
+ *rx_fh = open("/tmp/in.fifo", O_RDWR);
+ #else
+ *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+ #endif
+ else // DMI Listener
+ *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
+#endif
+ }
+
+ // Respond to failures to open rx_fh
+ if (*rx_fh < 0) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
+#ifdef VTPM_MULTI_VM
+ return TPM_IOERROR;
+#else
+ *ret_value = TPM_IOERROR;
+ pthread_exit(ret_value);
+#endif
+ }
+
+ // Read command header from rx_fh
+ size_read = read(*rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+ if (size_read > 0) {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV[%d}: 0x", size_read);
+ for (i=0; i<size_read; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ",
cmd_header[i]);
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't read from BE. Aborting... \n");
+ close(*rx_fh);
+ *rx_fh = -1;
+ goto abort_command;
+ }
+
+ if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "\n");
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command shorter than normal header
(%d bytes). Aborting...\n", size_read);
+ goto abort_command;
+ }
+
+ // Unpack header
+ BSG_UnpackList(cmd_header, 4,
+ BSG_TYPE_UINT32, &dmi,
+ BSG_TPM_TAG, &tag,
+ BSG_TYPE_UINT32, &in_param_size,
+ BSG_TPM_COMMAND_CODE, &ord );
+
+ // Using the header info, read from rx_fh the parameters of the command
+ // Note that in_param_size is in the client's context
+ cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
+ if (cmd_size > 0) {
+ in_param = (BYTE *) malloc(cmd_size);
+ size_read = read( *rx_fh, in_param, cmd_size);
+ if (size_read > 0) {
+ for (i=0; i<size_read; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from cmd.
Aborting... \n");
+ close(*rx_fh);
+ *rx_fh = -1;
+ goto abort_command;
+ }
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+ if (size_read < (int) cmd_size) {
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) is shorter than
header indicates(%d). Aborting...\n", size_read, cmd_size);
+ goto abort_command;
+ }
+ } else {
+ in_param = NULL;
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+ }
+
+#ifndef VTPM_MULTI_VM
+ // It's illegal to receive a Dom0 command from a DMI.
+ if ((threadType != BE_LISTENER_THREAD) && (dmi == 0)) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to access dom0 commands from
DMI interface. Aborting...\n");
+ goto abort_command;
+ }
+#endif
+
+ // Fetch information about the DMI issuing the request.
+ dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
&dmi);
+ if (dmi_res == NULL) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to non-existent DMI
in domain: %d. Aborting...\n", dmi);
+ goto abort_command;
+ }
+ if (!dmi_res->connected) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to disconnected DMI
in domain: %d. Aborting...\n", dmi);
+ goto abort_command;
+ }
+
+#ifndef VTPM_MULTI_VM
+ // Now that we know which DMI this is, we can set the tx_fh handle.
+ if (threadType != BE_LISTENER_THREAD)
+ tx_fh = &dmi_res->vtpm_tx_fh;
+ // else we set this before the while loop since it doesn't change.
+#endif
+
+ // Init the buffers used to handle the command and the response
+ if ( (buffer_init_convert(command_buf, cmd_size, in_param) != TPM_SUCCESS)
||
+ (buffer_init(result_buf, 0, 0) != TPM_SUCCESS) ) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers.
Aborting...\n");
+ goto abort_command;
+ }
+
+ // Dispatch it as either control or user request.
+ if (tag == VTPM_TAG_REQ) {
+ if (dmi_res->dmi_id == VTPM_CTL_DM){
+ switch (ord) {
+ case VTPM_ORD_OPEN:
+ status = VTPM_Handle_New_DMI(command_buf);
+ break;
+
+ case VTPM_ORD_CLOSE:
+ status = VTPM_Handle_Close_DMI(command_buf);
+ break;
+
+ case VTPM_ORD_DELETE:
+ status = VTPM_Handle_Delete_DMI(command_buf);
+ break;
+ default:
+ status = TPM_BAD_ORDINAL;
+ } // switch
+ } else {
+
+ switch (ord) {
+ case VTPM_ORD_SAVENVM:
+ status= VTPM_Handle_Save_NVM(dmi_res,
+ command_buf,
+ result_buf);
+ break;
+ case VTPM_ORD_LOADNVM:
+ status= VTPM_Handle_Load_NVM(dmi_res,
+ command_buf,
+ result_buf);
+ break;
+
+ case VTPM_ORD_TPMCOMMAND:
+ status= VTPM_Handle_TPM_Command(dmi_res,
+ command_buf,
+ result_buf);
+ break;
+
+ default:
+ status = TPM_BAD_ORDINAL;
+ } // switch
+ }
+ } else { // This is not a VTPM Command at all.
+ // This happens in two cases.
+ // MULTI_VM = A DMI illegally sent a raw TPM command to the manager
+ // Single VM:
+ // BE_LISTENER_THREAD: Guest issued a TPM command.
+ // Send this to DMI and wait for response
+ // DMI_LISTENER_THREAD: A DMI illegally sent a raw TPM command.
+
+#ifdef VTPM_MULTI_VM
+ // Raw TPM commands are not supported from the DMI
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct
access to TPM.\n");
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. dmi:%d, tag:%d,
size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord);
+ for (i=0; i<cmd_size; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+ status = TPM_FAIL;
+
+#else
+ // If BE_LISTENER_THREAD then this is a TPM command from a guest
+ if (threadType == BE_LISTENER_THREAD) {
+ // Dom0 can't talk to the BE, so this must be a broken FE/BE or badness
+ if (dmi == 0) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Illegal use of TPM command from
dom0\n");
+ status = TPM_FAIL;
+ } else {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM, "Forwarding command to DMI.\n");
+
+ // open the dmi_res->guest_tx_fh to send command to DMI
+ if (dmi_res->guest_tx_fh < 0)
+ dmi_res->guest_tx_fh = open(dmi_res->guest_tx_fname, O_WRONLY |
O_NONBLOCK);
+
+ // handle failed opens dmi_res->guest_tx_fh
+ if (dmi_res->guest_tx_fh < 0){
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound
fh to dmi.\n");
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+
+ //Forward TPM CMD stamped with dmi_id to DMI for handling
+ if (cmd_size) {
+ dmi_cmd = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size);
+ dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size;
+ memcpy(dmi_cmd, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+ memcpy(dmi_cmd + VTPM_COMMAND_HEADER_SIZE_SRV, in_param, cmd_size);
+ size_write = write(dmi_res->guest_tx_fh, dmi_cmd, dmi_cmd_size);
+
+ if (size_write > 0) {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT (DMI): 0x");
+ for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size; i++) {
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", dmi_cmd[i]);
+ }
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI.
Aborting... \n");
+ close(dmi_res->guest_tx_fh);
+ dmi_res->guest_tx_fh = -1;
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+ free(dmi_cmd);
+ } else {
+ dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV;
+ size_write = write(dmi_res->guest_tx_fh, cmd_header,
VTPM_COMMAND_HEADER_SIZE_SRV );
+ if (size_write > 0) {
+ for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ",
cmd_header[i]);
+
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI.
Aborting... \n");
+ close(dmi_res->guest_tx_fh);
+ dmi_res->guest_tx_fh = -1;
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+ }
+
+ if (size_write != (int) dmi_cmd_size)
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Could not write entire command
to DMI (%d/%d)\n", size_write, dmi_cmd_size);
+ buffer_free(command_buf);
+
+ // Open vtpm_globals->guest_rx_fh to receive DMI response
+ if (vtpm_globals->guest_rx_fh < 0)
+ vtpm_globals->guest_rx_fh = open(GUEST_RX_FIFO, O_RDONLY);
+
+ // Handle open failures
+ if (vtpm_globals->guest_rx_fh < 0){
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh to
dmi.\n");
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+
+ // Read header for response to TPM command from DMI
+ size_read = read( vtpm_globals->guest_rx_fh, cmd_header,
VTPM_COMMAND_HEADER_SIZE_SRV);
+ if (size_read > 0) {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV (DMI): 0x");
+ for (i=0; i<size_read; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]);
+
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from DMI.
Aborting... \n");
+ close(vtpm_globals->guest_rx_fh);
+ vtpm_globals->guest_rx_fh = -1;
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+
+ if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
+ //vtpmdeepsublog("\n");
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command from DMI shorter than
normal header. Aborting...\n");
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+
+ // Unpack response from DMI for TPM command
+ BSG_UnpackList(cmd_header, 4,
+ BSG_TYPE_UINT32, &dmi,
+ BSG_TPM_TAG, &tag,
+ BSG_TYPE_UINT32, &in_param_size,
+ BSG_TPM_COMMAND_CODE, &status );
+
+ // If response has parameters, read them.
+ // Note that in_param_size is in the client's context
+ cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
+ if (cmd_size > 0) {
+ in_param = (BYTE *) malloc(cmd_size);
+ size_read = read( vtpm_globals->guest_rx_fh, in_param, cmd_size);
+ if (size_read > 0) {
+ for (i=0; i<size_read; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE.
Aborting... \n");
+ close(vtpm_globals->guest_rx_fh);
+ vtpm_globals->guest_rx_fh = -1;
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
+
+ if (size_read < (int)cmd_size) {
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) from DMI is
shorter than header indicates(%d). Aborting...\n", size_read, cmd_size);
+ status = TPM_IOERROR;
+ goto abort_with_error;
+ }
+ } else {
+ in_param = NULL;
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
+ }
+
+ if (buffer_init_convert(result_buf, cmd_size, in_param) !=
TPM_SUCCESS) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers.
Aborting...\n");
+ status = TPM_FAIL;
+ goto abort_with_error;
+ }
+
+ vtpmhandlerloginfo(VTPM_LOG_VTPM, "Sending DMI's response to
guest.\n");
+ } // end else for if (dmi==0)
+
+ } else { // This is a DMI listener thread. Thus this is from a DMI
+ // Raw TPM commands are not supported from the DMI
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct
access to TPM.\n");
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. dmi:%d, tag:%d,
size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord);
+ for (i=0; i<cmd_size; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+ status = TPM_FAIL;
+ } // end else for if BE Listener
+#endif
+
+ } // end else for is VTPM Command
+
+ // This marks the beginning of preparing response to be sent out.
+ // Errors while handling responses jump here to reply with error messages
+ // NOTE: Currently there are no recoverable errors in multi-VM mode. If one
+ // is added to the code, this ifdef should be removed.
+ // Also note this is NOT referring to errors in commands, but rather
+ // this is about I/O errors and such.
+#ifndef VTPM_MULTI_VM
+ abort_with_error:
+#endif
+
+ // Open tx_fh in preparation to send response back
+ if (*tx_fh < 0) {
+#ifdef VTPM_MULTI_VM
+ *tx_fh = open(VTPM_BE_DEV, O_RDWR);
+#else
+ if (threadType == BE_LISTENER_THREAD)
+ #ifdef DUMMY_BACKEND
+ *tx_fh = open("/tmp/out.fifo", O_RDWR);
+ #else
+ *tx_fh = open(VTPM_BE_DEV, O_RDWR);
+ #endif
+ else // DMI Listener
+ *tx_fh = open(dmi_res->vtpm_tx_fname, O_WRONLY);
+#endif
+ }
+
+
+ // Handle failed open
+ if (*tx_fh < 0) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound
fh.\n");
+#ifdef VTPM_MULTI_VM
+ return TPM_IOERROR;
+#else
+ *ret_value = TPM_IOERROR;
+ pthread_exit(ret_value);
+#endif
+ }
+
+ // Prepend VTPM header with destination DM stamped
+ out_param_size = buffer_len(result_buf);
+ out_message_size = VTPM_COMMAND_HEADER_SIZE_CLT + out_param_size;
+ out_message_size_full = VTPM_COMMAND_HEADER_SIZE_SRV + out_param_size;
+ out_message = (BYTE *) malloc (out_message_size_full);
+
+ BSG_PackList(out_message, 4,
+ BSG_TYPE_UINT32, (BYTE *) &dmi,
+ BSG_TPM_TAG, (BYTE *) &tag,
+ BSG_TYPE_UINT32, (BYTE *) &out_message_size,
+ BSG_TPM_RESULT, (BYTE *) &status);
+
+ if (buffer_len(result_buf) > 0)
+ memcpy(out_message + VTPM_COMMAND_HEADER_SIZE_SRV, result_buf->bytes,
out_param_size);
+
+
+ //Note: Send message + dmi_id
+ size_write = write(*tx_fh, out_message, out_message_size_full );
+ if (size_write > 0) {
+ vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT: 0x");
+ for (i=0; i < out_message_size_full; i++)
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", out_message[i]);
+
+ vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+ } else {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to BE. Aborting...
\n");
+ close(*tx_fh);
+ *tx_fh = -1;
+ goto abort_command;
+ }
+ free(out_message);
+
+ if (size_write < (int)out_message_size_full) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to write full command to BE
(%d/%d)\n", size_write, out_message_size_full);
+ goto abort_command;
+ }
+
+ // On certain failures an error message cannot be sent.
+ // This marks the beginning of cleanup in preparation for the next command.
+ abort_command:
+ //free buffers
+ bzero(cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+ //free(in_param); // This was converted to command_buf. No need to free
+ if (command_buf != result_buf)
+ buffer_free(result_buf);
+
+ buffer_free(command_buf);
+
+#ifndef VTPM_MULTI_VM
+ if (threadType != BE_LISTENER_THREAD) {
+#endif
+ if ( (vtpm_globals->DMI_table_dirty) &&
+ (VTPM_SaveService() != TPM_SUCCESS) ) {
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "ERROR: Unable to save manager
data.\n");
+ }
+#ifndef VTPM_MULTI_VM
+ }
+#endif
+
+ } // End while(1)
+
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+TPM_RESULT VTPM_Init_Service() {
+ TPM_RESULT status = TPM_FAIL;
+ BYTE *randomsead;
+ UINT32 randomsize;
+
+ if ((vtpm_globals = (VTPM_GLOBALS *) malloc(sizeof(VTPM_GLOBALS))) == NULL){
+ status = TPM_FAIL;
+ goto abort_egress;
+ }
+ memset(vtpm_globals, 0, sizeof(VTPM_GLOBALS));
+ vtpm_globals->be_fh = -1;
+
+#ifndef VTPM_MULTI_VM
+ vtpm_globals->vtpm_rx_fh = -1;
+ vtpm_globals->guest_rx_fh = -1;
+#endif
+ if ((vtpm_globals->dmi_map = create_hashtable(10, hashfunc32, equals32)) ==
NULL){
+ status = TPM_FAIL;
+ goto abort_egress;
+ }
+
+ vtpm_globals->DMI_table_dirty = FALSE;
+
+ // Create new TCS Object
+ vtpm_globals->manager_tcs_handle = 0;
+
+ TPMTRYRETURN(TCS_create());
+
+ // Create TCS Context for service
+ TPMTRYRETURN( TCS_OpenContext(&vtpm_globals->manager_tcs_handle ) );
+
+ TPMTRYRETURN( TCSP_GetRandom(vtpm_globals->manager_tcs_handle,
+ &randomsize,
+ &randomsead));
+
+ Crypto_Init(randomsead, randomsize);
+ TPMTRYRETURN( TCS_FreeMemory (vtpm_globals->manager_tcs_handle,
randomsead));
+
+ // Create OIAP session for service's authorized commands
+ TPMTRYRETURN( VTSP_OIAP( vtpm_globals->manager_tcs_handle,
+ &vtpm_globals->keyAuth) );
+ vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
+
+ // If failed, create new Service.
+ if (VTPM_LoadService() != TPM_SUCCESS)
+ TPMTRYRETURN( VTPM_Create_Service() );
+
+ //Load Storage Key
+ TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+ TPM_SRK_KEYHANDLE,
+ &vtpm_globals->storageKeyWrap,
+ (const
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &vtpm_globals->storageKeyHandle,
+ &vtpm_globals->keyAuth,
+ &vtpm_globals->storageKey) );
+
+ // Create entry for Dom0 for control messages
+ TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
+
+ // --------------------- Command handlers ---------------------------
+
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ return(status);
+}
+
+void VTPM_Stop_Service() {
+ VTPM_DMI_RESOURCE *dmi_res;
+ struct hashtable_itr *dmi_itr;
+
+ // Close all the TCS contexts. TCS should evict keys based on this
+ if (hashtable_count(vtpm_globals->dmi_map) > 0) {
+ dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
+ do {
+ dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
+ if (dmi_res->connected)
+ close_dmi( dmi_res ); // Not really interested in return code
+
+ } while (hashtable_iterator_advance(dmi_itr));
+ free (dmi_itr);
+ }
+
+
+ TCS_CloseContext(vtpm_globals->manager_tcs_handle);
+
+ if ( (vtpm_globals->DMI_table_dirty) &&
+ (VTPM_SaveService() != TPM_SUCCESS) )
+ vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+
+ hashtable_destroy(vtpm_globals->dmi_map, 1);
+ free(vtpm_globals);
+
+ close(vtpm_globals->be_fh);
+ Crypto_Exit();
+
+ vtpmloginfo(VTPM_LOG_VTPM, "VTPM Manager stopped.\n");
+}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/vtpmpriv.h
--- a/tools/vtpm_manager/manager/vtpmpriv.h Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/vtpmpriv.h Thu Sep 22 17:42:01 2005
@@ -47,8 +47,8 @@
#define STATE_FILE "/var/vtpm/VTPM"
#define DMI_NVM_FILE "/var/vtpm/vtpm_dm_%d.data"
-#define VTPM_BE_DEV "/dev/vtpm"
-#define VTPM_CTL_DM 0
+#define VTPM_BE_DEV "/dev/vtpm0"
+#define VTPM_CTL_DM 0
#ifndef VTPM_MUTLI_VM
#include <sys/types.h>
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/Makefile
--- a/tools/vtpm_manager/tcs/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/Makefile Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
rm -f *.a *.so *.o *.rpm $(DEP_FILES)
mrproper: clean
+ rm -f *~
$(BIN): $(OBJS)
$(AR) rcs $(BIN) $(OBJS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/contextmgr.c
--- a/tools/vtpm_manager/tcs/contextmgr.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/contextmgr.c Thu Sep 22 17:42:01 2005
@@ -43,6 +43,7 @@
#include "tcs.h"
#include "contextmgr.h"
#include "log.h"
+#include "hashtable.h"
BYTE* AddMemBlock(CONTEXT_HANDLE* pContextHandle, // in
int BlockSize) { // in
@@ -131,12 +132,14 @@
return bFound;
}
-BOOL AddHandleToList(CONTEXT_HANDLE* pContextHandle, // in
+BOOL AddHandleToList(TCS_CONTEXT_HANDLE hContext, // in
TPM_RESOURCE_TYPE type, // in
TPM_HANDLE handle) { // in
HANDLE_LIST* pNewHandle = NULL;
-
+
vtpmloginfo(VTPM_LOG_TCS_DEEP, "Adding Handle to list\n");
+ CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
+
if (pContextHandle == NULL)
return 0;
@@ -154,11 +157,13 @@
return 1;
}
-BOOL DeleteHandleFromList( CONTEXT_HANDLE* pContextHandle, // in
+BOOL DeleteHandleFromList( TCS_CONTEXT_HANDLE hContext, // in
TPM_HANDLE handle) { // in
+ CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
+
HANDLE_LIST *pCurrentHandle = pContextHandle->pHandleList,
- *pLastHandle = pCurrentHandle;
+ *pLastHandle = pCurrentHandle;
vtpmloginfo(VTPM_LOG_TCS_DEEP, "Deleting Handle from list\n");
@@ -202,10 +207,10 @@
switch (pCurrentHandle->type) {
case TPM_RT_KEY:
- returncode = returncode && !TCSP_EvictKey((TCS_CONTEXT_HANDLE)
pContextHandle, pCurrentHandle->handle);
+ returncode = returncode && !TCSP_EvictKey(pContextHandle->handle,
pCurrentHandle->handle);
break;
case TPM_RT_AUTH:
- returncode = returncode && !TCSP_TerminateHandle((TCS_CONTEXT_HANDLE)
pContextHandle, pCurrentHandle->handle);
+ returncode = returncode && !TCSP_TerminateHandle(pContextHandle->handle,
pCurrentHandle->handle);
break;
default:
returncode = FALSE;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/contextmgr.h
--- a/tools/vtpm_manager/tcs/contextmgr.h Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/contextmgr.h Thu Sep 22 17:42:01 2005
@@ -57,6 +57,7 @@
} HANDLE_LIST;
typedef struct context_handle {
+ TCS_CONTEXT_HANDLE handle;
int nBlockCount;
BLOCK* pTopBlock;
HANDLE_LIST* pHandleList;
@@ -69,11 +70,11 @@
BYTE* pTCPA_BYTEs); // in
-BOOL AddHandleToList( CONTEXT_HANDLE* pContextHandle, // in
+BOOL AddHandleToList( TCS_CONTEXT_HANDLE hContext, // in
TPM_RESOURCE_TYPE type, // in
TPM_HANDLE handle); // in
-BOOL DeleteHandleFromList( CONTEXT_HANDLE* pContextHandle, // in
+BOOL DeleteHandleFromList( TCS_CONTEXT_HANDLE hContext, // in
TPM_HANDLE handle); // in
BOOL FreeHandleList( CONTEXT_HANDLE* pContextHandle); // in
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/tcs.c
--- a/tools/vtpm_manager/tcs/tcs.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/tcs.c Thu Sep 22 17:42:01 2005
@@ -47,9 +47,10 @@
#include "contextmgr.h"
#include "tpmddl.h"
#include "log.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
// Static Global Vars for the TCS
-static BOOL TCS_m_bConnected;
static int TCS_m_nCount = 0;
#define TCPA_MAX_BUFFER_LENGTH 0x2000
@@ -57,6 +58,21 @@
static BYTE InBuf [TCPA_MAX_BUFFER_LENGTH];
static BYTE OutBuf[TCPA_MAX_BUFFER_LENGTH];
+struct hashtable *context_ht;
+
+// -------------------------- Hash table functions --------------------
+
+static unsigned int hashfunc32(void *ky) {
+ return (* (UINT32 *) ky);
+}
+
+static int equals32(void *k1, void *k2) {
+ return (*(UINT32 *) k1 == *(UINT32 *) k2);
+}
+
+CONTEXT_HANDLE *LookupContext( TCS_CONTEXT_HANDLE hContext) {
+ return( (CONTEXT_HANDLE *) hashtable_search(context_ht, &hContext) );
+}
//
---------------------------------------------------------------------------------
// Initialization/Uninitialization SubComponent API
@@ -64,34 +80,50 @@
TPM_RESULT TCS_create() {
TDDL_RESULT hRes = TDDL_E_FAIL;
TPM_RESULT result = TPM_FAIL;
- TCS_m_bConnected = FALSE;
if (TCS_m_nCount == 0) {
vtpmloginfo(VTPM_LOG_TCS, "Constructing new TCS:\n");
hRes = TDDL_Open();
-
- if (hRes == TDDL_SUCCESS) {
- TCS_m_bConnected = TRUE;
+
+ context_ht = create_hashtable(10, hashfunc32, equals32);
+
+ if ((hRes == TDDL_SUCCESS) && (context_ht != NULL)) {
result = TPM_SUCCESS;
+ TCS_m_nCount++;
+ } else {
+ result = TPM_IOERROR;
+ hashtable_destroy(context_ht, 1);
}
} else
- TCS_m_bConnected = TRUE;
-
- TCS_m_nCount++;
-
+ TCS_m_nCount++;
+
return(result);
}
void TCS_destroy()
{
- // FIXME: Should iterate through all open contexts and close them.
TCS_m_nCount--;
- if (TCS_m_bConnected == TRUE && TCS_m_nCount == 0) {
+ if (TCS_m_nCount == 0) {
vtpmloginfo(VTPM_LOG_TCS, "Destructing TCS:\n");
TDDL_Close();
- TCS_m_bConnected = FALSE;
+
+ struct hashtable_itr *context_itr;
+ TCS_CONTEXT_HANDLE *hContext;
+
+ // Close all the TCS contexts. TCS should evict keys based on this
+ if (hashtable_count(context_ht) > 0) {
+ context_itr = hashtable_iterator(context_ht);
+ do {
+ hContext = (TCS_CONTEXT_HANDLE *) hashtable_iterator_key(context_itr);
+ if (TCS_CloseContext(*hContext) != TPM_SUCCESS)
+ vtpmlogerror(VTPM_LOG_TCS, "Failed to close context %d
properly.\n", *hContext);
+
+ } while (hashtable_iterator_advance(context_itr));
+ free(context_itr);
+ }
+ hashtable_destroy(context_ht, 1);
}
}
@@ -101,7 +133,7 @@
BYTE** ppMemPtr) {// out
TPM_RESULT returnCode = TPM_FAIL;
- CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext;
+ CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
if (pContextHandle != NULL && ppMemPtr != NULL) {
*ppMemPtr = (BYTE *)AddMemBlock(pContextHandle, MemSize);
@@ -114,7 +146,7 @@
TPM_RESULT TCS_FreeMemory( TCS_CONTEXT_HANDLE hContext, // in
BYTE* pMemory) { // in
TPM_RESULT returnCode = TPM_FAIL;
- CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext;
+ CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
if ( (pContextHandle != NULL && pMemory != NULL) &&
(DeleteMemBlock(pContextHandle, pMemory) == TRUE) )
@@ -126,15 +158,15 @@
TPM_RESULT TCS_OpenContext(TCS_CONTEXT_HANDLE* hContext) { // out
TPM_RESULT returnCode = TPM_FAIL;
+ TCS_CONTEXT_HANDLE *newContext;
vtpmloginfo(VTPM_LOG_TCS, "Calling TCS_OpenContext:\n");
// hContext must point to a null memory context handle
if(*hContext == HANDLE_NULL) {
- CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE
*)malloc(sizeof(CONTEXT_HANDLE));
+ CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE *)
malloc(sizeof(CONTEXT_HANDLE));
if (pContextHandle == NULL)
return TPM_SIZE;
-
// initialize to 0
pContextHandle->nBlockCount = 0;
@@ -144,19 +176,32 @@
// Create New Block
AddMemBlock(pContextHandle, BLOCK_SIZE);
- *hContext = (TCS_CONTEXT_HANDLE)pContextHandle;
- returnCode = TPM_SUCCESS;
+ newContext = (TCS_CONTEXT_HANDLE *) malloc(sizeof(TCS_CONTEXT_HANDLE));
+ *newContext = (TCS_CONTEXT_HANDLE) (((uintptr_t) pContextHandle >> 2) &
0xffffffff);
+
+ if (hashtable_search(context_ht, &newContext) !=NULL)
+ *newContext += 1;
+
+ pContextHandle->handle = *newContext;
+ if (!hashtable_insert(context_ht, newContext, pContextHandle)) {
+ free(newContext);
+ free(pContextHandle);
+ returnCode = TPM_FAIL;
+ } else {
+ *hContext = *newContext;
+ returnCode = TPM_SUCCESS;
+ }
}
return(returnCode);
}
TPM_RESULT TCS_CloseContext(TCS_CONTEXT_HANDLE hContext) {// in
- //FIXME: TCS SHOULD Track track failed auths and make sure
+ //FIXME: TCS SHOULD Track failed auths and make sure
//we don't try and re-free them here.
TPM_RESULT returnCode = TPM_FAIL;
- CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext;
+ CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
if(pContextHandle != NULL) {
// Print test info
@@ -171,6 +216,9 @@
vtpmlogerror(VTPM_LOG_TCS, "Not all handles evicted from TPM.\n");
// Release the TPM's resources
+ if (hashtable_remove(context_ht, &hContext) == NULL)
+ vtpmlogerror(VTPM_LOG_TCS, "Not all handles evicted from TPM.\n");
+
free(pContextHandle);
returnCode = TPM_SUCCESS;
}
@@ -255,7 +303,7 @@
BSG_TYPE_UINT32, authHandle,
BSG_TPM_NONCE, nonce0);
- if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_AUTH,
*authHandle))
+ if (!AddHandleToList(hContext, TPM_RT_AUTH, *authHandle))
vtpmlogerror(VTPM_LOG_TCS, "New AuthHandle not recorded\n");
vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize);
@@ -321,7 +369,7 @@
BSG_TPM_NONCE, nonceEven,
BSG_TPM_NONCE, nonceEvenOSAP);
- if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_AUTH,
*authHandle)) {
+ if (!AddHandleToList(hContext, TPM_RT_AUTH, *authHandle)) {
vtpmlogerror(VTPM_LOG_TCS, "New AuthHandle not recorded\n");
}
@@ -498,7 +546,7 @@
BSG_TYPE_UINT32, ¶mSize,
BSG_TPM_COMMAND_CODE, &returnCode);
- if (!DeleteHandleFromList((CONTEXT_HANDLE *)hContext, handle))
+ if (!DeleteHandleFromList(hContext, handle))
vtpmlogerror(VTPM_LOG_TCS, "KeyHandle not removed from list\n");
@@ -897,7 +945,7 @@
phKeyTCSI);
unpackAuth(pAuth, OutBuf+i);
- if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_KEY,
*phKeyTCSI)) {
+ if (!AddHandleToList(hContext, TPM_RT_KEY, *phKeyTCSI)) {
vtpmlogerror(VTPM_LOG_TCS, "New KeyHandle not recorded\n");
}
@@ -942,7 +990,7 @@
BSG_TYPE_UINT32, ¶mSize,
BSG_TPM_COMMAND_CODE, &returnCode);
- if (!DeleteHandleFromList((CONTEXT_HANDLE *)hContext, hKey)) {
+ if (!DeleteHandleFromList(hContext, hKey)) {
vtpmlogerror(VTPM_LOG_TCS, "KeyHandle not removed from list\n");
}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/tcs.h
--- a/tools/vtpm_manager/tcs/tcs.h Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/tcs.h Thu Sep 22 17:42:01 2005
@@ -41,6 +41,7 @@
#define __TCS_H__
#include "tcg.h"
+#include "contextmgr.h"
#include "buffer.h"
#define HANDLE_NULL 0
@@ -235,4 +236,7 @@
UINT32 *outDataSize,// in/out
BYTE *outData); // out
+///////////// Private Functions ////////////////////
+CONTEXT_HANDLE* LookupContext( TCS_CONTEXT_HANDLE hContext);
+
#endif //TCS_H
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/transmit.c
--- a/tools/vtpm_manager/tcs/transmit.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/transmit.c Thu Sep 22 17:42:01 2005
@@ -69,7 +69,7 @@
ERRORDIE (TPM_IOERROR);
}
else if ((TDDL_UINT32) size < insize) {
- vtpmlogerror(VTPM_LOG_TXDATA, "Wrote %d instead of %d bytes!\n", size,
insize);
+ vtpmlogerror(VTPM_LOG_TXDATA, "Wrote %d instead of %d bytes!\n", (int)
size, insize);
// ... ?
}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/util/Makefile
--- a/tools/vtpm_manager/util/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/util/Makefile Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
rm -f *.a *.so *.o *.rpm $(DEP_FILES)
mrproper: clean
+ rm -f *~
$(BIN): $(OBJS)
$(AR) rcs $(BIN) $(OBJS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/util/tcg.h
--- a/tools/vtpm_manager/util/tcg.h Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/util/tcg.h Thu Sep 22 17:42:01 2005
@@ -453,14 +453,14 @@
// DEPENDS: local var 'status' of type TPM_RESULT
// DEPENDS: label 'abort_egress' which cleans up and returns the status
#define ERRORDIE(s) do { status = s; \
- fprintf (stderr, "*** ERRORDIE in %s, line %i\n",
__func__, __LINE__); \
+ fprintf (stderr, "*** ERRORDIE in %s at %s: %i\n",
__func__, __FILE__, __LINE__); \
goto abort_egress; } \
while (0)
// ASSUME: the return value used after the abort_egress label has been set
// already (eg. the 'status' local var)
#define STATUSCHECK(s) if (s != TPM_SUCCESS) { \
- fprintf (stderr, "*** ERR in %s, line %i\n",
__func__, __LINE__); \
+ fprintf (stderr, "*** ERR in %s at %s:%i\n",
__func__, __FILE__, __LINE__); \
goto abort_egress; \
}
@@ -475,7 +475,7 @@
// Try command c. If it fails, print error message, set status to actual
return code. Goto shame
#define TPMTRYRETURN(c) do { status = c; \
if (status != TPM_SUCCESS) { \
- printf("ERROR in %s:%i code: %s.\n", __func__,
__LINE__, tpm_get_error_name(status)); \
+ printf("ERROR in %s at %s:%i code: %s.\n",
__func__, __FILE__, __LINE__, tpm_get_error_name(status)); \
goto abort_egress; \
} \
} while(0)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/01simple.test
--- a/tools/xenstore/testsuite/01simple.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/01simple.test Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
# Create an entry, read it.
-write /test create contents
+write /test contents
expect contents
read /test
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/02directory.test
--- a/tools/xenstore/testsuite/02directory.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/02directory.test Thu Sep 22 17:42:01 2005
@@ -3,7 +3,7 @@
dir /
# Create a file.
-write /test create contents
+write /test contents
# Directory shows it.
expect test
@@ -21,16 +21,14 @@
dir /dir
# Create a file, check it exists.
-write /dir/test2 create contents2
+write /dir/test2 contents2
expect test2
dir /dir
expect contents2
read /dir/test2
-# Creating dir over the top should fail.
-expect mkdir failed: File exists
+# Creating dir over the top should succeed.
mkdir /dir
-expect mkdir failed: File exists
mkdir /dir/test2
# Mkdir implicitly creates directories.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/03write.test
--- a/tools/xenstore/testsuite/03write.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/03write.test Thu Sep 22 17:42:01 2005
@@ -1,31 +1,20 @@
-# Write without create fails.
-expect write failed: No such file or directory
-write /test none contents
-
-# Exclusive write succeeds
-write /test excl contents
+# Write succeeds
+write /test contents
expect contents
read /test
-# Exclusive write fails to overwrite.
-expect write failed: File exists
-write /test excl contents
-
-# Non-exclusive overwrite succeeds.
-write /test none contents2
+# Overwrite succeeds.
+write /test contents2
expect contents2
-read /test
-write /test create contents3
-expect contents3
read /test
# Write should implicitly create directories
-write /dir/test create contents
+write /dir/test contents
expect test
dir /dir
expect contents
read /dir/test
-write /dir/1/2/3/4 excl contents4
+write /dir/1/2/3/4 contents4
expect test
expect 1
dir /dir
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/04rm.test
--- a/tools/xenstore/testsuite/04rm.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/04rm.test Thu Sep 22 17:42:01 2005
@@ -1,11 +1,10 @@
-# Remove non-existant fails.
-expect rm failed: No such file or directory
+# Remove non-existent is OK, as long as parent exists
rm /test
expect rm failed: No such file or directory
rm /dir/test
# Create file and remove it
-write /test excl contents
+write /test contents
rm /test
# Create directory and remove it.
@@ -14,5 +13,5 @@
# Create directory, create file, remove all.
mkdir /dir
-write /dir/test excl contents
+write /dir/test contents
rm /dir
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/05filepermissions.test
--- a/tools/xenstore/testsuite/05filepermissions.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/05filepermissions.test Thu Sep 22 17:42:01 2005
@@ -5,7 +5,7 @@
getperm /dir/test
# Create file: inherits from root (0 READ)
-write /test excl contents
+write /test contents
expect 0 READ
getperm /test
setid 1
@@ -14,7 +14,7 @@
expect contents
read /test
expect write failed: Permission denied
-write /test none contents
+write /test contents
# Take away read access to file.
setid 0
@@ -25,7 +25,7 @@
expect read failed: Permission denied
read /test
expect write failed: Permission denied
-write /test none contents
+write /test contents
# Grant everyone write access to file.
setid 0
@@ -35,7 +35,7 @@
getperm /test
expect read failed: Permission denied
read /test
-write /test none contents2
+write /test contents2
setid 0
expect contents2
read /test
@@ -47,7 +47,7 @@
getperm /test
expect contents2
read /test
-write /test none contents3
+write /test contents3
expect contents3
read /test
@@ -59,7 +59,7 @@
getperm /test
expect contents3
read /test
-write /test none contents4
+write /test contents4
# User 2 can do nothing.
setid 2
@@ -70,7 +70,7 @@
expect read failed: Permission denied
read /test
expect write failed: Permission denied
-write /test none contents4
+write /test contents4
# Tools can always access things.
setid 0
@@ -78,4 +78,4 @@
getperm /test
expect contents4
read /test
-write /test none contents5
+write /test contents5
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/06dirpermissions.test
--- a/tools/xenstore/testsuite/06dirpermissions.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/06dirpermissions.test Thu Sep 22 17:42:01 2005
@@ -11,7 +11,7 @@
getperm /dir
dir /dir
expect write failed: Permission denied
-write /dir/test create contents2
+write /dir/test contents2
# Remove everyone's read access to directoy.
setid 0
@@ -22,7 +22,7 @@
expect read failed: Permission denied
read /dir/test create contents2
expect write failed: Permission denied
-write /dir/test create contents2
+write /dir/test contents2
# Grant everyone write access to directory.
setid 0
@@ -32,7 +32,7 @@
getperm /dir
expect dir failed: Permission denied
dir /dir
-write /dir/test create contents
+write /dir/test contents
setid 0
expect 1 WRITE
getperm /dir/test
@@ -47,7 +47,7 @@
getperm /dir
expect test
dir /dir
-write /dir/test2 create contents
+write /dir/test2 contents
expect contents
read /dir/test2
setperm /dir/test2 1 NONE
@@ -60,7 +60,7 @@
expect test
expect test2
dir /dir
-write /dir/test3 create contents
+write /dir/test3 contents
# User 2 can do nothing. Can't even tell if file exists.
setid 2
@@ -79,17 +79,9 @@
expect read failed: Permission denied
read /dir/test4
expect write failed: Permission denied
-write /dir/test none contents
+write /dir/test contents
expect write failed: Permission denied
-write /dir/test create contents
-expect write failed: Permission denied
-write /dir/test excl contents
-expect write failed: Permission denied
-write /dir/test4 none contents
-expect write failed: Permission denied
-write /dir/test4 create contents
-expect write failed: Permission denied
-write /dir/test4 excl contents
+write /dir/test4 contents
# Tools can always access things.
setid 0
@@ -99,13 +91,13 @@
expect test2
expect test3
dir /dir
-write /dir/test4 create contents
+write /dir/test4 contents
# Inherited by child.
mkdir /dir/subdir
expect 1 NONE
getperm /dir/subdir
-write /dir/subfile excl contents
+write /dir/subfile contents
expect 1 NONE
getperm /dir/subfile
@@ -114,12 +106,12 @@
expect 2 READ/WRITE
getperm /dir/subdir
setid 3
-write /dir/subdir/subfile excl contents
+write /dir/subdir/subfile contents
expect 3 READ/WRITE
getperm /dir/subdir/subfile
# Inheritence works through multiple directories, too.
-write /dir/subdir/1/2/3/4 excl contents
+write /dir/subdir/1/2/3/4 contents
expect 3 READ/WRITE
getperm /dir/subdir/1/2/3/4
mkdir /dir/subdir/a/b/c/d
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/07watch.test
--- a/tools/xenstore/testsuite/07watch.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/07watch.test Thu Sep 22 17:42:01 2005
@@ -1,8 +1,8 @@
# Watch something, write to it, check watch has fired.
-write /test create contents
+write /test contents
1 watch /test token
-2 write /test create contents2
+2 write /test contents2
expect 1:/test:token
1 waitwatch
1 ackwatch token
@@ -44,7 +44,7 @@
# ignore watches while doing commands, should work.
watch /dir token
-1 write /dir/test create contents
+1 write /dir/test contents
expect contents
read /dir/test
expect /dir/test:token
@@ -56,7 +56,7 @@
1 watch /dir token1
3 watch /dir token3
2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
expect 3:/dir/test:token3
3 waitwatch
3 ackwatch token3
@@ -73,7 +73,7 @@
# If one dies (without acking), the other should still get ack.
1 watch /dir token1
2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
expect 2:/dir/test:token2
2 waitwatch
2 close
@@ -85,7 +85,7 @@
# If one dies (without reading at all), the other should still get ack.
1 watch /dir token1
2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
2 close
expect 1:/dir/test:token1
1 waitwatch
@@ -97,7 +97,7 @@
1 watch /dir token1
1 unwatch /dir token1
1 watch /dir token2
-2 write /dir/test2 create contents
+2 write /dir/test2 contents
expect 1:/dir/test2:token2
1 waitwatch
1 unwatch /dir token2
@@ -107,7 +107,7 @@
# unwatch while watch pending. Other watcher still gets the event.
1 watch /dir token1
2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
2 unwatch /dir token2
expect 1:/dir/test:token1
1 waitwatch
@@ -117,17 +117,17 @@
# unwatch while watch pending. Should clear this so we get next event.
1 watch /dir token1
-write /dir/test create contents
+write /dir/test contents
1 unwatch /dir token1
1 watch /dir/test token2
-write /dir/test none contents2
+write /dir/test contents2
expect 1:/dir/test:token2
1 waitwatch
1 ackwatch token2
# check we only get notified once.
1 watch /test token
-2 write /test create contents2
+2 write /test contents2
expect 1:/test:token
1 waitwatch
1 ackwatch token
@@ -137,9 +137,9 @@
# watches are queued in order.
1 watch / token
-2 write /test1 create contents
-2 write /test2 create contents
-2 write /test3 create contents
+2 write /test1 contents
+2 write /test2 contents
+2 write /test3 contents
expect 1:/test1:token
1 waitwatch
1 ackwatch token
@@ -153,8 +153,8 @@
# Creation of subpaths should be covered correctly.
1 watch / token
-2 write /test/subnode create contents2
-2 write /test/subnode/subnode create contents2
+2 write /test/subnode contents2
+2 write /test/subnode/subnode contents2
expect 1:/test/subnode:token
1 waitwatch
1 ackwatch token
@@ -167,11 +167,13 @@
# Watch event must have happened before we registered interest.
1 watch / token
-2 write /test/subnode create contents2
-1 watch / token2 0
+2 write /test/subnode contents2
+1 watchnoack / token2 0
expect 1:/test/subnode:token
1 waitwatch
1 ackwatch token
+expect 1:/:token2
+1 waitwatch
expect 1: waitwatch failed: Connection timed out
1 waitwatch
1 close
@@ -185,7 +187,7 @@
# Watch should not double-send after we ack, even if we did something in
between.
1 watch /test2 token
-2 write /test2/foo create contents2
+2 write /test2/foo contents2
expect 1:/test2/foo:token
1 waitwatch
expect 1:contents2
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/08transaction.slowtest
--- a/tools/xenstore/testsuite/08transaction.slowtest Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/08transaction.slowtest Thu Sep 22 17:42:01 2005
@@ -1,7 +1,7 @@
# Test transaction timeouts. Take a second each.
mkdir /test
-write /test/entry1 create contents
+write /test/entry1 contents
# Transactions can take as long as the want...
start /test
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/08transaction.test
--- a/tools/xenstore/testsuite/08transaction.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/08transaction.test Thu Sep 22 17:42:01 2005
@@ -4,7 +4,7 @@
# Simple transaction: create a file inside transaction.
1 start /test
-1 write /test/entry1 create contents
+1 write /test/entry1 contents
2 dir /test
expect 1:entry1
1 dir /test
@@ -16,14 +16,14 @@
# Create a file and abort transaction.
1 start /test
-1 write /test/entry1 create contents
+1 write /test/entry1 contents
2 dir /test
expect 1:entry1
1 dir /test
1 abort
2 dir /test
-write /test/entry1 create contents
+write /test/entry1 contents
# Delete in transaction, commit
1 start /test
1 rm /test/entry1
@@ -34,7 +34,7 @@
2 dir /test
# Delete in transaction, abort.
-write /test/entry1 create contents
+write /test/entry1 contents
1 start /test
1 rm /test/entry1
expect 2:entry1
@@ -84,8 +84,8 @@
# Multiple events from single transaction don't trigger assert
1 watch /test token
2 start /test
-2 write /test/1 create contents
-2 write /test/2 create contents
+2 write /test/1 contents
+2 write /test/2 contents
2 commit
expect 1:/test/1:token
1 waitwatch
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/09domain.test
--- a/tools/xenstore/testsuite/09domain.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/09domain.test Thu Sep 22 17:42:01 2005
@@ -3,7 +3,7 @@
# Create a domain, write an entry.
expect handle is 1
introduce 1 100 7 /my/home
-1 write /entry1 create contents
+1 write /entry1 contents
expect entry1
expect tool
dir /
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/10domain-homedir.test
--- a/tools/xenstore/testsuite/10domain-homedir.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.test Thu Sep 22 17:42:01 2005
@@ -4,7 +4,7 @@
mkdir /home
expect handle is 1
introduce 1 100 7 /home
-1 write entry1 create contents
+1 write entry1 contents
expect contents
read /home/entry1
expect entry1
@@ -13,7 +13,7 @@
# Place a watch using a relative path: expect relative answer.
1 mkdir foo
1 watch foo token
-write /home/foo/bar create contents
+write /home/foo/bar contents
expect 1:foo/bar:token
1 waitwatch
1 ackwatch token
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/11domain-watch.test
--- a/tools/xenstore/testsuite/11domain-watch.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/11domain-watch.test Thu Sep 22 17:42:01 2005
@@ -1,13 +1,13 @@
# Test watching from a domain.
# Watch something, write to it, check watch has fired.
-write /test create contents
+write /test contents
mkdir /dir
expect handle is 1
introduce 1 100 7 /my/home
1 watch /test token
-write /test create contents2
+write /test contents2
expect 1:/test:token
1 waitwatch
1 ackwatch token
@@ -19,10 +19,10 @@
expect handle is 1
introduce 1 100 7 /my/home
1 watch /dir token
-write /dir/test create contents
-1 write /dir/test2 create contents2
-1 write /dir/test3 create contents3
-1 write /dir/test4 create contents4
+write /dir/test contents
+1 write /dir/test2 contents2
+1 write /dir/test3 contents3
+1 write /dir/test4 contents4
expect 1:/dir/test:token
1 waitwatch
1 ackwatch token
@@ -35,7 +35,7 @@
1 watch /dir token1
1 unwatch /dir token1
1 watch /dir token2
-write /dir/test2 create contents
+write /dir/test2 contents
expect 1:/dir/test2:token2
1 waitwatch
1 unwatch /dir token2
@@ -46,7 +46,7 @@
expect handle is 1
introduce 1 100 7 /my/home
1 watch /dir token1
-write /dir/test2 create contents
+write /dir/test2 contents
1 unwatch /dir token1
release 1
1 close
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/12readonly.test
--- a/tools/xenstore/testsuite/12readonly.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/12readonly.test Thu Sep 22 17:42:01 2005
@@ -1,6 +1,6 @@
# Test that read only connection can't alter store.
-write /test create contents
+write /test contents
readonly
expect test
@@ -20,9 +20,9 @@
# These don't work
expect write failed: Read-only file system
-write /test2 create contents
+write /test2 contents
expect write failed: Read-only file system
-write /test create contents
+write /test contents
expect setperm failed: Read-only file system
setperm /test 100 NONE
expect setperm failed: Read-only file system
@@ -35,7 +35,7 @@
# Check that watches work like normal.
watch / token
1 readwrite
-1 write /test create contents
+1 write /test contents
expect /test:token
waitwatch
ackwatch token
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/13watch-ack.test
--- a/tools/xenstore/testsuite/13watch-ack.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/13watch-ack.test Thu Sep 22 17:42:01 2005
@@ -13,10 +13,10 @@
1 watch /test/1 token1
1 watch /test/2 token2
1 watch /test/3 token3
-2 write /test/2 create contents2
+2 write /test/2 contents2
expect 1:/test/2:token2
1 waitwatch
-3 write /test/1 create contents1
-4 write /test/3 create contents3
+3 write /test/1 contents1
+4 write /test/3 contents3
1 ackwatch token2
1 close
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/14complexperms.test
--- a/tools/xenstore/testsuite/14complexperms.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/14complexperms.test Thu Sep 22 17:42:01 2005
@@ -12,13 +12,7 @@
expect *Permission denied
read /dir/file
expect *Permission denied
-write /dir/file none value
-expect *Permission denied
-write /dir/file create value
-expect *Permission denied
-write /dir/file excl value
-expect write failed: Invalid argument
-write /dir/file crap value
+write /dir/file value
expect *Permission denied
mkdir /dir/file
expect *Permission denied
@@ -29,8 +23,9 @@
getperm /dir/file
expect *Permission denied
setperm /dir/file 0 NONE
-watch /dir/file token
-1 write /dir/file create contents
+# We get no watch event when there's no permission. It's a corner case.
+watchnoack /dir/file token
+1 write /dir/file contents
1 rm /dir/file
expect waitwatch failed: Connection timed out
waitwatch
@@ -50,7 +45,7 @@
# Now it exists
setid 0
-write /dir/file create contents
+write /dir/file contents
setid 1
expect *Permission denied
@@ -58,13 +53,7 @@
expect *Permission denied
read /dir/file
expect *Permission denied
-write /dir/file none value
-expect *Permission denied
-write /dir/file create value
-expect *Permission denied
-write /dir/file excl value
-expect write failed: Invalid argument
-write /dir/file crap value
+write /dir/file value
expect *Permission denied
mkdir /dir/file
expect *Permission denied
@@ -75,8 +64,8 @@
getperm /dir/file
expect *Permission denied
setperm /dir/file 0 NONE
-watch /dir/file token
-1 write /dir/file create contents
+watchnoack /dir/file token
+1 write /dir/file contents
1 rm /dir/file
expect waitwatch failed: Connection timed out
waitwatch
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/15nowait.test
--- a/tools/xenstore/testsuite/15nowait.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/15nowait.test Thu Sep 22 17:42:01 2005
@@ -1,10 +1,10 @@
# If we don't wait for an ack, we can crash daemon as it never expects to be
# sending out two replies on top of each other.
-noackwrite /1 create 1
-noackwrite /2 create 2
-noackwrite /3 create 3
-noackwrite /4 create 4
-noackwrite /5 create 5
+noackwrite /1 1
+noackwrite /2 2
+noackwrite /3 3
+noackwrite /4 4
+noackwrite /5 5
readack
readack
readack
@@ -13,11 +13,11 @@
expect handle is 1
introduce 1 100 7 /my/home
-1 noackwrite /1 create 1
-1 noackwrite /2 create 2
-1 noackwrite /3 create 3
-1 noackwrite /4 create 4
-1 noackwrite /5 create 5
+1 noackwrite /1 1
+1 noackwrite /2 2
+1 noackwrite /3 3
+1 noackwrite /4 4
+1 noackwrite /5 5
1 readack
1 readack
1 readack
diff -r 97dbd9524a7e -r 06d84bf87159
tools/xenstore/testsuite/16block-watch-crash.test
--- a/tools/xenstore/testsuite/16block-watch-crash.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/16block-watch-crash.test Thu Sep 22 17:42:01 2005
@@ -4,8 +4,8 @@
watch /test token
1 start /test
# This will block on above
-noackwrite /test/entry create contents
-1 write /test/entry2 create contents
+noackwrite /test/entry contents
+1 write /test/entry2 contents
1 commit
readack
expect /test/entry2:token
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xenstore_client.c
--- a/tools/xenstore/xenstore_client.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xenstore_client.c Thu Sep 22 17:42:01 2005
@@ -102,7 +102,7 @@
optind++;
#elif defined(CLIENT_write)
success = xs_write(xsh, argv[optind], argv[optind + 1],
- strlen(argv[optind + 1]), O_CREAT);
+ strlen(argv[optind + 1]));
if (!success) {
warnx("could not write path %s", argv[optind]);
ret = 1;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xenstored_core.c Thu Sep 22 17:42:01 2005
@@ -961,14 +961,19 @@
return dir;
}
-/* path, flags, data... */
+static bool node_exists(struct connection *conn, const char *node)
+{
+ struct stat st;
+
+ return lstat(node_dir(conn->transaction, node), &st) == 0;
+}
+
+/* path, data... */
static void do_write(struct connection *conn, struct buffered_data *in)
{
unsigned int offset, datalen;
- char *vec[2];
+ char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
char *node, *tmppath;
- enum xs_perm_type mode;
- struct stat st;
/* Extra "strings" can be created by binary data. */
if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
@@ -985,37 +990,20 @@
if (transaction_block(conn, node))
return;
- offset = strlen(vec[0]) + strlen(vec[1]) + 2;
+ offset = strlen(vec[0]) + 1;
datalen = in->used - offset;
- if (streq(vec[1], XS_WRITE_NONE))
- mode = XS_PERM_WRITE;
- else if (streq(vec[1], XS_WRITE_CREATE))
- mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
- else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
- mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
- else {
- send_error(conn, EINVAL);
- return;
- }
-
- if (!check_node_perms(conn, node, mode)) {
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
send_error(conn, errno);
return;
}
- if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+ if (!node_exists(conn, node)) {
char *dir;
/* Does not exist... */
if (errno != ENOENT) {
send_error(conn, errno);
- return;
- }
-
- /* Not going to create it? */
- if (streq(vec[1], XS_WRITE_NONE)) {
- send_error(conn, ENOENT);
return;
}
@@ -1027,11 +1015,6 @@
} else {
/* Exists... */
- if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
- send_error(conn, EEXIST);
- return;
- }
-
tmppath = tempfile(node_datafile(conn->transaction, node),
in->buffer + offset, datalen);
if (!tmppath) {
@@ -1050,7 +1033,6 @@
static void do_mkdir(struct connection *conn, const char *node)
{
char *dir;
- struct stat st;
node = canonicalize(conn, node);
if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
@@ -1066,9 +1048,9 @@
if (transaction_block(conn, node))
return;
- /* Must not already exist. */
- if (lstat(node_dir(conn->transaction, node), &st) == 0) {
- send_error(conn, EEXIST);
+ /* If it already exists, fine. */
+ if (node_exists(conn, node)) {
+ send_ack(conn, XS_MKDIR);
return;
}
@@ -1089,6 +1071,15 @@
node = canonicalize(conn, node);
if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
+ /* Didn't exist already? Fine, if parent exists. */
+ if (errno == ENOENT) {
+ if (node_exists(conn, get_parent(node))) {
+ send_ack(conn, XS_RM);
+ return;
+ }
+ /* Restore errno, just in case. */
+ errno = ENOENT;
+ }
send_error(conn, errno);
return;
}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xenstored_watch.c Thu Sep 22 17:42:01 2005
@@ -236,6 +236,9 @@
trace_create(watch, "watch");
talloc_set_destructor(watch, destroy_watch);
send_ack(conn, XS_WATCH);
+
+ /* We fire once up front: simplifies clients and restart. */
+ add_event(conn, watch, watch->node);
}
void do_watch_ack(struct connection *conn, const char *token)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs.c Thu Sep 22 17:42:01 2005
@@ -326,38 +326,23 @@
}
/* Write the value of a single file.
- * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns false on failure.
*/
bool xs_write(struct xs_handle *h, const char *path,
- const void *data, unsigned int len, int createflags)
-{
- const char *flags;
- struct iovec iovec[3];
-
- /* Format: Flags (as string), path, data. */
- if (createflags == 0)
- flags = XS_WRITE_NONE;
- else if (createflags == O_CREAT)
- flags = XS_WRITE_CREATE;
- else if (createflags == (O_CREAT|O_EXCL))
- flags = XS_WRITE_CREATE_EXCL;
- else {
- errno = EINVAL;
- return false;
- }
+ const void *data, unsigned int len)
+{
+ struct iovec iovec[2];
iovec[0].iov_base = (void *)path;
iovec[0].iov_len = strlen(path) + 1;
- iovec[1].iov_base = (void *)flags;
- iovec[1].iov_len = strlen(flags) + 1;
- iovec[2].iov_base = (void *)data;
- iovec[2].iov_len = len;
+ iovec[1].iov_base = (void *)data;
+ iovec[1].iov_len = len;
return xs_bool(xs_talkv(h, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
}
/* Create a new directory.
- * Returns false on failure.
+ * Returns false on failure, or success if it already exists.
*/
bool xs_mkdir(struct xs_handle *h, const char *path)
{
@@ -365,7 +350,7 @@
}
/* Destroy a file or directory (directories must be empty).
- * Returns false on failure.
+ * Returns false on failure, or success if it doesn't exist.
*/
bool xs_rm(struct xs_handle *h, const char *path)
{
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs.h Thu Sep 22 17:42:01 2005
@@ -53,18 +53,18 @@
void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
/* Write the value of a single file.
- * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns false on failure.
*/
bool xs_write(struct xs_handle *h, const char *path, const void *data,
- unsigned int len, int createflags);
+ unsigned int len);
/* Create a new directory.
- * Returns false on failure.
+ * Returns false on failure, or success if it already exists.
*/
bool xs_mkdir(struct xs_handle *h, const char *path);
/* Destroy a file or directory (and children).
- * Returns false on failure.
+ * Returns false on failure, or success if it doesn't exist.
*/
bool xs_rm(struct xs_handle *h, const char *path);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_crashme.c
--- a/tools/xenstore/xs_crashme.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_crashme.c Thu Sep 22 17:42:01 2005
@@ -267,17 +267,12 @@
free(xs_read(h, name, &num));
break;
case 2: {
- int flags = random_flags(&state);
char *contents = talloc_asprintf(NULL, "%i",
get_randomness(&state));
unsigned int len = get_randomness(&state)%(strlen(contents)+1);
if (verbose)
- printf("WRITE %s %s %.*s\n", name,
- flags == O_CREAT ? "O_CREAT"
- : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
- : flags == 0 ? "0" : "CRAPFLAGS",
- len, contents);
- xs_write(h, name, contents, len, flags);
+ printf("WRITE %s %.*s\n", name, len, contents);
+ xs_write(h, name, contents, len);
break;
}
case 3:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_random.c
--- a/tools/xenstore/xs_random.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_random.c Thu Sep 22 17:42:01 2005
@@ -26,7 +26,7 @@
void *(*read)(void *h, const char *path, unsigned int *len);
bool (*write)(void *h, const char *path, const void *data,
- unsigned int len, int createflags);
+ unsigned int len);
bool (*mkdir)(void *h, const char *path);
@@ -74,9 +74,9 @@
static void maybe_convert_to_directory(const char *filename)
{
struct stat st;
- char *dirname = talloc_asprintf(filename, "%.*s",
- strrchr(filename, '/') - filename,
- filename);
+ char *dirname = talloc_asprintf(
+ filename, "%.*s",
+ (int)(strrchr(filename, '/') - filename), filename);
if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
convert_to_dir(dirname);
}
@@ -249,7 +249,7 @@
/* Copy permissions from parent */
command = talloc_asprintf(filename, "cp %.*s/.perms %s",
- strrchr(filename, '/') - filename,
+ (int)(strrchr(filename, '/') - filename),
filename, permfile);
do_command(command);
}
@@ -308,7 +308,7 @@
char *slash = strrchr(name + 1, '/');
if (!slash)
return talloc_strdup(name, "/");
- return talloc_asprintf(name, "%.*s", slash-name, name);
+ return talloc_asprintf(name, "%.*s", (int)(slash-name), name);
}
static void make_dirs(const char *filename)
@@ -333,40 +333,18 @@
static bool file_write(struct file_ops_info *info,
const char *path, const void *data,
- unsigned int len, int createflags)
+ unsigned int len)
{
char *filename = filename_to_data(path_to_name(info, path));
int fd;
- /* Kernel isn't strict, but library is. */
- if (createflags & ~(O_CREAT|O_EXCL)) {
- errno = EINVAL;
- return false;
- }
-
if (!write_ok(info, path))
return false;
- /* We regard it as existing if dir exists. */
- if (strends(filename, ".DATA")) {
- if (!createflags)
- createflags = O_CREAT;
- if (createflags & O_EXCL) {
- errno = EEXIST;
- return false;
- }
- }
-
- if (createflags & O_CREAT)
- make_dirs(parent_filename(filename));
-
- fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
- if (fd < 0) {
- /* FIXME: Another hack. */
- if (!(createflags & O_CREAT) && errno == EISDIR)
- errno = EEXIST;
+ make_dirs(parent_filename(filename));
+ fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0600);
+ if (fd < 0)
return false;
- }
if (write(fd, data, len) != (int)len)
barf_perror("Bad write to %s", filename);
@@ -385,7 +363,7 @@
make_dirs(parent_filename(dirname));
if (mkdir(dirname, 0700) != 0)
- return false;
+ return (errno == EEXIST);
init_perms(dirname);
return true;
@@ -401,8 +379,11 @@
return false;
}
- if (lstat(filename, &st) != 0)
- return false;
+ if (lstat(filename, &st) != 0) {
+ if (lstat(parent_filename(filename), &st) != 0)
+ return false;
+ return true;
+ }
if (!write_ok(info, path))
return false;
@@ -843,20 +824,6 @@
return ret;
}
-static int random_flags(int *state)
-{
- switch (get_randomness(state) % 4) {
- case 0:
- return 0;
- case 1:
- return O_CREAT;
- case 2:
- return O_CREAT|O_EXCL;
- default:
- return get_randomness(state);
- }
-}
-
/* Do the next operation, return the results. */
static char *do_next_op(struct ops *ops, void *h, int state, bool verbose)
{
@@ -880,18 +847,12 @@
ret = linearize_read(ops->read(h, name, &num), &num);
break;
case 2: {
- int flags = random_flags(&state);
char *contents = talloc_asprintf(NULL, "%i",
get_randomness(&state));
unsigned int len = get_randomness(&state)%(strlen(contents)+1);
if (verbose)
- printf("WRITE %s %s %.*s\n", name,
- flags == O_CREAT ? "O_CREAT"
- : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
- : flags == 0 ? "0" : "CRAPFLAGS",
- len, contents);
- ret = bool_to_errstring(ops->write(h, name, contents, len,
- flags));
+ printf("WRITE %s %.*s\n", name, len, contents);
+ ret = bool_to_errstring(ops->write(h, name, contents, len));
talloc_steal(ret, contents);
break;
}
@@ -1102,7 +1063,8 @@
ret = do_next_op(data->ops, h, i + data->seed, verbose);
if (verbose)
- printf("-> %.*s\n", strchr(ret, '\n') - ret, ret);
+ printf("-> %.*s\n",
+ (int)(strchr(ret, '\n') - ret), ret);
if (streq(ret, "FAILED:Bad file descriptor"))
goto out;
if (kill(daemon_pid, 0) != 0)
@@ -1373,13 +1335,14 @@
file = do_next_op(&file_ops, fileh, i+data->seed, verbose);
if (verbose)
- printf("-> %.*s\n", strchr(file, '/') - file, file);
+ printf("-> %.*s\n",
+ (int)(strchr(file, '/') - file), file);
if (verbose)
printf("XS: ");
xs = do_next_op(&xs_ops, xsh, i+data->seed, verbose);
if (verbose)
- printf("-> %.*s\n", strchr(xs, '/') - xs, xs);
+ printf("-> %.*s\n", (int)(strchr(xs, '/') - xs), xs);
if (!streq(file, xs))
goto out;
@@ -1547,7 +1510,8 @@
aborted++;
if (verbose)
- printf("-> %.*s\n", strchr(ret, '\n') - ret, ret);
+ printf("-> %.*s\n",
+ (int)(strchr(ret, '\n') - ret), ret);
talloc_free(ret);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_stress.c
--- a/tools/xenstore/xs_stress.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_stress.c Thu Sep 22 17:42:01 2005
@@ -61,7 +61,7 @@
barf_perror("%i: can't read %s iter %i",
childnum, file, i);
sprintf(tmp, "%i", atoi(contents) + 1);
- if (!xs_write(h, file, tmp, strlen(tmp)+1, 0))
+ if (!xs_write(h, file, tmp, strlen(tmp)+1))
barf_perror("%i: can't write %s iter %i",
childnum, file, i);
@@ -91,7 +91,7 @@
if (togo == 0) {
sprintf(filename, "%s/count", base);
- if (!xs_write(h, filename, "0", 2, O_EXCL|O_CREAT))
+ if (!xs_write(h, filename, "0", 1))
barf_perror("Writing to %s", filename);
return;
}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_test.c Thu Sep 22 17:42:01 2005
@@ -192,7 +192,7 @@
"Reads commands from stdin, one per line:"
" dir <path>\n"
" read <path>\n"
- " write <path> <flags> <value>...\n"
+ " write <path> <value>...\n"
" setid <id>\n"
" mkdir <path>\n"
" rm <path>\n"
@@ -200,6 +200,7 @@
" setperm <path> <id> <flags> ...\n"
" shutdown\n"
" watch <path> <token>\n"
+ " watchnoack <path> <token>\n"
" waitwatch\n"
" ackwatch <token>\n"
" unwatch <path> <token>\n"
@@ -213,7 +214,7 @@
" notimeout\n"
" readonly\n"
" readwrite\n"
- " noackwrite <path> <flags> <value>...\n"
+ " noackwrite <path> <value>...\n"
" readack\n"
" dump\n");
}
@@ -348,47 +349,22 @@
output("%.*s\n", len, value);
}
-static void do_write(unsigned int handle, char *path, char *flags, char *data)
-{
- int f;
-
- if (streq(flags, "none"))
- f = 0;
- else if (streq(flags, "create"))
- f = O_CREAT;
- else if (streq(flags, "excl"))
- f = O_CREAT | O_EXCL;
- else if (streq(flags, "crap"))
- f = 100;
- else
- barf("write flags 'none', 'create' or 'excl' only");
-
- if (!xs_write(handles[handle], path, data, strlen(data), f))
+static void do_write(unsigned int handle, char *path, char *data)
+{
+ if (!xs_write(handles[handle], path, data, strlen(data)))
failed(handle);
}
static void do_noackwrite(unsigned int handle,
- char *path, const char *flags, char *data)
+ char *path, char *data)
{
struct xsd_sockmsg msg;
- /* Format: Flags (as string), path, data. */
- if (streq(flags, "none"))
- flags = XS_WRITE_NONE;
- else if (streq(flags, "create"))
- flags = XS_WRITE_CREATE;
- else if (streq(flags, "excl"))
- flags = XS_WRITE_CREATE_EXCL;
- else
- barf("noackwrite flags 'none', 'create' or 'excl' only");
-
- msg.len = strlen(path) + 1 + strlen(flags) + 1 + strlen(data);
+ msg.len = strlen(path) + 1 + strlen(data);
msg.type = XS_WRITE;
if (!write_all_choice(handles[handle]->fd, &msg, sizeof(msg)))
failed(handle);
if (!write_all_choice(handles[handle]->fd, path, strlen(path) + 1))
- failed(handle);
- if (!write_all_choice(handles[handle]->fd, flags, strlen(flags) + 1))
failed(handle);
if (!write_all_choice(handles[handle]->fd, data, strlen(data)))
failed(handle);
@@ -505,10 +481,20 @@
failed(handle);
}
-static void do_watch(unsigned int handle, const char *node, const char *token)
+static void do_watch(unsigned int handle, const char *node, const char *token,
+ bool swallow_event)
{
if (!xs_watch(handles[handle], node, token))
failed(handle);
+
+ /* Convenient for testing... */
+ if (swallow_event) {
+ char **vec = xs_read_watch(handles[handle]);
+ if (!vec || !streq(vec[0], node) || !streq(vec[1], token))
+ failed(handle);
+ if (!xs_acknowledge_watch(handles[handle], token))
+ failed(handle);
+ }
}
static void set_timeout(void)
@@ -778,8 +764,7 @@
else if (streq(command, "read"))
do_read(handle, arg(line, 1));
else if (streq(command, "write"))
- do_write(handle,
- arg(line, 1), arg(line, 2), arg(line, 3));
+ do_write(handle, arg(line, 1), arg(line, 2));
else if (streq(command, "setid"))
do_setid(handle, arg(line, 1));
else if (streq(command, "mkdir"))
@@ -793,7 +778,9 @@
else if (streq(command, "shutdown"))
do_shutdown(handle);
else if (streq(command, "watch"))
- do_watch(handle, arg(line, 1), arg(line, 2));
+ do_watch(handle, arg(line, 1), arg(line, 2), true);
+ else if (streq(command, "watchnoack"))
+ do_watch(handle, arg(line, 1), arg(line, 2), false);
else if (streq(command, "waitwatch"))
do_waitwatch(handle);
else if (streq(command, "ackwatch"))
@@ -832,7 +819,7 @@
xs_daemon_close(handles[handle]);
handles[handle] = NULL;
} else if (streq(command, "noackwrite"))
- do_noackwrite(handle, arg(line,1), arg(line,2), arg(line,3));
+ do_noackwrite(handle, arg(line,1), arg(line,2));
else if (streq(command, "readack"))
do_readack(handle);
else
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/ia64/xen/grant_table.c
--- a/xen/arch/ia64/xen/grant_table.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/ia64/xen/grant_table.c Thu Sep 22 17:42:01 2005
@@ -850,7 +850,7 @@
#endif
static long
-gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+gnttab_transfer(gnttab_transfer_t *uop, unsigned int count)
{
struct domain *d = current->domain;
struct domain *e;
@@ -864,27 +864,27 @@
return GNTST_general_error;
#else
for (i = 0; i < count; i++) {
- gnttab_donate_t *gop = &uop[i];
+ gnttab_transfer_t *gop = &uop[i];
#if GRANT_DEBUG
- printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
+ printk("gnttab_transfer: i=%d mfn=%lx domid=%d gref=%08x\n",
i, gop->mfn, gop->domid, gop->handle);
#endif
page = &frame_table[gop->mfn];
if (unlikely(IS_XEN_HEAP_FRAME(page))) {
- printk("gnttab_donate: xen heap frame mfn=%lx\n",
+ printk("gnttab_transfer: xen heap frame mfn=%lx\n",
(unsigned long) gop->mfn);
gop->status = GNTST_bad_virt_addr;
continue;
}
if (unlikely(!pfn_valid(page_to_pfn(page)))) {
- printk("gnttab_donate: invalid pfn for mfn=%lx\n",
+ printk("gnttab_transfer: invalid pfn for mfn=%lx\n",
(unsigned long) gop->mfn);
gop->status = GNTST_bad_virt_addr;
continue;
}
if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
- printk("gnttab_donate: can't find domain %d\n", gop->domid);
+ printk("gnttab_transfer: can't find domain %d\n", gop->domid);
gop->status = GNTST_bad_domain;
continue;
}
@@ -904,7 +904,7 @@
x = y;
if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
(1 | PGC_allocated)) || unlikely(_nd != _d)) {
- printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+ printk("gnttab_transfer: Bad page values %p: ed=%p(%u), sd=%p,"
" caf=%08x, taf=%" PRtype_info "\n",
(void *) page_to_pfn(page),
d, d->domain_id, unpickle_domptr(_nd), x,
@@ -947,14 +947,14 @@
break;
}
if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
- printk("gnttab_donate: target domain is dying\n");
+ printk("gnttab_transfer: target domain is dying\n");
spin_unlock(&e->page_alloc_lock);
put_domain(e);
result = GNTST_general_error;
break;
}
- if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
- printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+ if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->ref))) {
+ printk("gnttab_transfer: gnttab_prepare_for_transfer fails\n");
spin_unlock(&e->page_alloc_lock);
put_domain(e);
result = GNTST_general_error;
@@ -964,10 +964,10 @@
ASSERT(e->tot_pages <= e->max_pages);
if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
unlikely(e->tot_pages == e->max_pages) ||
- unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
- printk("gnttab_donate: Transferee has no reservation headroom (%d,"
+ unlikely(!gnttab_prepare_for_transfer(e, d, gop->ref))) {
+ printk("gnttab_transfer: Transferee has no reservation headroom (%d,"
"%d) or provided a bad grant ref (%08x) or is dying (%p)\n",
- e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+ e->tot_pages, e->max_pages, gop->ref, e->d_flags);
spin_unlock(&e->page_alloc_lock);
put_domain(e);
result = GNTST_general_error;
@@ -987,7 +987,7 @@
* Transfer is all done: tell the guest about its new page
* frame.
*/
- gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+ gnttab_notify_transfer(e, d, gop->ref, gop->mfn);
put_domain(e);
@@ -1037,11 +1037,11 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
- case GNTTABOP_donate:
+ case GNTTABOP_transfer:
if (unlikely(!array_access_ok(uop, count,
- sizeof(gnttab_donate_t))))
+ sizeof(gnttab_transfer_t))))
goto out;
- rc = gnttab_donate(uop, count);
+ rc = gnttab_transfer(uop, count);
break;
default:
rc = -ENOSYS;
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/shadow.c Thu Sep 22 17:42:01 2005
@@ -697,6 +697,8 @@
}
}
+ __shadow_get_l2e(v, va, &sl2e);
+
if ( shadow_mode_refcounts(d) )
{
l1_pgentry_t old_spte;
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/shadow32.c Thu Sep 22 17:42:01 2005
@@ -399,22 +399,26 @@
perfc_decr(shadow_l1_pages);
shadow_demote(d, gpfn, gmfn);
free_shadow_l1_table(d, smfn);
+ d->arch.shadow_page_count--;
break;
case PGT_l2_shadow:
perfc_decr(shadow_l2_pages);
shadow_demote(d, gpfn, gmfn);
free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
+ d->arch.shadow_page_count--;
break;
case PGT_hl2_shadow:
perfc_decr(hl2_table_pages);
shadow_demote(d, gpfn, gmfn);
free_shadow_hl2_table(d, smfn);
+ d->arch.hl2_page_count--;
break;
case PGT_snapshot:
perfc_decr(snapshot_pages);
+ d->arch.snapshot_page_count--;
break;
default:
@@ -422,8 +426,6 @@
page_to_pfn(page), page->u.inuse.type_info);
break;
}
-
- d->arch.shadow_page_count--;
// No TLB flushes are needed the next time this page gets allocated.
//
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/shadow_public.c Thu Sep 22 17:42:01 2005
@@ -595,18 +595,21 @@
perfc_decr(shadow_l1_pages);
shadow_demote(d, gpfn, gmfn);
free_shadow_l1_table(d, smfn);
+ d->arch.shadow_page_count--;
break;
#if defined (__i386__)
case PGT_l2_shadow:
perfc_decr(shadow_l2_pages);
shadow_demote(d, gpfn, gmfn);
free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
+ d->arch.shadow_page_count--;
break;
case PGT_hl2_shadow:
perfc_decr(hl2_table_pages);
shadow_demote(d, gpfn, gmfn);
free_shadow_hl2_table(d, smfn);
+ d->arch.hl2_page_count--;
break;
#else
case PGT_l2_shadow:
@@ -614,12 +617,13 @@
case PGT_l4_shadow:
shadow_demote(d, gpfn, gmfn);
free_shadow_tables(d, smfn, shadow_type_to_level(type));
+ d->arch.shadow_page_count--;
break;
case PGT_fl1_shadow:
free_shadow_fl1_table(d, smfn);
+ d->arch.shadow_page_count--;
break;
-
#endif
case PGT_snapshot:
@@ -631,8 +635,6 @@
page_to_pfn(page), page->u.inuse.type_info);
break;
}
-
- d->arch.shadow_page_count--;
// No TLB flushes are needed the next time this page gets allocated.
//
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx.c Thu Sep 22 17:42:01 2005
@@ -377,12 +377,13 @@
static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
{
- unsigned long eip;
unsigned long gpa; /* FIXME: PAE */
int result;
-#if VMX_DEBUG
+#if 0 /* keep for debugging */
{
+ unsigned long eip;
+
__vmread(GUEST_RIP, &eip);
VMX_DBG_LOG(DBG_LEVEL_VMMU,
"vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
@@ -429,9 +430,9 @@
clts();
setup_fpu(current);
- __vmread(CR0_READ_SHADOW, &cr0);
+ __vmread_vcpu(CR0_READ_SHADOW, &cr0);
if (!(cr0 & X86_CR0_TS)) {
- __vmread(GUEST_CR0, &cr0);
+ __vmread_vcpu(GUEST_CR0, &cr0);
cr0 &= ~X86_CR0_TS;
__vmwrite(GUEST_CR0, cr0);
}
@@ -470,6 +471,8 @@
}
#endif
+ /* Unsupportable for virtualised CPUs. */
+ clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
}
regs->eax = (unsigned long) eax;
@@ -1100,6 +1103,11 @@
d->arch.arch_vmx.cpu_cr3, mfn);
}
+ if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
+ if(d->arch.arch_vmx.cpu_cr3)
+ put_page(pfn_to_page(get_mfn_from_pfn(
+ d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
+
/*
* VMX does not implement real-mode virtualization. We emulate
* real-mode by performing a world switch to VMXAssist whenever
@@ -1124,9 +1132,7 @@
__vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
}
}
- __vmread(GUEST_RIP, &eip);
- VMX_DBG_LOG(DBG_LEVEL_1,
- "Disabling CR0.PE at %%eip 0x%lx\n", eip);
+
if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &d->arch.arch_vmx.cpu_state);
__vmread(GUEST_RIP, &eip);
@@ -1365,17 +1371,17 @@
clts();
setup_fpu(current);
- __vmread(GUEST_CR0, &value);
+ __vmread_vcpu(GUEST_CR0, &value);
value &= ~X86_CR0_TS; /* clear TS */
__vmwrite(GUEST_CR0, value);
- __vmread(CR0_READ_SHADOW, &value);
+ __vmread_vcpu(CR0_READ_SHADOW, &value);
value &= ~X86_CR0_TS; /* clear TS */
__vmwrite(CR0_READ_SHADOW, value);
break;
case TYPE_LMSW:
TRACE_VMEXIT(1,TYPE_LMSW);
- __vmread(CR0_READ_SHADOW, &value);
+ __vmread_vcpu(CR0_READ_SHADOW, &value);
value = (value & ~0xF) |
(((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
return vmx_set_cr0(value);
@@ -1451,17 +1457,13 @@
(unsigned long)regs->edx);
}
+volatile unsigned long do_hlt_count;
/*
* Need to use this exit to reschedule
*/
-static inline void vmx_vmexit_do_hlt(void)
-{
-#if VMX_DEBUG
- unsigned long eip;
- __vmread(GUEST_RIP, &eip);
-#endif
- VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%lx", eip);
- raise_softirq(SCHEDULE_SOFTIRQ);
+void vmx_vmexit_do_hlt(void)
+{
+ do_block();
}
static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
@@ -1511,16 +1513,6 @@
}
}
-static inline void vmx_vmexit_do_mwait(void)
-{
-#if VMX_DEBUG
- unsigned long eip;
- __vmread(GUEST_RIP, &eip);
-#endif
- VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%lx", eip);
- raise_softirq(SCHEDULE_SOFTIRQ);
-}
-
#define BUF_SIZ 256
#define MAX_LINE 80
char print_buf[BUF_SIZ];
@@ -1626,9 +1618,13 @@
return;
}
- __vmread(GUEST_RIP, &eip);
- TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
- TRACE_VMEXIT(0,exit_reason);
+#ifdef TRACE_BUFFER
+ {
+ __vmread(GUEST_RIP, &eip);
+ TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
+ TRACE_VMEXIT(0,exit_reason);
+ }
+#endif
switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
@@ -1798,9 +1794,7 @@
__update_guest_eip(inst_len);
break;
case EXIT_REASON_MWAIT_INSTRUCTION:
- __get_instruction_length(inst_len);
- __update_guest_eip(inst_len);
- vmx_vmexit_do_mwait();
+ __vmx_bug(&regs);
break;
default:
__vmx_bug(&regs); /* should not happen */
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_intercept.c Thu Sep 22 17:42:01 2005
@@ -28,6 +28,7 @@
#include <xen/sched.h>
#include <asm/current.h>
#include <io_ports.h>
+#include <xen/event.h>
#ifdef CONFIG_VMX
@@ -205,6 +206,7 @@
/* Set the pending intr bit, and send evtchn notification to myself. */
if (test_and_set_bit(vpit->vector, vpit->intr_bitmap))
vpit->pending_intr_nr++; /* already set, then count the pending intr */
+ evtchn_set_pending(vpit->v, iopacket_port(vpit->v->domain));
/* pick up missed timer tick */
if ( missed_ticks > 0 ) {
@@ -281,6 +283,7 @@
}
vpit->intr_bitmap = intr;
+ vpit->v = d;
vpit->scheduled = NOW() + vpit->period;
set_ac_timer(&vpit->pit_timer, vpit->scheduled);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_io.c Thu Sep 22 17:42:01 2005
@@ -891,7 +891,7 @@
struct vcpu *v = current;
highest_vector = find_highest_pending_irq(v, &intr_type);
- __vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
+ __vmread_vcpu(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
if (highest_vector == -1) {
disable_irq_window(cpu_exec_control);
@@ -948,14 +948,6 @@
void vmx_do_resume(struct vcpu *d)
{
vmx_stts();
- if ( vmx_paging_enabled(d) )
- __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
- else
- // paging is not enabled in the guest
- __vmwrite(GUEST_CR3, pagetable_get_paddr(d->domain->arch.phys_table));
-
- __vmwrite(HOST_CR3, pagetable_get_paddr(d->arch.monitor_table));
- __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
if (event_pending(d)) {
vmx_check_events(d);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_platform.c Thu Sep 22 17:42:01 2005
@@ -671,13 +671,13 @@
if (inst->operand[0] & REGISTER) { /* dest is memory */
index = operand_index(inst->operand[0]);
value = get_reg_value(size, index, 0, regs);
- send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+ send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
} else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
value = inst->immediate;
- send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+ send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
} else if (inst->operand[0] & MEMORY) { /* dest is register */
/* send the request and wait for the value */
- send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0);
+ send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0);
} else {
printf("mmio_operands: invalid operand\n");
domain_crash_synchronous();
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_vmcs.c Thu Sep 22 17:42:01 2005
@@ -67,9 +67,6 @@
error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
MONITOR_PIN_BASED_EXEC_CONTROLS);
-
- error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
- MONITOR_CPU_BASED_EXEC_CONTROLS);
error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
@@ -117,12 +114,6 @@
unsigned long fs_base;
unsigned long gs_base;
#endif
-
- /* control registers */
- unsigned long cr3;
- unsigned long cr0;
- unsigned long cr4;
- unsigned long dr7;
};
#define round_pgdown(_p) ((_p)&PAGE_MASK) /* coped from domain.c */
@@ -217,8 +208,32 @@
/* Update CR3, GDT, LDT, TR */
unsigned int error = 0;
unsigned long pfn = 0;
+ unsigned long cr0, cr4;
struct pfn_info *page;
struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+ __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : );
+
+ error |= __vmwrite(GUEST_CR0, cr0);
+ cr0 &= ~X86_CR0_PG;
+ error |= __vmwrite(CR0_READ_SHADOW, cr0);
+ error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ MONITOR_CPU_BASED_EXEC_CONTROLS);
+
+ __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );
+
+#ifdef __x86_64__
+ error |= __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
+#else
+ error |= __vmwrite(GUEST_CR4, cr4);
+#endif
+
+#ifdef __x86_64__
+ cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
+#else
+ cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE);
+#endif
+ error |= __vmwrite(CR4_READ_SHADOW, cr4);
vmx_stts();
@@ -254,7 +269,7 @@
int error = 0;
union vmcs_arbytes arbytes;
unsigned long dr7;
- unsigned long eflags, shadow_cr;
+ unsigned long eflags;
/* MSR */
error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
@@ -326,27 +341,7 @@
arbytes.fields.seg_type = 0xb; /* 32-bit TSS (busy) */
error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
-
- error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */
-
- /* Initally PG, PE are not set*/
- shadow_cr = host_env->cr0;
- shadow_cr &= ~X86_CR0_PG;
- error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
/* CR3 is set in vmx_final_setup_guest */
-#ifdef __x86_64__
- error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
-#else
- error |= __vmwrite(GUEST_CR4, host_env->cr4);
-#endif
- shadow_cr = host_env->cr4;
-
-#ifdef __x86_64__
- shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
-#else
- shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
-#endif
- error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);
error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
@@ -403,12 +398,10 @@
host_env->cs_base = 0;
__asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
- host_env->cr0 = crn;
error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
/* CR3 is set in vmx_final_setup_hostos */
__asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
- host_env->cr4 = crn;
error |= __vmwrite(HOST_CR4, crn);
error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu Sep 22 17:34:14 2005
+++ b/xen/common/grant_table.c Thu Sep 22 17:42:01 2005
@@ -797,7 +797,7 @@
#endif
static long
-gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+gnttab_transfer(gnttab_transfer_t *uop, unsigned int count)
{
struct domain *d = current->domain;
struct domain *e;
@@ -805,19 +805,20 @@
u32 _d, _nd, x, y;
int i;
int result = GNTST_okay;
+ grant_entry_t *sha;
for ( i = 0; i < count; i++ )
{
- gnttab_donate_t *gop = &uop[i];
+ gnttab_transfer_t *gop = &uop[i];
#if GRANT_DEBUG
- printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
+ printk("gnttab_transfer: i=%d mfn=%lx domid=%d gref=%08x\n",
i, gop->mfn, gop->domid, gop->handle);
#endif
page = &frame_table[gop->mfn];
if ( unlikely(IS_XEN_HEAP_FRAME(page)))
{
- printk("gnttab_donate: xen heap frame mfn=%lx\n",
+ printk("gnttab_transfer: xen heap frame mfn=%lx\n",
(unsigned long) gop->mfn);
gop->status = GNTST_bad_virt_addr;
continue;
@@ -825,7 +826,7 @@
if ( unlikely(!pfn_valid(page_to_pfn(page))) )
{
- printk("gnttab_donate: invalid pfn for mfn=%lx\n",
+ printk("gnttab_transfer: invalid pfn for mfn=%lx\n",
(unsigned long) gop->mfn);
gop->status = GNTST_bad_virt_addr;
continue;
@@ -833,7 +834,7 @@
if ( unlikely((e = find_domain_by_id(gop->domid)) == NULL) )
{
- printk("gnttab_donate: can't find domain %d\n", gop->domid);
+ printk("gnttab_transfer: can't find domain %d\n", gop->domid);
gop->status = GNTST_bad_domain;
continue;
}
@@ -853,7 +854,7 @@
x = y;
if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
(1 | PGC_allocated)) || unlikely(_nd != _d)) {
- printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+ printk("gnttab_transfer: Bad page values %p: ed=%p(%u), sd=%p,"
" caf=%08x, taf=%" PRtype_info "\n",
(void *) page_to_pfn(page),
d, d->domain_id, unpickle_domptr(_nd), x,
@@ -888,12 +889,12 @@
*/
if ( unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
unlikely(e->tot_pages >= e->max_pages) ||
- unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle)) )
- {
- DPRINTK("gnttab_donate: Transferee has no reservation headroom "
+ unlikely(!gnttab_prepare_for_transfer(e, d, gop->ref)) )
+ {
+ DPRINTK("gnttab_transfer: Transferee has no reservation headroom "
"(%d,%d) or provided a bad grant ref (%08x) or "
"is dying (%lx)\n",
- e->tot_pages, e->max_pages, gop->handle, e->domain_flags);
+ e->tot_pages, e->max_pages, gop->ref, e->domain_flags);
spin_unlock(&e->page_alloc_lock);
put_domain(e);
gop->status = result = GNTST_general_error;
@@ -908,11 +909,11 @@
spin_unlock(&e->page_alloc_lock);
- /*
- * Transfer is all done: tell the guest about its new page
- * frame.
- */
- gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+ /* Tell the guest about its new page frame. */
+ sha = &e->grant_table->shared[gop->ref];
+ sha->frame = gop->mfn;
+ wmb();
+ sha->flags |= GTF_transfer_completed;
put_domain(e);
@@ -960,11 +961,11 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
- case GNTTABOP_donate:
+ case GNTTABOP_transfer:
if (unlikely(!array_access_ok(
- uop, count, sizeof(gnttab_donate_t))))
+ uop, count, sizeof(gnttab_transfer_t))))
goto out;
- rc = gnttab_donate(uop, count);
+ rc = gnttab_transfer(uop, count);
break;
default:
rc = -ENOSYS;
@@ -1171,46 +1172,6 @@
return 0;
}
-void
-gnttab_notify_transfer(
- struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
-{
- grant_entry_t *sha;
- unsigned long pfn;
-
-#if GRANT_DEBUG_VERBOSE
- DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
- rd->domain_id, ld->domain_id, ref);
-#endif
-
- sha = &rd->grant_table->shared[ref];
-
- spin_lock(&rd->grant_table->lock);
-
- pfn = sha->frame;
-
- if ( unlikely(pfn >= max_page ) )
- DPRINTK("Bad pfn (%lx)\n", pfn);
- else
- {
- set_pfn_from_mfn(frame, pfn);
-
- if ( unlikely(shadow_mode_log_dirty(ld)))
- mark_dirty(ld, frame);
-
- if (shadow_mode_translate(ld))
- set_mfn_from_pfn(pfn, frame);
- }
- sha->frame = __mfn_to_gpfn(rd, frame);
- sha->domid = rd->domain_id;
- wmb();
- sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
-
- spin_unlock(&rd->grant_table->lock);
-
- return;
-}
-
int
grant_table_create(
struct domain *d)
diff -r 97dbd9524a7e -r 06d84bf87159 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Thu Sep 22 17:34:14 2005
+++ b/xen/common/sched_sedf.c Thu Sep 22 17:42:01 2005
@@ -846,7 +846,7 @@
* the domain can't finish it's workload in the period
* -in addition to that the domain can be treated prioritised when
* extratime is available
- * -addition: experiments hve shown that this may have a HUGE impact on
+ * -addition: experiments have shown that this may have a HUGE impact on
* performance of other domains, becaus it can lead to excessive context
* switches
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/shadow.h Thu Sep 22 17:42:01 2005
@@ -1595,6 +1595,8 @@
}
}
+ __shadow_get_l2e(v, va, &sl2e);
+
if ( shadow_mode_refcounts(d) )
{
l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/vmx.h Thu Sep 22 17:42:01 2005
@@ -314,6 +314,57 @@
return 0;
}
+
+static always_inline void __vmwrite_vcpu(unsigned long field, unsigned long value)
+{
+ struct vcpu *v = current;
+
+ switch(field) {
+ case CR0_READ_SHADOW:
+ v->arch.arch_vmx.cpu_shadow_cr0 = value;
+ break;
+ case GUEST_CR0:
+ v->arch.arch_vmx.cpu_cr0 = value;
+ break;
+ case CPU_BASED_VM_EXEC_CONTROL:
+ v->arch.arch_vmx.cpu_based_exec_control = value;
+ break;
+ default:
+ printk("__vmwrite_cpu: invalid field %lx\n", field);
+ break;
+ }
+}
+
+static always_inline void __vmread_vcpu(unsigned long field, unsigned long *value)
+{
+ struct vcpu *v = current;
+
+ switch(field) {
+ case CR0_READ_SHADOW:
+ *value = v->arch.arch_vmx.cpu_shadow_cr0;
+ break;
+ case GUEST_CR0:
+ *value = v->arch.arch_vmx.cpu_cr0;
+ break;
+ case CPU_BASED_VM_EXEC_CONTROL:
+ *value = v->arch.arch_vmx.cpu_based_exec_control;
+ break;
+ default:
+ printk("__vmread_cpu: invalid field %lx\n", field);
+ break;
+ }
+
+ /*
+ * __vmwrite() can be used for non-current vcpu, and it's possible that
+ * the vcpu field is not initialized at that case.
+ *
+ */
+ if (!*value) {
+ __vmread(field, value);
+ __vmwrite_vcpu(field, *value);
+ }
+}
+
static inline int __vmwrite (unsigned long field, unsigned long value)
{
unsigned long eflags;
@@ -326,6 +377,15 @@
__save_flags(eflags);
if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
return -1;
+
+ switch(field) {
+ case CR0_READ_SHADOW:
+ case GUEST_CR0:
+ case CPU_BASED_VM_EXEC_CONTROL:
+ __vmwrite_vcpu(field, value);
+ break;
+ }
+
return 0;
}
@@ -379,11 +439,12 @@
{
unsigned long cr0;
- __vmread(GUEST_CR0, &cr0);
- if (!(cr0 & X86_CR0_TS))
+ __vmread_vcpu(GUEST_CR0, &cr0);
+ if (!(cr0 & X86_CR0_TS)) {
__vmwrite(GUEST_CR0, cr0 | X86_CR0_TS);
-
- __vmread(CR0_READ_SHADOW, &cr0);
+ }
+
+ __vmread_vcpu(CR0_READ_SHADOW, &cr0);
if (!(cr0 & X86_CR0_TS))
__vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
}
@@ -393,7 +454,7 @@
{
unsigned long cr0;
- __vmread(CR0_READ_SHADOW, &cr0);
+ __vmread_vcpu(CR0_READ_SHADOW, &cr0);
return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
}
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/vmx_virpit.h
--- a/xen/include/asm-x86/vmx_virpit.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/vmx_virpit.h Thu Sep 22 17:42:01 2005
@@ -35,8 +35,8 @@
unsigned int count; /* the 16 bit channel count */
unsigned int init_val; /* the init value for the counter */
-
-} ;
+ struct vcpu *v;
+};
/* to hook the ioreq packet to get the PIT initializaiton info */
extern void vmx_hooks_assist(struct vcpu *d);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h Thu Sep 22 17:42:01 2005
@@ -74,9 +74,12 @@
struct arch_vmx_struct {
struct vmcs_struct *vmcs; /* VMCS pointer in virtual */
unsigned long flags; /* VMCS flags */
+ unsigned long cpu_cr0; /* copy of guest CR0 */
+ unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
unsigned long cpu_cr2; /* save CR2 */
unsigned long cpu_cr3;
unsigned long cpu_state;
+ unsigned long cpu_based_exec_control;
struct msr_state msr_content;
void *io_bitmap_a, *io_bitmap_b;
};
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/public/grant_table.h Thu Sep 22 17:42:01 2005
@@ -215,18 +215,19 @@
} gnttab_dump_table_t;
/*
- * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain. The
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
* foreign domain has previously registered the details of the transfer.
* These can be identified from <handle>, a grant reference.
*/
-#define GNTTABOP_donate 4
+#define GNTTABOP_transfer 4
typedef struct {
- unsigned long mfn; /* 0 */
- domid_t domid; /* 4 */
- u16 handle; /* 8 */
- s16 status; /* 10: GNTST_* */
- u32 __pad;
-} gnttab_donate_t; /* 14 bytes */
+ /* IN parameters. */
+ unsigned long mfn;
+ domid_t domid;
+ grant_ref_t ref;
+ /* OUT parameters. */
+ s16 status;
+} gnttab_transfer_t;
/*
* Bitfield values for update_pin_status.flags.
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/public/io/netif.h Thu Sep 22 17:42:01 2005
@@ -10,10 +10,11 @@
#define __XEN_PUBLIC_IO_NETIF_H__
typedef struct netif_tx_request {
- unsigned long addr; /* Machine address of packet. */
+ grant_ref_t gref; /* Reference to buffer page */
+ u16 offset:15; /* Offset within buffer page */
u16 csum_blank:1; /* Proto csum field blank? */
- u16 id:15; /* Echoed in response message. */
- u16 size; /* Packet size in bytes. */
+ u16 id; /* Echoed in response message. */
+ u16 size; /* Packet size in bytes. */
} netif_tx_request_t;
typedef struct netif_tx_response {
@@ -22,21 +23,15 @@
} netif_tx_response_t;
typedef struct {
- u16 id; /* Echoed in response message. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
- grant_ref_t gref; /* 2: Reference to incoming granted frame */
-#endif
+ u16 id; /* Echoed in response message. */
+ grant_ref_t gref; /* Reference to incoming granted frame */
} netif_rx_request_t;
typedef struct {
-#ifdef CONFIG_XEN_NETDEV_GRANT
- u32 addr; /* 0: Offset in page of start of received packet */
-#else
- unsigned long addr; /* Machine address of packet. */
-#endif
- u16 csum_valid:1; /* Protocol checksum is validated? */
- u16 id:15;
- s16 status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
+ u16 offset; /* Offset in page of start of received packet */
+ u16 csum_valid; /* Protocol checksum is validated? */
+ u16 id;
+ s16 status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
} netif_rx_response_t;
/*
@@ -53,18 +48,8 @@
#define MASK_NETIF_RX_IDX(_i) ((_i)&(NETIF_RX_RING_SIZE-1))
#define MASK_NETIF_TX_IDX(_i) ((_i)&(NETIF_TX_RING_SIZE-1))
-#ifdef __x86_64__
-/*
- * This restriction can be lifted when we move netfront/netback to use
- * grant tables. This will remove memory_t fields from the above structures
- * and thus relax natural alignment restrictions.
- */
-#define NETIF_TX_RING_SIZE 128
-#define NETIF_RX_RING_SIZE 128
-#else
#define NETIF_TX_RING_SIZE 256
#define NETIF_RX_RING_SIZE 256
-#endif
/* This structure must fit in a memory page. */
typedef struct netif_tx_interface {
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/public/io/tpmif.h
--- a/xen/include/public/io/tpmif.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/public/io/tpmif.h Thu Sep 22 17:42:01 2005
@@ -20,8 +20,7 @@
unsigned long addr; /* Machine address of packet. */
int ref; /* grant table access reference */
u16 id; /* Echoed in response message. */
- u16 size:15; /* Packet size in bytes. */
- u16 mapped:1;
+ u16 size; /* Packet size in bytes. */
} tpmif_tx_request_t;
/*
@@ -30,13 +29,16 @@
*/
typedef u32 TPMIF_RING_IDX;
-#define TPMIF_TX_RING_SIZE 16
+#define TPMIF_TX_RING_SIZE 10
/* This structure must fit in a memory page. */
+
typedef struct {
- union {
- tpmif_tx_request_t req;
- } ring[TPMIF_TX_RING_SIZE];
+ tpmif_tx_request_t req;
+} tpmif_ring_t;
+
+typedef struct {
+ tpmif_ring_t ring[TPMIF_TX_RING_SIZE];
} tpmif_tx_interface_t;
#endif
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/xen/grant_table.h
--- a/xen/include/xen/grant_table.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/xen/grant_table.h Thu Sep 22 17:42:01 2005
@@ -106,12 +106,6 @@
gnttab_prepare_for_transfer(
struct domain *rd, struct domain *ld, grant_ref_t ref);
-/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
-void
-gnttab_notify_transfer(
- struct domain *rd, struct domain *ld,
- grant_ref_t ref, unsigned long frame);
-
/* Domain death release of granted device mappings of other domains.*/
void
gnttab_release_dev_mappings(grant_table_t *gt);
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/architecture.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/architecture.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,140 @@
+\chapter{Virtual Architecture}
+
+On a Xen-based system, the hypervisor itself runs in {\it ring 0}. It
+has full access to the physical memory available in the system and is
+responsible for allocating portions of it to the domains. Guest
+operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
+they see fit. Segmentation is used to prevent the guest OS from
+accessing the portion of the address space that is reserved for Xen.
+We expect most guest operating systems will use ring 1 for their own
+operation and place applications in ring 3.
+
+In this chapter we consider the basic virtual architecture provided by
+Xen: the basic CPU state, exception and interrupt handling, and time.
+Other aspects such as memory and device access are discussed in later
+chapters.
+
+
+\section{CPU state}
+
+All privileged state must be handled by Xen. The guest OS has no
+direct access to CR3 and is not permitted to update privileged bits in
+EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
+these are analogous to system calls but occur from ring 1 to ring 0.
+
+A list of all hypercalls is given in Appendix~\ref{a:hypercalls}.
+
+
+\section{Exceptions}
+
+A virtual IDT is provided --- a domain can submit a table of trap
+handlers to Xen via the {\tt set\_trap\_table()} hypercall. Most trap
+handlers are identical to native x86 handlers, although the page-fault
+handler is somewhat different.
+
+
+\section{Interrupts and events}
+
+Interrupts are virtualized by mapping them to \emph{events}, which are
+delivered asynchronously to the target domain using a callback
+supplied via the {\tt set\_callbacks()} hypercall. A guest OS can map
+these events onto its standard interrupt dispatch mechanisms. Xen is
+responsible for determining the target domain that will handle each
+physical interrupt source. For more details on the binding of event
+sources to events, see Chapter~\ref{c:devices}.
+
+
+\section{Time}
+
+Guest operating systems need to be aware of the passage of both real
+(or wallclock) time and their own `virtual time' (the time for which
+they have been executing). Furthermore, Xen has a notion of time which
+is used for scheduling. The following notions of time are provided:
+
+\begin{description}
+\item[Cycle counter time.]
+
+ This provides a fine-grained time reference. The cycle counter time
+ is used to accurately extrapolate the other time references. On SMP
+ machines it is currently assumed that the cycle counter time is
+ synchronized between CPUs. The current x86-based implementation
+ achieves this within inter-CPU communication latencies.
+
+\item[System time.]
+
+ This is a 64-bit counter which holds the number of nanoseconds that
+ have elapsed since system boot.
+
+\item[Wall clock time.]
+
+ This is the time of day in a Unix-style {\tt struct timeval}
+ (seconds and microseconds since 1 January 1970, adjusted by leap
+ seconds). An NTP client hosted by {\it domain 0} can keep this
+ value accurate.
+
+\item[Domain virtual time.]
+
+ This progresses at the same pace as system time, but only while a
+ domain is executing --- it stops while a domain is de-scheduled.
+ Therefore the share of the CPU that a domain receives is indicated
+ by the rate at which its virtual time increases.
+
+\end{description}
+
+
+Xen exports timestamps for system time and wall-clock time to guest
+operating systems through a shared page of memory. Xen also provides
+the cycle counter time at the instant the timestamps were calculated,
+and the CPU frequency in Hertz. This allows the guest to extrapolate
+system and wall-clock times accurately based on the current cycle
+counter time.
+
+Since all time stamps need to be updated and read \emph{atomically}
+two version numbers are also stored in the shared info page. The first
+is incremented prior to an update, while the second is only
+incremented afterwards. Thus a guest can be sure that it read a
+consistent state by checking the two version numbers are equal.
+
+Xen includes a periodic ticker which sends a timer event to the
+currently executing domain every 10ms. The Xen scheduler also sends a
+timer event whenever a domain is scheduled; this allows the guest OS
+to adjust for the time that has passed while it has been inactive. In
+addition, Xen allows each domain to request that they receive a timer
+event sent at a specified system time by using the {\tt
+ set\_timer\_op()} hypercall. Guest OSes may use this timer to
+implement timeout values when they block.
+
+
+
+%% % akw: demoting this to a section -- not sure if there is any point
+%% % though, maybe just remove it.
+
+\section{Xen CPU Scheduling}
+
+Xen offers a uniform API for CPU schedulers. It is possible to choose
+from a number of schedulers at boot and it should be easy to add more.
+The BVT, Atropos and Round Robin schedulers are part of the normal Xen
+distribution. BVT provides proportional fair shares of the CPU to the
+running domains. Atropos can be used to reserve absolute shares of
+the CPU for each domain. Round-robin is provided as an example of
+Xen's internal scheduler API.
+
+\paragraph*{Note: SMP host support}
+Xen has always supported SMP host systems. Domains are statically
+assigned to CPUs, either at creation time or when manually pinning to
+a particular CPU. The current schedulers then run locally on each CPU
+to decide which of the assigned domains should be run there. The
+user-level control software can be used to perform coarse-grain
+load-balancing between CPUs.
+
+
+%% More information on the characteristics and use of these schedulers
+%% is available in {\tt Sched-HOWTO.txt}.
+
+
+\section{Privileged operations}
+
+Xen exports an extended interface to privileged domains (viz.\ {\it
+ Domain 0}). This allows such domains to build and boot other domains
+on the server, and provides control interfaces for managing
+scheduling, memory, networking, and block devices.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/debugging.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/debugging.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,62 @@
+\chapter{Debugging}
+
+Xen provides tools for debugging both Xen and guest OSes. Currently, the
+Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
+debugging of Xen itself and of OS kernels running on top of Xen. The Trace
+Buffer provides a lightweight means to log data about Xen's internal state and
+behaviour at runtime, for later analysis.
+
+\section{Pervasive Debugger}
+
+Information on using the pervasive debugger is available in pdb.txt.
+
+
+\section{Trace Buffer}
+
+The trace buffer provides a means to observe Xen's operation from domain 0.
+Trace events, inserted at key points in Xen's code, record data that can be
+read by the {\tt xentrace} tool. Recording these events has a low overhead
+and hence the trace buffer may be useful for debugging timing-sensitive
+behaviours.
+
+\subsection{Internal API}
+
+To use the trace buffer functionality from within Xen, you must {\tt \#include
+<xen/trace.h>}, which contains definitions related to the trace buffer. Trace
+events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
+2, 3, 4 or 5) macros. These all take an event number, plus {\tt x} additional
+(32-bit) data as their arguments. For trace buffer-enabled builds of Xen these
+will insert the event ID and data into the trace buffer, along with the current
+value of the CPU cycle-counter. For builds without the trace buffer enabled,
+the macros expand to no-ops and thus can be left in place without incurring
+overheads.
+
+\subsection{Trace-enabled builds}
+
+By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
+is not defined). It can be enabled separately by defining {\tt TRACE\_BUFFER},
+either in {\tt <xen/config.h>} or on the gcc command line.
+
+The size (in pages) of the per-CPU trace buffers can be specified using the
+{\tt tbuf\_size=n } boot parameter to Xen. If the size is set to 0, the trace
+buffers will be disabled.
+
+\subsection{Dumping trace data}
+
+When running a trace buffer build of Xen, trace data are written continuously
+into the buffer data areas, with newer data overwriting older data. This data
+can be captured using the {\tt xentrace} program in domain 0.
+
+The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
+buffers into its address space. It then periodically polls all the buffers for
+new data, dumping out any new records from each buffer in turn. As a result,
+for machines with multiple (logical) CPUs, the trace buffer output will not be
+in overall chronological order.
+
+The output from {\tt xentrace} can be post-processed using {\tt
+xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
+{\tt xentrace\_format} (used to pretty-print trace data). For the predefined
+trace points, there is an example format file in {\tt tools/xentrace/formats }.
+
+For more information, see the manual pages for {\tt xentrace}, {\tt
+xentrace\_format} and {\tt xentrace\_cpusplit}.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/devices.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/devices.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,178 @@
+\chapter{Devices}
+\label{c:devices}
+
+Devices such as network and disk are exported to guests using a split
+device driver. The device driver domain, which accesses the physical
+device directly also runs a \emph{backend} driver, serving requests to
+that device from guests. Each guest will use a simple \emph{frontend}
+driver, to access the backend. Communication between these domains is
+composed of two parts: First, data is placed onto a shared memory page
+between the domains. Second, an event channel between the two domains
+is used to pass notification that data is outstanding. This
+separation of notification from data transfer allows message batching,
+and results in very efficient device access.
+
+Event channels are used extensively in device virtualization; each
+domain has a number of end-points or \emph{ports} each of which may be
+bound to one of the following \emph{event sources}:
+\begin{itemize}
+ \item a physical interrupt from a real device,
+ \item a virtual interrupt (callback) from Xen, or
+ \item a signal from another domain
+\end{itemize}
+
+Events are lightweight and do not carry much information beyond the
+source of the notification. Hence when performing bulk data transfer,
+events are typically used as synchronization primitives over a shared
+memory transport. Event channels are managed via the {\tt
+ event\_channel\_op()} hypercall; for more details see
+Section~\ref{s:idc}.
+
+This chapter focuses on some individual device interfaces available to
+Xen guests.
+
+
+\section{Network I/O}
+
+Virtual network device services are provided by shared memory
+communication with a backend domain. From the point of view of other
+domains, the backend may be viewed as a virtual ethernet switch
+element with each domain having one or more virtual network interfaces
+connected to it.
+
+\subsection{Backend Packet Handling}
+
+The backend driver is responsible for a variety of actions relating to
+the transmission and reception of packets from the physical device.
+With regard to transmission, the backend performs these key actions:
+
+\begin{itemize}
+\item {\bf Validation:} To ensure that domains do not attempt to
+ generate invalid (e.g. spoofed) traffic, the backend driver may
+ validate headers ensuring that source MAC and IP addresses match the
+ interface that they have been sent from.
+
+ Validation functions can be configured using standard firewall rules
+ ({\small{\tt iptables}} in the case of Linux).
+
+\item {\bf Scheduling:} Since a number of domains can share a single
+ physical network interface, the backend must mediate access when
+ several domains each have packets queued for transmission. This
+ general scheduling function subsumes basic shaping or rate-limiting
+ schemes.
+
+\item {\bf Logging and Accounting:} The backend domain can be
+ configured with classifier rules that control how packets are
+ accounted or logged. For example, log messages might be generated
+ whenever a domain attempts to send a TCP packet containing a SYN.
+\end{itemize}
+
+On receipt of incoming packets, the backend acts as a simple
+demultiplexer: Packets are passed to the appropriate virtual interface
+after any necessary logging and accounting have been carried out.
+
+\subsection{Data Transfer}
+
+Each virtual interface uses two ``descriptor rings'', one for
+transmit, the other for receive. Each descriptor identifies a block
+of contiguous physical memory allocated to the domain.
+
+The transmit ring carries packets to transmit from the guest to the
+backend domain. The return path of the transmit ring carries messages
+indicating that the contents have been physically transmitted and the
+backend no longer requires the associated pages of memory.
+
+To receive packets, the guest places descriptors of unused pages on
+the receive ring. The backend will return received packets by
+exchanging these pages in the domain's memory with new pages
+containing the received data, and passing back descriptors regarding
+the new packets on the ring. This zero-copy approach allows the
+backend to maintain a pool of free pages to receive packets into, and
+then deliver them to appropriate domains after examining their
+headers.
+
+% Real physical addresses are used throughout, with the domain
+% performing translation from pseudo-physical addresses if that is
+% necessary.
+
+If a domain does not keep its receive ring stocked with empty buffers
+then packets destined to it may be dropped. This provides some
+defence against receive livelock problems because an overloaded domain
+will cease to receive further data. Similarly, on the transmit path,
+it provides the application with feedback on the rate at which packets
+are able to leave the system.
+
+Flow control on rings is achieved by including a pair of producer
+indexes on the shared ring page. Each side will maintain a private
+consumer index indicating the next outstanding message. In this
+manner, the domains cooperate to divide the ring into two message
+lists, one in each direction. Notification is decoupled from the
+immediate placement of new messages on the ring; the event channel
+will be used to generate notification when {\em either} a certain
+number of outstanding messages are queued, {\em or} a specified number
+of nanoseconds have elapsed since the oldest message was placed on the
+ring.
+
+%% Not sure if my version is any better -- here is what was here
+%% before: Synchronization between the backend domain and the guest is
+%% achieved using counters held in shared memory that is accessible to
+%% both. Each ring has associated producer and consumer indices
+%% indicating the area in the ring that holds descriptors that contain
+%% data. After receiving {\it n} packets or {\t nanoseconds} after
+%% receiving the first packet, the hypervisor sends an event to the
+%% domain.
+
+
+\section{Block I/O}
+
+All guest OS disk access goes through the virtual block device VBD
+interface. This interface allows domains access to portions of block
+storage devices visible to the block backend device. The VBD
+interface is a split driver, similar to the network interface
+described above. A single shared memory ring is used between the
+frontend and backend drivers, across which read and write messages are
+sent.
+
+Any block device accessible to the backend domain, including
+network-based block (iSCSI, *NBD, etc), loopback and LVM/MD devices,
+can be exported as a VBD. Each VBD is mapped to a device node in the
+guest, specified in the guest's startup configuration.
+
+Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
+similar functionality can be achieved using the more complete LVM
+system, which is already in widespread use.
+
+\subsection{Data Transfer}
+
+The single ring between the guest and the block backend supports three
+messages:
+
+\begin{description}
+\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to
+ this guest from the backend. The request includes a descriptor of a
+ free page into which the reply will be written by the backend.
+
+\item [{\small {\tt READ}}:] Read data from the specified block
+ device. The front end identifies the device and location to read
+ from and attaches pages for the data to be copied to (typically via
+ DMA from the device). The backend acknowledges completed read
+ requests as they finish.
+
+\item [{\small {\tt WRITE}}:] Write data to the specified block
+ device. This functions essentially as {\small {\tt READ}}, except
+ that the data moves to the device instead of from it.
+\end{description}
+
+%% um... some old text: In overview, the same style of descriptor-ring
+%% that is used for network packets is used here. Each domain has one
+%% ring that carries operation requests to the hypervisor and carries
+%% the results back again.
+
+%% Rather than copying data, the backend simply maps the domain's
+%% buffers in order to enable direct DMA to them. The act of mapping
+%% the buffers also increases the reference counts of the underlying
+%% pages, so that the unprivileged domain cannot try to return them to
+%% the hypervisor, install them as page tables, or any other unsafe
+%% behaviour.
+%%
+%% % block API here
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/further_info.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/further_info.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,49 @@
+\chapter{Further Information}
+
+If you have questions that are not answered by this manual, the
+sources of information listed below may be of interest to you. Note
+that bug reports, suggestions and contributions related to the
+software (or the documentation) should be sent to the Xen developers'
+mailing list (address below).
+
+
+\section{Other documentation}
+
+If you are mainly interested in using (rather than developing for)
+Xen, the \emph{Xen Users' Manual} is distributed in the {\tt docs/}
+directory of the Xen source distribution.
+
+% Various HOWTOs are also available in {\tt docs/HOWTOS}.
+
+
+\section{Online references}
+
+The official Xen web site is found at:
+\begin{quote}
+{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
+\end{quote}
+
+This contains links to the latest versions of all on-line
+documentation.
+
+
+\section{Mailing lists}
+
+There are currently four official Xen mailing lists:
+
+\begin{description}
+\item[xen-devel@xxxxxxxxxxxxxxxxxxx] Used for development
+ discussions and bug reports. Subscribe at: \\
+ {\small {\tt http://lists.xensource.com/xen-devel}}
+\item[xen-users@xxxxxxxxxxxxxxxxxxx] Used for installation and usage
+ discussions and requests for help. Subscribe at: \\
+ {\small {\tt http://lists.xensource.com/xen-users}}
+\item[xen-announce@xxxxxxxxxxxxxxxxxxx] Used for announcements only.
+ Subscribe at: \\
+ {\small {\tt http://lists.xensource.com/xen-announce}}
+\item[xen-changelog@xxxxxxxxxxxxxxxxxxx] Changelog feed
+ from the unstable and 2.0 trees - developer oriented. Subscribe at: \\
+ {\small {\tt http://lists.xensource.com/xen-changelog}}
+\end{description}
+
+Of these, xen-devel is the most active.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/hypercalls.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/hypercalls.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,524 @@
+
+\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
+
+\chapter{Xen Hypercalls}
+\label{a:hypercalls}
+
+Hypercalls represent the procedural interface to Xen; this appendix
+categorizes and describes the current set of hypercalls.
+
+\section{Invoking Hypercalls}
+
+Hypercalls are invoked in a manner analogous to system calls in a
+conventional operating system; a software interrupt is issued which
+vectors to an entry point within Xen. On x86\_32 machines the
+instruction required is {\tt int \$82}; the (real) IDT is setup so
+that this may only be issued from within ring 1. The particular
+hypercall to be invoked is contained in {\tt EAX} --- a list
+mapping these values to symbolic hypercall names can be found
+in {\tt xen/include/public/xen.h}.
+
+On some occasions a set of hypercalls will be required to carry
+out a higher-level function; a good example is when a guest
+operating system wishes to context switch to a new process which
+requires updating various privileged CPU state. As an optimization
+for these cases, there is a generic mechanism to issue a set of
+hypercalls as a batch:
+
+\begin{quote}
+\hypercall{multicall(void *call\_list, int nr\_calls)}
+
+Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
+the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
+call\_list}. Each entry contains the hypercall operation code followed
+by up to 7 word-sized arguments.
+\end{quote}
+
+Note that multicalls are provided purely as an optimization; there is
+no requirement to use them when first porting a guest operating
+system.
+
+
+\section{Virtual CPU Setup}
+
+At start of day, a guest operating system needs to setup the virtual
+CPU it is executing on. This includes installing vectors for the
+virtual IDT so that the guest OS can handle interrupts, page faults,
+etc. However the very first thing a guest OS must setup is a pair
+of hypervisor callbacks: these are the entry points which Xen will
+use when it wishes to notify the guest OS of an occurrence.
+
+\begin{quote}
+\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
+ event\_address, unsigned long failsafe\_selector, unsigned long
+ failsafe\_address) }
+
+Register the normal (``event'') and failsafe callbacks for
+event processing. In each case the code segment selector and
+address within that segment are provided. The selectors must
+have RPL 1; in XenLinux we simply use the kernel's CS for both
+{\tt event\_selector} and {\tt failsafe\_selector}.
+
+The value {\tt event\_address} specifies the address of the guest OSes
+event handling and dispatch routine; the {\tt failsafe\_address}
+specifies a separate entry point which is used only if a fault occurs
+when Xen attempts to use the normal callback.
+\end{quote}
+
+
+After installing the hypervisor callbacks, the guest OS can
+install a `virtual IDT' by using the following hypercall:
+
+\begin{quote}
+\hypercall{set\_trap\_table(trap\_info\_t *table)}
+
+Install one or more entries into the per-domain
+trap handler table (essentially a software version of the IDT).
+Each entry in the array pointed to by {\tt table} includes the
+exception vector number with the corresponding segment selector
+and entry point. Most guest OSes can use the same handlers on
+Xen as when running on the real hardware; an exception is the
+page fault handler (exception vector 14) where a modified
+stack-frame layout is used.
+
+
+\end{quote}
+
+
+
+\section{Scheduling and Timer}
+
+Domains are preemptively scheduled by Xen according to the
+parameters installed by domain 0 (see Section~\ref{s:dom0ops}).
+In addition, however, a domain may choose to explicitly
+control certain behavior with the following hypercall:
+
+\begin{quote}
+\hypercall{sched\_op(unsigned long op)}
+
+Request scheduling operation from hypervisor. The options are: {\it
+yield}, {\it block}, and {\it shutdown}. {\it yield} keeps the
+calling domain runnable but may cause a reschedule if other domains
+are runnable. {\it block} removes the calling domain from the run
+queue and causes it to sleep until an event is delivered to it. {\it
+shutdown} is used to end the domain's execution; the caller can
+additionally specify whether the domain should reboot, halt or
+suspend.
+\end{quote}
+
+To aid the implementation of a process scheduler within a guest OS,
+Xen provides a virtual programmable timer:
+
+\begin{quote}
+\hypercall{set\_timer\_op(uint64\_t timeout)}
+
+Request a timer event to be sent at the specified system time (time
+in nanoseconds since system boot). The hypercall actually passes the
+64-bit timeout value as a pair of 32-bit values.
+
+\end{quote}
+
+Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op}
+allows block-with-timeout semantics.
+
+
+\section{Page Table Management}
+
+Since guest operating systems have read-only access to their page
+tables, Xen must be involved when making any changes. The following
+multi-purpose hypercall can be used to modify page-table entries,
+update the machine-to-physical mapping table, flush the TLB, install
+a new page-table base pointer, and more.
+
+\begin{quote}
+\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)}
+
+Update the page table for the domain; a set of {\tt count} updates are
+submitted for processing in a batch, with {\tt success\_count} being
+updated to report the number of successful updates.
+
+Each element of {\tt req[]} contains a pointer (address) and value;
+the least significant 2-bits of the pointer are used to distinguish
+the type of update requested as follows:
+\begin{description}
+
+\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
+page table entry to the associated value; Xen will check that the
+update is safe, as described in Chapter~\ref{c:memory}.
+
+\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
+ machine-to-physical table. The calling domain must own the machine
+ page in question (or be privileged).
+
+\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
+The set of additional MMU operations is considerable, and includes
+updating {\tt cr3} (or just re-installing it for a TLB flush),
+flushing the cache, installing a new LDT, or pinning \& unpinning
+page-table pages (to ensure their reference count doesn't drop to zero
+which would require a revalidation of all entries).
+
+Further extended commands are used to deal with granting and
+acquiring page ownership; see Section~\ref{s:idc}.
+
+
+\end{description}
+
+More details on the precise format of all commands can be
+found in {\tt xen/include/public/xen.h}.
+
+
+\end{quote}
+
+Explicitly updating batches of page table entries is extremely
+efficient, but can require a number of alterations to the guest
+OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
+recommended for new OS ports.
+
+Regardless of which page table update mode is being used, however,
+there are some occasions (notably handling a demand page fault) where
+a guest OS will wish to modify exactly one PTE rather than a
+batch. This is catered for by the following:
+
+\begin{quote}
+\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
+val, \\ unsigned long flags)}
+
+Update the currently installed PTE for the page {\tt page\_nr} to
+{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification
+is safe before applying it. The {\tt flags} determine which kind
+of TLB flush, if any, should follow the update.
+
+\end{quote}
+
+Finally, sufficiently privileged domains may occasionally wish to manipulate
+the pages of others:
+\begin{quote}
+
+\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
+unsigned long val, unsigned long flags, uint16\_t domid)}
+
+Identical to {\tt update\_va\_mapping()} save that the pages being
+mapped must belong to the domain {\tt domid}.
+
+\end{quote}
+
+This privileged operation is currently used by backend virtual device
+drivers to safely map pages containing I/O data.
+
+
+
+\section{Segmentation Support}
+
+Xen allows guest OSes to install a custom GDT if they require it;
+this is context switched transparently whenever a domain is
+[de]scheduled. The following hypercall is effectively a
+`safe' version of {\tt lgdt}:
+
+\begin{quote}
+\hypercall{set\_gdt(unsigned long *frame\_list, int entries)}
+
+Install a global descriptor table for a domain; {\tt frame\_list} is
+an array of up to 16 machine page frames within which the GDT resides,
+with {\tt entries} being the actual number of descriptor-entry
+slots. All page frames must be mapped read-only within the guest's
+address space, and the table must be large enough to contain Xen's
+reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
+
+\end{quote}
+
+Many guest OSes will also wish to install LDTs; this is achieved by
+using {\tt mmu\_update()} with an extended command, passing the
+linear address of the LDT base along with the number of entries. No
+special safety checks are required; Xen needs to perform this task
+simply since {\tt lldt} requires CPL 0.
+
+
+Xen also allows guest operating systems to update just an
+individual segment descriptor in the GDT or LDT:
+
+\begin{quote}
+\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
+unsigned long word2)}
+
+Update the GDT/LDT entry at machine address {\tt ma}; the new
+8-byte descriptor is stored in {\tt word1} and {\tt word2}.
+Xen performs a number of checks to ensure the descriptor is
+valid.
+
+\end{quote}
+
+Guest OSes can use the above in place of context switching entire
+LDTs (or the GDT) when the number of changing descriptors is small.
+
+\section{Context Switching}
+
+When a guest OS wishes to context switch between two processes,
+it can use the page table and segmentation hypercalls described
+above to perform the bulk of the privileged work. In addition,
+however, it will need to invoke Xen to switch the kernel (ring 1)
+stack pointer:
+
+\begin{quote}
+\hypercall{stack\_switch(unsigned long ss, unsigned long esp)}
+
+Request kernel stack switch from hypervisor; {\tt ss} is the new
+stack segment, and {\tt esp} is the new stack pointer.
+
+\end{quote}
+
+A final useful hypercall for context switching allows ``lazy''
+save and restore of floating point state:
+
+\begin{quote}
+\hypercall{fpu\_taskswitch(void)}
+
+This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
+control register; this means that the next attempt to use floating
+point will cause a trap which the guest OS can catch. Typically it will
+then save/restore the FP state, and clear the {\tt TS} bit.
+\end{quote}
+
+This is provided as an optimization only; guest OSes can also choose
+to save and restore FP state on all context switches for simplicity.
+
+
+\section{Physical Memory Management}
+
+As mentioned previously, each domain has a maximum and current
+memory allocation. The maximum allocation, set at domain creation
+time, cannot be modified. However a domain can choose to reduce
+and subsequently grow its current allocation by using the
+following call:
+
+\begin{quote}
+\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
+ unsigned long nr\_extents, unsigned int extent\_order)}
+
+Increase or decrease current memory allocation (as determined by
+the value of {\tt op}). Each invocation provides a list of
+extents each of which is $2^s$ pages in size,
+where $s$ is the value of {\tt extent\_order}.
+
+\end{quote}
+
+In addition to simply reducing or increasing the current memory
+allocation via a `balloon driver', this call is also useful for
+obtaining contiguous regions of machine memory when required (e.g.
+for certain PCI devices, or if using superpages).
+
+
+\section{Inter-Domain Communication}
+\label{s:idc}
+
+Xen provides a simple asynchronous notification mechanism via
+\emph{event channels}. Each domain has a set of end-points (or
+\emph{ports}) which may be bound to an event source (e.g. a physical
+IRQ, a virtual IRQ, or a port in another domain). When a pair of
+end-points in two different domains are bound together, then a `send'
+operation on one will cause an event to be received by the destination
+domain.
+
+The control and use of event channels involves the following hypercall:
+
+\begin{quote}
+\hypercall{event\_channel\_op(evtchn\_op\_t *op)}
+
+Inter-domain event-channel management; {\tt op} is a discriminated
+union which allows the following 7 operations:
+
+\begin{description}
+
+\item[\it alloc\_unbound:] allocate a free (unbound) local
+ port and prepare for connection from a specified domain.
+\item[\it bind\_virq:] bind a local port to a virtual
+IRQ; any particular VIRQ can be bound to at most one port per domain.
+\item[\it bind\_pirq:] bind a local port to a physical IRQ;
+once more, a given pIRQ can be bound to at most one port per
+domain. Furthermore the calling domain must be sufficiently
+privileged.
+\item[\it bind\_interdomain:] construct an interdomain event
+channel; in general, the target domain must have previously allocated
+an unbound port for this channel, although this can be bypassed by
+privileged domains during domain setup.
+\item[\it close:] close an interdomain event channel.
+\item[\it send:] send an event to the remote end of an
+interdomain event channel.
+\item[\it status:] determine the current status of a local port.
+\end{description}
+
+For more details see
+{\tt xen/include/public/event\_channel.h}.
+
+\end{quote}
+
+Event channels are the fundamental communication primitive between
+Xen domains and seamlessly support SMP. However they provide little
+bandwidth for communication {\sl per se}, and hence are typically
+married with a piece of shared memory to produce effective and
+high-performance inter-domain communication.
+
+Safe sharing of memory pages between guest OSes is carried out by
+granting access on a per page basis to individual domains. This is
+achieved by using the {\tt grant\_table\_op()} hypercall.
+
+\begin{quote}
+\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
+
+Grant or remove access to a particular page to a particular domain.
+
+\end{quote}
+
+This is not currently widely in use by guest operating systems, but
+we intend to integrate support more fully in the near future.
+
+\section{PCI Configuration}
+
+Domains with physical device access (i.e.\ driver domains) receive
+limited access to certain PCI devices (bus address space and
+interrupts). However many guest operating systems attempt to
+determine the PCI configuration by directly accessing the PCI BIOS,
+which cannot be allowed for safety.
+
+Instead, Xen provides the following hypercall:
+
+\begin{quote}
+\hypercall{physdev\_op(void *physdev\_op)}
+
+Perform a PCI configuration option; depending on the value
+of {\tt physdev\_op} this can be a PCI config read, a PCI config
+write, or a small number of other queries.
+
+\end{quote}
+
+
+For examples of using {\tt physdev\_op()}, see the
+Xen-specific PCI code in the linux sparse tree.
+
+\section{Administrative Operations}
+\label{s:dom0ops}
+
+A large number of control operations are available to a sufficiently
+privileged domain (typically domain 0). These allow the creation and
+management of new domains, for example. A complete list is given
+below: for more details on any or all of these, please see
+{\tt xen/include/public/dom0\_ops.h}
+
+
+\begin{quote}
+\hypercall{dom0\_op(dom0\_op\_t *op)}
+
+Administrative domain operations for domain management. The options are:
+
+\begin{description}
+\item [\it DOM0\_CREATEDOMAIN:] create a new domain
+
+\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run
+queue.
+
+\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
+ once again.
+
+\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
+with a domain
+
+\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
+
+\item [\it DOM0\_SCHEDCTL:]
+
+\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
+
+\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
+
+\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
+
+\item [\it DOM0\_GETPAGEFRAMEINFO:]
+
+\item [\it DOM0\_GETPAGEFRAMEINFO2:]
+
+\item [\it DOM0\_IOPL:] set I/O privilege level
+
+\item [\it DOM0\_MSR:] read or write model specific registers
+
+\item [\it DOM0\_DEBUG:] interactively invoke the debugger
+
+\item [\it DOM0\_SETTIME:] set system time
+
+\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
+
+\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
+
+\item [\it DOM0\_GETTBUFS:] get information about the size and location of
+ the trace buffers (only on trace-buffer enabled builds)
+
+\item [\it DOM0\_PHYSINFO:] get information about the host machine
+
+\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
+
+\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
+
+\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
+
+\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
+
+\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
+
+\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
+\end{description}
+\end{quote}
+
+Most of the above are best understood by looking at the code
+implementing them (in {\tt xen/common/dom0\_ops.c}) and in
+the user-space tools that use them (mostly in {\tt tools/libxc}).
+
+\section{Debugging Hypercalls}
+
+A few additional hypercalls are mainly useful for debugging:
+
+\begin{quote}
+\hypercall{console\_io(int cmd, int count, char *str)}
+
+Use Xen to interact with the console; operations are:
+
+{\it CONSOLEIO\_write}: Output count characters from buffer str.
+
+{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
+\end{quote}
+
+A pair of hypercalls allows access to the underlying debug registers:
+\begin{quote}
+\hypercall{set\_debugreg(int reg, unsigned long value)}
+
+Set debug register {\tt reg} to {\tt value}
+
+\hypercall{get\_debugreg(int reg)}
+
+Return the contents of the debug register {\tt reg}
+\end{quote}
+
+And finally:
+\begin{quote}
+\hypercall{xen\_version(int cmd)}
+
+Request Xen version number.
+\end{quote}
+
+This is useful to ensure that user-space tools are in sync
+with the underlying hypervisor.
+
+\section{Deprecated Hypercalls}
+
+Xen is under constant development and refinement; as such there
+are plans to improve the way in which various pieces of functionality
+are exposed to guest OSes.
+
+\begin{quote}
+\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
+
+Toggle various memory management modes (in particular writable page
+tables and superpage support).
+
+\end{quote}
+
+This is likely to be replaced with mode values in the shared
+information page since this is more resilient for resumption
+after migration or checkpoint.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/memory.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/memory.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,162 @@
+\chapter{Memory}
+\label{c:memory}
+
+Xen is responsible for managing the allocation of physical memory to
+domains, and for ensuring safe use of the paging and segmentation
+hardware.
+
+
+\section{Memory Allocation}
+
+Xen resides within a small fixed portion of physical memory; it also
+reserves the top 64MB of every virtual address space. The remaining
+physical memory is available for allocation to domains at a page
+granularity. Xen tracks the ownership and use of each page, which
+allows it to enforce secure partitioning between domains.
+
+Each domain has a maximum and current physical memory allocation. A
+guest OS may run a `balloon driver' to dynamically adjust its current
+memory allocation up to its limit.
+
+
+%% XXX SMH: I use machine and physical in the next section (which is
+%% kinda required for consistency with code); wonder if this section
+%% should use same terms?
+%%
+%% Probably.
+%%
+%% Merging this and below section at some point prob makes sense.
+
+\section{Pseudo-Physical Memory}
+
+Since physical memory is allocated and freed on a page granularity,
+there is no guarantee that a domain will receive a contiguous stretch
+of physical memory. However most operating systems do not have good
+support for operating in a fragmented physical address space. To aid
+porting such operating systems to run on top of Xen, we make a
+distinction between \emph{machine memory} and \emph{pseudo-physical
+ memory}.
+
+Put simply, machine memory refers to the entire amount of memory
+installed in the machine, including that reserved by Xen, in use by
+various domains, or currently unallocated. We consider machine memory
+to comprise a set of 4K \emph{machine page frames} numbered
+consecutively starting from 0. Machine frame numbers mean the same
+within Xen or any domain.
+
+Pseudo-physical memory, on the other hand, is a per-domain
+abstraction. It allows a guest operating system to consider its memory
+allocation to consist of a contiguous range of physical page frames
+starting at physical frame 0, despite the fact that the underlying
+machine page frames may be sparsely allocated and in any order.
+
+To achieve this, Xen maintains a globally readable {\it
+ machine-to-physical} table which records the mapping from machine
+page frames to pseudo-physical ones. In addition, each domain is
+supplied with a {\it physical-to-machine} table which performs the
+inverse mapping. Clearly the machine-to-physical table has size
+proportional to the amount of RAM installed in the machine, while each
+physical-to-machine table has size proportional to the memory
+allocation of the given domain.
+
+Architecture dependent code in guest operating systems can then use
+the two tables to provide the abstraction of pseudo-physical memory.
+In general, only certain specialized parts of the operating system
+(such as page table management) need to understand the difference
+between machine and pseudo-physical addresses.
+
+
+\section{Page Table Updates}
+
+In the default mode of operation, Xen enforces read-only access to
+page tables and requires guest operating systems to explicitly request
+any modifications. Xen validates all such requests and only applies
+updates that it deems safe. This is necessary to prevent domains from
+adding arbitrary mappings to their page tables.
+
+To aid validation, Xen associates a type and reference count with each
+memory page. A page has one of the following mutually-exclusive types
+at any point in time: page directory ({\sf PD}), page table ({\sf
+ PT}), local descriptor table ({\sf LDT}), global descriptor table
+({\sf GDT}), or writable ({\sf RW}). Note that a guest OS may always
+create readable mappings of its own memory regardless of its current
+type.
+
+%%% XXX: possibly explain more about ref count 'lifecyle' here?
+This mechanism is used to maintain the invariants required for safety;
+for example, a domain cannot have a writable mapping to any part of a
+page table as this would require the page concerned to simultaneously
+be of types {\sf PT} and {\sf RW}.
+
+
+% \section{Writable Page Tables}
+
+Xen also provides an alternative mode of operation in which guests
+have the illusion that their page tables are directly writable. Of
+course this is not really the case, since Xen must still validate
+modifications to ensure secure partitioning. To this end, Xen traps
+any write attempt to a memory page of type {\sf PT} (i.e., that is
+currently part of a page table). If such an access occurs, Xen
+temporarily allows write access to that page while at the same time
+\emph{disconnecting} it from the page table that is currently in use.
+This allows the guest to safely make updates to the page because the
+newly-updated entries cannot be used by the MMU until Xen revalidates
+and reconnects the page. Reconnection occurs automatically in a
+number of situations: for example, when the guest modifies a different
+page-table page, when the domain is preempted, or whenever the guest
+uses Xen's explicit page-table update interfaces.
+
+Finally, Xen also supports a form of \emph{shadow page tables} in
+which the guest OS uses an independent copy of page tables which are
+unknown to the hardware (i.e.\ which are never pointed to by {\tt
+ cr3}). Instead Xen propagates changes made to the guest's tables to
+the real ones, and vice versa. This is useful for logging page writes
+(e.g.\ for live migration or checkpoint). A full version of the shadow
+page tables also allows guest OS porting with less effort.
+
+
+\section{Segment Descriptor Tables}
+
+On boot a guest is supplied with a default GDT, which does not reside
+within its own memory allocation. If the guest wishes to use other
+than the default `flat' ring-1 and ring-3 segments that this GDT
+provides, it must register a custom GDT and/or LDT with Xen, allocated
+from its own memory. Note that a number of GDT entries are reserved by
+Xen -- any custom GDT must also include sufficient space for these
+entries.
+
+For example, the following hypercall is used to specify a new GDT:
+
+\begin{quote}
+ int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em
+ entries})
+
+ \emph{frame\_list}: An array of up to 16 machine page frames within
+ which the GDT resides. Any frame registered as a GDT frame may only
+ be mapped read-only within the guest's address space (e.g., no
+ writable mappings, no use as a page-table page, and so on).
+
+ \emph{entries}: The number of descriptor-entry slots in the GDT.
+ Note that the table must be large enough to contain Xen's reserved
+ entries; thus we must have `{\em entries $>$
+ LAST\_RESERVED\_GDT\_ENTRY}\ '. Note also that, after registering
+ the GDT, slots \emph{FIRST\_} through
+ \emph{LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest
+ and may be overwritten by Xen.
+\end{quote}
+
+The LDT is updated via the generic MMU update mechanism (i.e., via the
+{\tt mmu\_update()} hypercall).
+
+\section{Start of Day}
+
+The start-of-day environment for guest operating systems is rather
+different to that provided by the underlying hardware. In particular,
+the processor is already executing in protected mode with paging
+enabled.
+
+{\it Domain 0} is created and booted by Xen itself. For all subsequent
+domains, the analogue of the boot-loader is the {\it domain builder},
+user-space software running in {\it domain 0}. The domain builder is
+responsible for building the initial page tables for a domain and
+loading its kernel image at the appropriate virtual address.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/scheduling.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/scheduling.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,268 @@
+\chapter{Scheduling API}
+
+The scheduling API is used by both the schedulers described above and should
+also be used by any new schedulers. It provides a generic interface and also
+implements much of the ``boilerplate'' code.
+
+Schedulers conforming to this API are described by the following
+structure:
+
+\begin{verbatim}
+struct scheduler
+{
+ char *name; /* full name for this scheduler */
+ char *opt_name; /* option name for this scheduler */
+ unsigned int sched_id; /* ID for this scheduler */
+
+ int (*init_scheduler) ();
+ int (*alloc_task) (struct task_struct *);
+ void (*add_task) (struct task_struct *);
+ void (*free_task) (struct task_struct *);
+ void (*rem_task) (struct task_struct *);
+ void (*wake_up) (struct task_struct *);
+ void (*do_block) (struct task_struct *);
+ task_slice_t (*do_schedule) (s_time_t);
+ int (*control) (struct sched_ctl_cmd *);
+ int (*adjdom) (struct task_struct *,
+ struct sched_adjdom_cmd *);
+ s32 (*reschedule) (struct task_struct *);
+ void (*dump_settings) (void);
+ void (*dump_cpu_state) (int);
+ void (*dump_runq_el) (struct task_struct *);
+};
+\end{verbatim}
+
+The only method that {\em must} be implemented is
+{\tt do\_schedule()}. However, if there is not some implementation for the
+{\tt wake\_up()} method then waking tasks will not get put on the runqueue!
+
+The fields of the above structure are described in more detail below.
+
+\subsubsection{name}
+
+The name field should point to a descriptive ASCII string.
+
+\subsubsection{opt\_name}
+
+This field is the value of the {\tt sched=} boot-time option that will select
+this scheduler.
+
+\subsubsection{sched\_id}
+
+This is an integer that uniquely identifies this scheduler. There should be a
+macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
+
+\subsubsection{init\_scheduler}
+
+\paragraph*{Purpose}
+
+This is a function for performing any scheduler-specific initialisation. For
+instance, it might allocate memory for per-CPU scheduler data and initialise it
+appropriately.
+
+\paragraph*{Call environment}
+
+This function is called after the initialisation performed by the generic
+layer. The function is called exactly once, for the scheduler that has been
+selected.
+
+\paragraph*{Return values}
+
+This should return negative on failure --- this will cause an
+immediate panic and the system will fail to boot.
+
+\subsubsection{alloc\_task}
+
+\paragraph*{Purpose}
+Called when a {\tt task\_struct} is allocated by the generic scheduler
+layer. A particular scheduler implementation may use this method to
+allocate per-task data for this task. It may use the {\tt
+sched\_priv} pointer in the {\tt task\_struct} to point to this data.
+
+\paragraph*{Call environment}
+The generic layer guarantees that the {\tt sched\_priv} field will
+remain intact from the time this method is called until the task is
+deallocated (so long as the scheduler implementation does not change
+it explicitly!).
+
+\paragraph*{Return values}
+Negative on failure.
+
+\subsubsection{add\_task}
+
+\paragraph*{Purpose}
+
+Called when a task is initially added by the generic layer.
+
+\paragraph*{Call environment}
+
+The fields in the {\tt task\_struct} are now filled out and available for use.
+Schedulers should implement appropriate initialisation of any per-task private
+information in this method.
+
+\subsubsection{free\_task}
+
+\paragraph*{Purpose}
+
+Schedulers should free the space used by any associated private data
+structures.
+
+\paragraph*{Call environment}
+
+This is called when a {\tt task\_struct} is about to be deallocated.
+The generic layer will have done generic task removal operations and
+(if implemented) called the scheduler's {\tt rem\_task} method before
+this method is called.
+
+\subsubsection{rem\_task}
+
+\paragraph*{Purpose}
+
+This is called when a task is being removed from scheduling (but is
+not yet being freed).
+
+\subsubsection{wake\_up}
+
+\paragraph*{Purpose}
+
+Called when a task is woken up, this method should put the task on the runqueue
+(or do the scheduler-specific equivalent action).
+
+\paragraph*{Call environment}
+
+The task is already set to state RUNNING.
+
+\subsubsection{do\_block}
+
+\paragraph*{Purpose}
+
+This function is called when a task is blocked. This function should
+not remove the task from the runqueue.
+
+\paragraph*{Call environment}
+
+The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
+TASK\_INTERRUPTIBLE on entry to this method. A call to the {\tt
+ do\_schedule} method will be made after this method returns, in
+order to select the next task to run.
+
+\subsubsection{do\_schedule}
+
+This method must be implemented.
+
+\paragraph*{Purpose}
+
+The method is called each time a new task must be chosen for scheduling on the
+current CPU. The current time is passed as the single argument (the current
+task can be found using the {\tt current} macro).
+
+This method should select the next task to run on this CPU and set its minimum
+time to run as well as returning the data described below.
+
+This method should also take the appropriate action if the previous
+task has blocked, e.g. removing it from the runqueue.
+
+\paragraph*{Call environment}
+
+The other fields in the {\tt task\_struct} are updated by the generic layer,
+which also performs all Xen-specific tasks and performs the actual task switch
+(unless the previous task has been chosen again).
+
+This method is called with the {\tt schedule\_lock} held for the current CPU
+and local interrupts disabled.
+
+\paragraph*{Return values}
+
+Must return a {\tt struct task\_slice} describing what task to run and how long
+for (at maximum).
+
+\subsubsection{control}
+
+\paragraph*{Purpose}
+
+This method is called for global scheduler control operations. It takes a
+pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
+source data from or populate with data, depending on the value of the
+{\tt direction} field.
+
+\paragraph*{Call environment}
+
+The generic layer guarantees that when this method is called, the
+caller selected the correct scheduler ID, hence the scheduler's
+implementation does not need to sanity-check these parts of the call.
+
+\paragraph*{Return values}
+
+This function should return the value to be passed back to user space, hence it
+should either be 0 or an appropriate errno value.
+
+\subsubsection{sched\_adjdom}
+
+\paragraph*{Purpose}
+
+This method is called to adjust the scheduling parameters of a particular
+domain, or to query their current values. The function should check
+the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
+order to determine which of these operations is being performed.
+
+\paragraph*{Call environment}
+
+The generic layer guarantees that the caller has specified the correct
+control interface version and scheduler ID and that the supplied {\tt
+task\_struct} will not be deallocated during the call (hence it is not
+necessary to {\tt get\_task\_struct}).
+
+\paragraph*{Return values}
+
+This function should return the value to be passed back to user space, hence it
+should either be 0 or an appropriate errno value.
+
+\subsubsection{reschedule}
+
+\paragraph*{Purpose}
+
+This method is called to determine if a reschedule is required as a result of a
+particular task.
+
+\paragraph*{Call environment}
+The generic layer will cause a reschedule if the current domain is the idle
+task or it has exceeded its minimum time slice before a reschedule. The
+generic layer guarantees that the task passed is not currently running but is
+on the runqueue.
+
+\paragraph*{Return values}
+
+Should return a mask of CPUs to cause a reschedule on.
+
+\subsubsection{dump\_settings}
+
+\paragraph*{Purpose}
+
+If implemented, this should dump any private global settings for this
+scheduler to the console.
+
+\paragraph*{Call environment}
+
+This function is called with interrupts enabled.
+
+\subsubsection{dump\_cpu\_state}
+
+\paragraph*{Purpose}
+
+This method should dump any private settings for the specified CPU.
+
+\paragraph*{Call environment}
+
+This function is called with interrupts disabled and the {\tt schedule\_lock}
+for the specified CPU held.
+
+\subsubsection{dump\_runq\_el}
+
+\paragraph*{Purpose}
+
+This method should dump any private settings for the specified task.
+
+\paragraph*{Call environment}
+
+This function is called with interrupts disabled and the {\tt schedule\_lock}
+for the task's CPU held.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/build.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/build.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,170 @@
+\chapter{Build, Boot and Debug Options}
+
+This chapter describes the build- and boot-time options which may be
+used to tailor your Xen system.
+
+
+\section{Xen Build Options}
+
+Xen provides a number of build-time options which should be set as
+environment variables or passed on make's command-line.
+
+\begin{description}
+\item[verbose=y] Enable debugging messages when Xen detects an
+ unexpected condition. Also enables console output from all domains.
+\item[debug=y] Enable debug assertions. Implies {\bf verbose=y}.
+ (Primarily useful for tracing bugs in Xen).
+\item[debugger=y] Enable the in-Xen debugger. This can be used to
+ debug Xen, guest OSes, and applications.
+\item[perfc=y] Enable performance counters for significant events
+ within Xen. The counts can be reset or displayed on Xen's console
+ via console control keys.
+\item[trace=y] Enable per-cpu trace buffers which log a range of
+ events within Xen for collection by control software.
+\end{description}
+
+
+\section{Xen Boot Options}
+\label{s:xboot}
+
+These options are used to configure Xen's behaviour at runtime. They
+should be appended to Xen's command line, either manually or by
+editing \path{grub.conf}.
+
+\begin{description}
+\item [ noreboot ] Don't reboot the machine automatically on errors.
+ This is useful to catch debug output if you aren't catching console
+ messages via the serial line.
+\item [ nosmp ] Disable SMP support. This option is implied by
+ `ignorebiostables'.
+\item [ watchdog ] Enable NMI watchdog which can report certain
+ failures.
+\item [ noirqbalance ] Disable software IRQ balancing and affinity.
+ This can be used on systems such as Dell 1850/2850 that have
+ workarounds in hardware for IRQ-routing issues.
+\item [ badpage=$<$page number$>$,$<$page number$>$, \ldots ] Specify
+ a list of pages not to be allocated for use because they contain bad
+ bytes. For example, if your memory tester says that byte 0x12345678
+ is bad, you would place `badpage=0x12345' on Xen's command line.
+\item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
+ com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
+ Xen supports up to two 16550-compatible serial ports. For example:
+ `com1=9600, 8n1, 0x408, 5' maps COM1 to a 9600-baud port, 8 data
+ bits, no parity, 1 stop bit, I/O port base 0x408, IRQ 5. If some
+ configuration options are standard (e.g., I/O base and IRQ), then
+ only a prefix of the full configuration string need be specified. If
+ the baud rate is pre-configured (e.g., by the bootloader) then you
+ can specify `auto' in place of a numeric baud rate.
+\item [ console=$<$specifier list$>$ ] Specify the destination for Xen
+ console I/O. This is a comma-separated list of, for example:
+ \begin{description}
+ \item[ vga ] Use VGA console and allow keyboard input.
+ \item[ com1 ] Use serial port com1.
+ \item[ com2H ] Use serial port com2. Transmitted chars will have the
+ MSB set. Received chars must have MSB set.
+ \item[ com2L] Use serial port com2. Transmitted chars will have the
+ MSB cleared. Received chars must have MSB cleared.
+ \end{description}
+ The latter two examples allow a single port to be shared by two
+ subsystems (e.g.\ console and debugger). Sharing is controlled by
+ MSB of each transmitted/received character. [NB. Default for this
+ option is `com1,vga']
+\item [ sync\_console ] Force synchronous console output. This is
+  useful if your system fails unexpectedly before it has sent all
+  available output to the console. In most cases Xen will
+  automatically enter synchronous mode when an exceptional event
+  occurs, but this option provides a manual fallback.
+\item [ conswitch=$<$switch-char$><$auto-switch-char$>$ ] Specify how
+ to switch serial-console input between Xen and DOM0. The required
+ sequence is CTRL-$<$switch-char$>$ pressed three times. Specifying
+ the backtick character disables switching. The
+ $<$auto-switch-char$>$ specifies whether Xen should auto-switch
+ input to DOM0 when it boots --- if it is `x' then auto-switching is
+ disabled. Any other value, or omitting the character, enables
+ auto-switching. [NB. Default switch-char is `a'.]
+\item [ nmi=xxx ]
+ Specify what to do with an NMI parity or I/O error. \\
+ `nmi=fatal': Xen prints a diagnostic and then hangs. \\
+ `nmi=dom0': Inform DOM0 of the NMI. \\
+ `nmi=ignore': Ignore the NMI.
+\item [ mem=xxx ] Set the physical RAM address limit. Any RAM
+ appearing beyond this physical address in the memory map will be
+ ignored. This parameter may be specified with a B, K, M or G suffix,
+ representing bytes, kilobytes, megabytes and gigabytes respectively.
+ The default unit, if no suffix is specified, is kilobytes.
+\item [ dom0\_mem=xxx ] Set the amount of memory to be allocated to
+ domain0. In Xen 3.x the parameter may be specified with a B, K, M or
+ G suffix, representing bytes, kilobytes, megabytes and gigabytes
+ respectively; if no suffix is specified, the parameter defaults to
+ kilobytes. In previous versions of Xen, suffixes were not supported
+ and the value is always interpreted as kilobytes.
+\item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
+ pages (default 1). Note that the trace buffers are only enabled in
+ debug builds. Most users can ignore this feature completely.
+\item [ sched=xxx ] Select the CPU scheduler Xen should use. The
+ current possibilities are `bvt' (default), `atropos' and `rrobin'.
+ For more information see Section~\ref{s:sched}.
+\item [ apic\_verbosity=debug,verbose ] Print more detailed
+ information about local APIC and IOAPIC configuration.
+\item [ lapic ] Force use of local APIC even when left disabled by
+ uniprocessor BIOS.
+\item [ nolapic ] Ignore local APIC in a uniprocessor system, even if
+ enabled by the BIOS.
+\item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform.
+ This can usually be probed automatically.
+\end{description}
+
+In addition, the following options may be specified on the Xen command
+line. Since domain 0 shares responsibility for booting the platform,
+Xen will automatically propagate these options to its command line.
+These options are taken from Linux's command-line syntax with
+unchanged semantics.
+
+\begin{description}
+\item [ acpi=off,force,strict,ht,noirq,\ldots ] Modify how Xen (and
+ domain 0) parses the BIOS ACPI tables.
+\item [ acpi\_skip\_timer\_override ] Instruct Xen (and domain~0) to
+ ignore timer-interrupt override instructions specified by the BIOS
+ ACPI tables.
+\item [ noapic ] Instruct Xen (and domain~0) to ignore any IOAPICs
+ that are present in the system, and instead continue to use the
+ legacy PIC.
+\end{description}
+
+
+\section{XenLinux Boot Options}
+
+In addition to the standard Linux kernel boot options, we support:
+\begin{description}
+\item[ xencons=xxx ] Specify the device node to which the Xen virtual
+ console driver is attached. The following options are supported:
+ \begin{center}
+ \begin{tabular}{l}
+ `xencons=off': disable virtual console \\
+ `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
+ `xencons=ttyS': attach console to /dev/ttyS0
+ \end{tabular}
+\end{center}
+The default is ttyS for dom0 and tty for all other domains.
+\end{description}
+
+
+\section{Debugging}
+\label{s:keys}
+
+Xen has a set of debugging features that can be useful to try and
+figure out what's going on. Hit `h' on the serial line (if you
+specified a baud rate on the Xen command line) or ScrollLock-h on the
+keyboard to get a list of supported commands.
+
+If you have a crash you'll likely get a crash dump containing an EIP
+(PC) which, along with an \path{objdump -d image}, can be useful in
+figuring out what's happened. Debug a Xenlinux image just as you
+would any other Linux kernel.
+
+%% We supply a handy debug terminal program which you can find in
+%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/} This should
+%% be built and executed on another machine that is connected via a
+%% null modem cable. Documentation is included. Alternatively, if the
+%% Xen machine is connected to a serial-port server then we supply a
+%% dumb TCP terminal client, {\tt xencons}.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/control_software.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/control_software.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,115 @@
+\chapter{Control Software}
+
+The Xen control software includes the \xend\ node control daemon
+(which must be running), the xm command line tools, and the prototype
+xensv web interface.
+
+\section{\Xend\ (node control daemon)}
+\label{s:xend}
+
+The Xen Daemon (\Xend) performs system management functions related to
+virtual machines. It forms a central point of control for a machine
+and can be controlled using an HTTP-based protocol. \Xend\ must be
+running in order to start and manage virtual machines.
+
+\Xend\ must be run as root because it needs access to privileged
+system management functions. A small set of commands may be issued on
+the \xend\ command line:
+
+\begin{tabular}{ll}
+ \verb!# xend start! & start \xend, if not already running \\
+ \verb!# xend stop! & stop \xend\ if already running \\
+ \verb!# xend restart! & restart \xend\ if running, otherwise start it \\
+ % \verb!# xend trace_start! & start \xend, with very detailed debug logging
\\
+ \verb!# xend status! & indicates \xend\ status by its return code
+\end{tabular}
+
+A SysV init script called {\tt xend} is provided to start \xend\ at
+boot time. {\tt make install} installs this script in
+\path{/etc/init.d}. To enable it, you have to make symbolic links in
+the appropriate runlevel directories or use the {\tt chkconfig} tool,
+where available.
+
+Once \xend\ is running, more sophisticated administration can be done
+using the xm tool (see Section~\ref{s:xm}) and the experimental Xensv
+web interface (see Section~\ref{s:xensv}).
+
+As \xend\ runs, events will be logged to \path{/var/log/xend.log} and,
+if the migration assistant daemon (\path{xfrd}) has been started,
+\path{/var/log/xfrd.log}. These may be of use for troubleshooting
+problems.
+
+\section{Xm (command line interface)}
+\label{s:xm}
+
+The xm tool is the primary tool for managing Xen from the console.
+The general format of an xm command line is:
+
+\begin{verbatim}
+# xm command [switches] [arguments] [variables]
+\end{verbatim}
+
+The available \emph{switches} and \emph{arguments} are dependent on
+the \emph{command} chosen. The \emph{variables} may be set using
+declarations of the form {\tt variable=value} and command line
+declarations override any of the values in the configuration file
+being used, including the standard variables described above and any
+custom variables (for instance, the \path{xmdefconfig} file uses a
+{\tt vmid} variable).
+
+The available commands are as follows:
+
+\begin{description}
+\item[set-mem] Request a domain to adjust its memory footprint.
+\item[create] Create a new domain.
+\item[destroy] Kill a domain immediately.
+\item[list] List running domains.
+\item[shutdown] Ask a domain to shutdown.
+\item[dmesg] Fetch the Xen (not Linux!) boot output.
+\item[consoles] Lists the available consoles.
+\item[console] Connect to the console for a domain.
+\item[help] Get help on xm commands.
+\item[save] Suspend a domain to disk.
+\item[restore] Restore a domain from disk.
+\item[pause] Pause a domain's execution.
+\item[unpause] Un-pause a domain.
+\item[pincpu] Pin a domain to a CPU.
+\item[bvt] Set BVT scheduler parameters for a domain.
+\item[bvt\_ctxallow] Set the BVT context switching allowance for the
+ system.
+\item[atropos] Set the atropos parameters for a domain.
+\item[rrobin] Set the round robin time slice for the system.
+\item[info] Get information about the Xen host.
+\item[call] Call a \xend\ HTTP API function directly.
+\end{description}
+
+For a detailed overview of switches, arguments and variables to each
+command try
+\begin{quote}
+\begin{verbatim}
+# xm help command
+\end{verbatim}
+\end{quote}
+
+\section{Xensv (web control interface)}
+\label{s:xensv}
+
+Xensv is the experimental web control interface for managing a Xen
+machine. It can be used to perform some (but not yet all) of the
+management tasks that can be done using the xm tool.
+
+It can be started using:
+\begin{quote}
+ \verb_# xensv start_
+\end{quote}
+and stopped using:
+\begin{quote}
+ \verb_# xensv stop_
+\end{quote}
+
+By default, Xensv will serve out the web interface on port 8080. This
+can be changed by editing
+\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
+
+Once Xensv is running, the web interface can be used to create and
+manage running domains.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/debian.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/debian.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,154 @@
+\chapter{Installing Xen / XenLinux on Debian}
+
+The Debian project provides a tool called \path{debootstrap} which
+allows a base Debian system to be installed into a filesystem without
+requiring the host system to have any Debian-specific software (such
+as \path{apt}).
+
+Here's some info how to install Debian 3.1 (Sarge) for an unprivileged
+Xen domain:
+
+\begin{enumerate}
+
+\item Set up Xen and test that it's working, as described earlier in
+ this manual.
+
+\item Create disk images for rootfs and swap. Alternatively, you might
+ create dedicated partitions, LVM logical volumes, etc.\ if that
+ suits your setup.
+\begin{verbatim}
+dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
+dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
+\end{verbatim}
+
+ If you're going to use this filesystem / disk image only as a
+ `template' for other vm disk images, something like 300 MB should be
+ enough. (of course it depends what kind of packages you are planning
+ to install to the template)
+
+\item Create the filesystem and initialise the swap image
+\begin{verbatim}
+mkfs.ext3 /path/diskimage
+mkswap /path/swapimage
+\end{verbatim}
+
+\item Mount the disk image for installation
+\begin{verbatim}
+mount -o loop /path/diskimage /mnt/disk
+\end{verbatim}
+
+\item Install \path{debootstrap}. Make sure you have debootstrap
+ installed on the host. If you are running Debian Sarge (3.1 /
+ testing) or unstable you can install it by running \path{apt-get
+ install debootstrap}. Otherwise, it can be downloaded from the
+ Debian project website.
+
+\item Install Debian base to the disk image:
+\begin{verbatim}
+debootstrap --arch i386 sarge /mnt/disk \
+ http://ftp.<countrycode>.debian.org/debian
+\end{verbatim}
+
+ You can use any other Debian http/ftp mirror you want.
+
+\item When debootstrap completes successfully, modify settings:
+\begin{verbatim}
+chroot /mnt/disk /bin/bash
+\end{verbatim}
+
+Edit the following files using vi or nano and make needed changes:
+\begin{verbatim}
+/etc/hostname
+/etc/hosts
+/etc/resolv.conf
+/etc/network/interfaces
+/etc/networks
+\end{verbatim}
+
+Set up access to the services, edit:
+\begin{verbatim}
+/etc/hosts.deny
+/etc/hosts.allow
+/etc/inetd.conf
+\end{verbatim}
+
+Add Debian mirror to:
+\begin{verbatim}
+/etc/apt/sources.list
+\end{verbatim}
+
+Create fstab like this:
+\begin{verbatim}
+/dev/sda1 / ext3 errors=remount-ro 0 1
+/dev/sda2 none swap sw 0 0
+proc /proc proc defaults 0 0
+\end{verbatim}
+
+Logout
+
+\item Unmount the disk image
+\begin{verbatim}
+umount /mnt/disk
+\end{verbatim}
+
+\item Create Xen 2.0 configuration file for the new domain. You can
+ use the example-configurations coming with Xen as a template.
+
+ Make sure you have the following set up:
+\begin{verbatim}
+disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
+root = "/dev/sda1 ro"
+\end{verbatim}
+
+\item Start the new domain
+\begin{verbatim}
+xm create -f domain_config_file
+\end{verbatim}
+
+Check that the new domain is running:
+\begin{verbatim}
+xm list
+\end{verbatim}
+
+\item Attach to the console of the new domain. You should see
+ something like this when starting the new domain:
+
+\begin{verbatim}
+Started domain testdomain2, console on port 9626
+\end{verbatim}
+
+ There you can see the ID of the console: 26. You can also list the
+ consoles with \path{xm consoles} (ID is the last two digits of the
+ port number.)
+
+ Attach to the console:
+
+\begin{verbatim}
+xm console 26
+\end{verbatim}
+
+ or by telnetting to the port 9626 of localhost (the xm console
+ program works better).
+
+\item Log in and run base-config
+
+ As a default there's no password for the root.
+
+ Check that everything looks OK, and the system started without
+ errors. Check that the swap is active, and the network settings are
+ correct.
+
+ Run \path{/usr/sbin/base-config} to set up the Debian settings.
+
+ Set up the password for root using passwd.
+
+\item Done. You can exit the console by pressing {\path{Ctrl + ]}}
+
+\end{enumerate}
+
+
+If you need to create new domains, you can just copy the contents of
+the `template'-image to the new disk images, either by mounting the
+template and the new image, and using \path{cp -a} or \path{tar} or by
+simply copying the image file. Once this is done, modify the
+image-specific settings (hostname, network settings, etc).
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/domain_configuration.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/domain_configuration.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,281 @@
+\chapter{Domain Configuration}
+\label{cha:config}
+
+The following contains the syntax of the domain configuration files
+and description of how to further specify networking, driver domain
+and general scheduling behavior.
+
+
+\section{Configuration Files}
+\label{s:cfiles}
+
+Xen configuration files contain the following standard variables.
+Unless otherwise stated, configuration items should be enclosed in
+quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2}
+for concrete examples of the syntax.
+
+\begin{description}
+\item[kernel] Path to the kernel image.
+\item[ramdisk] Path to a ramdisk image (optional).
+ % \item[builder] The name of the domain build function (e.g.
+ % {\tt'linux'} or {\tt'netbsd'}.
+\item[memory] Memory size in megabytes.
+\item[cpu] CPU to run this domain on, or {\tt -1} for auto-allocation.
+\item[console] Port to export the domain console on (default 9600 +
+ domain ID).
+\item[nics] Number of virtual network interfaces.
+\item[vif] List of MAC addresses (random addresses are assigned if not
+ given) and bridges to use for the domain's network interfaces, e.g.\
+\begin{verbatim}
+vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
+ 'bridge=xen-br1' ]
+\end{verbatim}
+ to assign a MAC address and bridge to the first interface and assign
+ a different bridge to the second interface, leaving \xend\ to choose
+ the MAC address.
+\item[disk] List of block devices to export to the domain, e.g.\ \\
+ \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
+ exports physical device \path{/dev/hda1} to the domain as
+ \path{/dev/sda1} with read-only access. Exporting a disk read-write
+ which is currently mounted is dangerous -- if you are \emph{certain}
+ you wish to do this, you can specify \path{w!} as the mode.
+\item[dhcp] Set to {\tt `dhcp'} if you want to use DHCP to configure
+ networking.
+\item[netmask] Manually configured IP netmask.
+\item[gateway] Manually configured IP gateway.
+\item[hostname] Set the hostname for the virtual machine.
+\item[root] Specify the root device parameter on the kernel command
+ line.
+\item[nfs\_server] IP address for the NFS server (if any).
+\item[nfs\_root] Path of the root filesystem on the NFS server (if
+ any).
+\item[extra] Extra string to append to the kernel command line (if
+ any)
+\item[restart] Three possible options:
+ \begin{description}
+ \item[always] Always restart the domain, no matter what its exit
+ code is.
+ \item[never] Never restart the domain.
+ \item[onreboot] Restart the domain iff it requests reboot.
+ \end{description}
+\end{description}
+
+For additional flexibility, it is also possible to include Python
+scripting commands in configuration files. An example of this is the
+\path{xmexample2} file, which uses Python code to handle the
+\path{vmid} variable.
+
+
+%\part{Advanced Topics}
+
+
+\section{Network Configuration}
+
+For many users, the default installation should work ``out of the
+box''. More complicated network setups, for instance with multiple
+Ethernet interfaces and/or existing bridging setups will require some
+special configuration.
+
+The purpose of this section is to describe the mechanisms provided by
+\xend\ to allow a flexible configuration for Xen's virtual networking.
+
+\subsection{Xen virtual network topology}
+
+Each domain network interface is connected to a virtual network
+interface in dom0 by a point to point link (effectively a ``virtual
+crossover cable''). These devices are named {\tt
+ vif$<$domid$>$.$<$vifid$>$} (e.g.\ {\tt vif1.0} for the first
+interface in domain~1, {\tt vif3.1} for the second interface in
+domain~3).
+
+Traffic on these virtual interfaces is handled in domain~0 using
+standard Linux mechanisms for bridging, routing, rate limiting, etc.
+Xend calls on two shell scripts to perform initial configuration of
+the network and configuration of new virtual interfaces. By default,
+these scripts configure a single bridge for all the virtual
+interfaces. Arbitrary routing / bridging configurations can be
+configured by customizing the scripts, as described in the following
+section.
+
+\subsection{Xen networking scripts}
+
+Xen's virtual networking is configured by two shell scripts (by
+default \path{network} and \path{vif-bridge}). These are called
+automatically by \xend\ when certain events occur, with arguments to
+the scripts providing further contextual information. These scripts
+are found by default in \path{/etc/xen/scripts}. The names and
+locations of the scripts can be configured in
+\path{/etc/xen/xend-config.sxp}.
+
+\begin{description}
+\item[network:] This script is called whenever \xend\ is started or
+ stopped to respectively initialize or tear down the Xen virtual
+ network. In the default configuration initialization creates the
+ bridge `xen-br0' and moves eth0 onto that bridge, modifying the
+ routing accordingly. When \xend\ exits, it deletes the Xen bridge
+ and removes eth0, restoring the normal IP and routing configuration.
+
+ %% In configurations where the bridge already exists, this script
+ %% could be replaced with a link to \path{/bin/true} (for instance).
+
+\item[vif-bridge:] This script is called for every domain virtual
+ interface and can configure firewalling rules and add the vif to the
+ appropriate bridge. By default, this adds and removes VIFs on the
+ default Xen bridge.
+\end{description}
+
+For more complex network setups (e.g.\ where routing is required, or
+integration with existing bridges is needed) these scripts may be replaced with
+customized variants for your site's preferred configuration.
+
+%% There are two possible types of privileges: IO privileges and
+%% administration privileges.
+
+
+\section{Driver Domain Configuration}
+
+I/O privileges can be assigned to allow a domain to directly access
+PCI devices itself. This is used to support driver domains.
+
+Setting back-end privileges is currently only supported in SXP format
+config files. To allow a domain to function as a back-end for others,
+somewhere within the {\tt vm} element of its configuration file must
+be a {\tt back-end} element of the form {\tt (back-end ({\em type}))}
+where {\tt \em type} may be either {\tt netif} or {\tt blkif},
+according to the type of virtual device this domain will service.
+%% After this domain has been built, \xend will connect all new and
+%% existing {\em virtual} devices (of the appropriate type) to that
+%% back-end.
+
+Note that a block back-end cannot currently import virtual block
+devices from other domains, and a network back-end cannot import
+virtual network devices from other domains. Thus (particularly in the
+case of block back-ends, which cannot import a virtual block device as
+their root filesystem), you may need to boot a back-end domain from a
+ramdisk or a network device.
+
+Access to PCI devices may be configured on a per-device basis. Xen
+will assign the minimal set of hardware privileges to a domain that
+are required to control its devices. This can be configured in either
+format of configuration file:
+
+\begin{itemize}
+\item SXP Format: Include device elements of the form: \\
+ \centerline{ {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em
z})))}} \\
+ inside the top-level {\tt vm} element. Each one specifies the
+ address of a device this domain is allowed to access --- the numbers
+ \emph{x},\emph{y} and \emph{z} may be in either decimal or
+ hexadecimal format.
+\item Flat Format: Include a list of PCI device addresses of the
+ format: \\
+ \centerline{{\tt pci = ['x,y,z', \ldots]}} \\
+ where each element in the list is a string specifying the components
+ of the PCI device address, separated by commas. The components
+ ({\tt \em x}, {\tt \em y} and {\tt \em z}) of the list may be
+ formatted as either decimal or hexadecimal.
+\end{itemize}
+
+%% \section{Administration Domains}
+
+%% Administration privileges allow a domain to use the `dom0
+%% operations' (so called because they are usually available only to
+%% domain 0). A privileged domain can build other domains, set
+%% scheduling parameters, etc.
+
+% Support for other administrative domains is not yet available...
+% perhaps we should plumb it in some time
+
+
+\section{Scheduler Configuration}
+\label{s:sched}
+
+Xen offers a boot time choice between multiple schedulers. To select
+a scheduler, pass the boot parameter \emph{sched=sched\_name} to Xen,
+substituting the appropriate scheduler name. Details of the
+schedulers and their parameters are included below; future versions of
+the tools will provide a higher-level interface to these tools.
+
+It is expected that system administrators configure their system to
+use the scheduler most appropriate to their needs. Currently, the BVT
+scheduler is the recommended choice.
+
+\subsection{Borrowed Virtual Time}
+
+{\tt sched=bvt} (the default) \\
+
+BVT provides proportional fair shares of the CPU time. It has been
+observed to penalize domains that block frequently (e.g.\ I/O
+intensive domains), but this can be compensated for by using warping.
+
+\subsubsection{Global Parameters}
+
+\begin{description}
+\item[ctx\_allow] The context switch allowance is similar to the
+ ``quantum'' in traditional schedulers. It is the minimum time that
+ a scheduled domain will be allowed to run before being preempted.
+\end{description}
+
+\subsubsection{Per-domain parameters}
+
+\begin{description}
+\item[mcuadv] The MCU (Minimum Charging Unit) advance determines the
+ proportional share of the CPU that a domain receives. It is set
+ inversely proportionally to a domain's sharing weight.
+\item[warp] The amount of ``virtual time'' the domain is allowed to
+ warp backwards.
+\item[warpl] The warp limit is the maximum time a domain can run
+ warped for.
+\item[warpu] The unwarp requirement is the minimum time a domain must
+ run unwarped for before it can warp again.
+\end{description}
+
+\subsection{Atropos}
+
+{\tt sched=atropos} \\
+
+Atropos is a soft real time scheduler. It provides guarantees about
+absolute shares of the CPU, with a facility for sharing slack CPU time
+on a best-effort basis. It can provide timeliness guarantees for
+latency-sensitive domains.
+
+Every domain has an associated period and slice. The domain should
+receive `slice' nanoseconds every `period' nanoseconds. This allows
+the administrator to configure both the absolute share of the CPU a
+domain receives and the frequency with which it is scheduled.
+
+%% When domains unblock, their period is reduced to the value of the
+%% latency hint (the slice is scaled accordingly so that they still
+%% get the same proportion of the CPU). For each subsequent period,
+%% the slice and period times are doubled until they reach their
+%% original values.
+
+Note: don't over-commit the CPU when using Atropos (i.e.\ don't reserve
+more CPU than is available --- the utilization should be kept to
+slightly less than 100\% in order to ensure predictable behavior).
+
+\subsubsection{Per-domain parameters}
+
+\begin{description}
+\item[period] The regular time interval during which a domain is
+ guaranteed to receive its allocation of CPU time.
+\item[slice] The length of time per period that a domain is guaranteed
+ to run for (in the absence of voluntary yielding of the CPU).
+\item[latency] The latency hint is used to control how soon after
+ waking up a domain it should be scheduled.
+\item[xtratime] This is a boolean flag that specifies whether a domain
+ should be allowed a share of the system slack time.
+\end{description}
+
+\subsection{Round Robin}
+
+{\tt sched=rrobin} \\
+
+The round robin scheduler is included as a simple demonstration of
+Xen's internal scheduler API. It is not intended for production use.
+
+\subsubsection{Global Parameters}
+
+\begin{description}
+\item[rr\_slice] The maximum time each domain runs before the next
+ scheduling decision is made.
+\end{description}
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/domain_filesystem.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/domain_filesystem.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,243 @@
+\chapter{Domain Filesystem Storage}
+
+It is possible to directly export any Linux block device in dom0 to
+another domain, or to export filesystems / devices to virtual machines
+using standard network protocols (e.g.\ NBD, iSCSI, NFS, etc.). This
+chapter covers some of the possibilities.
+
+
+\section{Exporting Physical Devices as VBDs}
+\label{s:exporting-physical-devices-as-vbds}
+
+One of the simplest configurations is to directly export individual
+partitions from domain~0 to other domains. To achieve this use the
+\path{phy:} specifier in your domain configuration file. For example a
+line like
+\begin{quote}
+ \verb_disk = ['phy:hda3,sda1,w']_
+\end{quote}
+specifies that the partition \path{/dev/hda3} in domain~0 should be
+exported read-write to the new domain as \path{/dev/sda1}; one could
+equally well export it as \path{/dev/hda} or \path{/dev/sdb5} should
+one wish.
+
+In addition to local disks and partitions, it is possible to export
+any device that Linux considers to be ``a disk'' in the same manner.
+For example, if you have iSCSI disks or GNBD volumes imported into
+domain~0 you can export these to other domains using the \path{phy:}
+disk syntax. E.g.:
+\begin{quote}
+ \verb_disk = ['phy:vg/lvm1,sda2,w']_
+\end{quote}
+
+\begin{center}
+ \framebox{\bf Warning: Block device sharing}
+\end{center}
+\begin{quote}
+ Block devices should typically only be shared between domains in a
+ read-only fashion otherwise the Linux kernel's file systems will get
+ very confused as the file system structure may change underneath
+ them (having the same ext3 partition mounted \path{rw} twice is a
+ sure fire way to cause irreparable damage)! \Xend\ will attempt to
+ prevent you from doing this by checking that the device is not
+ mounted read-write in domain~0, and hasn't already been exported
+ read-write to another domain. If you want read-write sharing,
+ export the directory to other domains via NFS from domain~0 (or use
+ a cluster file system such as GFS or ocfs2).
+\end{quote}
+
+
+\section{Using File-backed VBDs}
+
+It is also possible to use a file in Domain~0 as the primary storage
+for a virtual machine. As well as being convenient, this also has the
+advantage that the virtual block device will be \emph{sparse} ---
+space will only really be allocated as parts of the file are used. So
+if a virtual machine uses only half of its disk space then the file
+really takes up half of the size allocated.
+
+For example, to create a 2GB sparse file-backed virtual block device
+(actually only consumes 1KB of disk):
+\begin{quote}
+ \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
+\end{quote}
+
+Make a file system in the disk file:
+\begin{quote}
+ \verb_# mkfs -t ext3 vm1disk_
+\end{quote}
+
+(when the tool asks for confirmation, answer `y')
+
+Populate the file system e.g.\ by copying from the current root:
+\begin{quote}
+\begin{verbatim}
+# mount -o loop vm1disk /mnt
+# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
+# mkdir /mnt/{proc,sys,home,tmp}
+\end{verbatim}
+\end{quote}
+
+Tailor the file system by editing \path{/etc/fstab},
+\path{/etc/hostname}, etc.\ Don't forget to edit the files in the
+mounted file system, instead of your domain~0 filesystem, e.g.\ you
+would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab}. For
+this example put \path{/dev/sda1} to root in fstab.
+
+Now unmount (this is important!):
+\begin{quote}
+ \verb_# umount /mnt_
+\end{quote}
+
+In the configuration file set:
+\begin{quote}
+ \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
+\end{quote}
+
+As the virtual machine writes to its `disk', the sparse file will be
+filled in and consume more space up to the original 2GB.
+
+{\bf Note that file-backed VBDs may not be appropriate for backing
+ I/O-intensive domains.} File-backed VBDs are known to experience
+substantial slowdowns under heavy I/O workloads, due to the I/O
+handling by the loopback block device used to support file-backed VBDs
+in dom0. Better I/O performance can be achieved by using either
+LVM-backed VBDs (Section~\ref{s:using-lvm-backed-vbds}) or physical
+devices as VBDs (Section~\ref{s:exporting-physical-devices-as-vbds}).
+
+Linux supports a maximum of eight file-backed VBDs across all domains
+by default. This limit can be statically increased by using the
+\emph{max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is
+compiled as a module in the dom0 kernel, or by using the
+\emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP is compiled
+directly into the dom0 kernel.
+
+
+\section{Using LVM-backed VBDs}
+\label{s:using-lvm-backed-vbds}
+
+A particularly appealing solution is to use LVM volumes as backing for
+domain file-systems since this allows dynamic growing/shrinking of
+volumes as well as snapshot and other features.
+
+To initialize a partition to support LVM volumes:
+\begin{quote}
+\begin{verbatim}
+# pvcreate /dev/sda10
+\end{verbatim}
+\end{quote}
+
+Create a volume group named `vg' on the physical partition:
+\begin{quote}
+\begin{verbatim}
+# vgcreate vg /dev/sda10
+\end{verbatim}
+\end{quote}
+
+Create a logical volume of size 4GB named `myvmdisk1':
+\begin{quote}
+\begin{verbatim}
+# lvcreate -L4096M -n myvmdisk1 vg
+\end{verbatim}
+\end{quote}
+
+You should now see that you have a \path{/dev/vg/myvmdisk1}. Make a
+filesystem, mount it and populate it, e.g.:
+\begin{quote}
+\begin{verbatim}
+# mkfs -t ext3 /dev/vg/myvmdisk1
+# mount /dev/vg/myvmdisk1 /mnt
+# cp -ax / /mnt
+# umount /mnt
+\end{verbatim}
+\end{quote}
+
+Now configure your VM with the following disk configuration:
+\begin{quote}
+\begin{verbatim}
+ disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
+\end{verbatim}
+\end{quote}
+
+LVM enables you to grow the size of logical volumes, but you'll need
+to resize the corresponding file system to make use of the new space.
+Some file systems (e.g.\ ext3) now support online resize. See the LVM
+manuals for more details.
+
+You can also use LVM for creating copy-on-write (CoW) clones of LVM
+volumes (known as writable persistent snapshots in LVM terminology).
+This facility is new in Linux 2.6.8, so isn't as stable as one might
+hope. In particular, using lots of CoW LVM disks consumes a lot of
+dom0 memory, and error conditions such as running out of disk space
+are not handled well. Hopefully this will improve in future.
+
+To create two copy-on-write clones of the above file system you would
+use the following commands:
+
+\begin{quote}
+\begin{verbatim}
+# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
+# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
+\end{verbatim}
+\end{quote}
+
+Each of these can grow to have 1GB of differences from the master
+volume. You can grow the amount of space for storing the differences
+using the lvextend command, e.g.:
+\begin{quote}
+\begin{verbatim}
+# lvextend +100M /dev/vg/myclonedisk1
+\end{verbatim}
+\end{quote}
+
+Don't let the `differences volume' ever fill up otherwise LVM gets
+rather confused. It may be possible to automate the growing process by
+using \path{dmsetup wait} to spot the volume getting full and then
+issue an \path{lvextend}.
+
+In principle, it is possible to continue writing to the volume that
+has been cloned (the changes will not be visible to the clones), but
+we wouldn't recommend this: have the cloned volume as a `pristine'
+file system install that isn't mounted directly by any of the virtual
+machines.
+
+
+\section{Using NFS Root}
+
+First, populate a root filesystem in a directory on the server
+machine. This can be on a distinct physical machine, or simply run
+within a virtual machine on the same node.
+
+Now configure the NFS server to export this filesystem over the
+network by adding a line to \path{/etc/exports}, for instance:
+
+\begin{quote}
+ \begin{small}
+\begin{verbatim}
+/export/vm1root 1.2.3.4/24(rw,sync,no_root_squash)
+\end{verbatim}
+ \end{small}
+\end{quote}
+
+Finally, configure the domain to use NFS root. In addition to the
+normal variables, you should make sure to set the following values in
+the domain's configuration file:
+
+\begin{quote}
+ \begin{small}
+\begin{verbatim}
+root = '/dev/nfs'
+nfs_server = '2.3.4.5' # substitute IP address of server
+nfs_root = '/path/to/root' # path to root FS on the server
+\end{verbatim}
+ \end{small}
+\end{quote}
+
+The domain will need network access at boot time, so either statically
+configure an IP address using the config variables \path{ip},
+\path{netmask}, \path{gateway}, \path{hostname}; or enable DHCP
+(\path{dhcp='dhcp'}).
+
+Note that the Linux NFS root implementation is known to have stability
+problems under high load (this is not a Xen-specific problem), so this
+configuration may not be appropriate for critical servers.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/domain_mgmt.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/domain_mgmt.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,203 @@
+\chapter{Domain Management Tools}
+
+The previous chapter described a simple example of how to configure
+and start a domain. This chapter summarises the tools available to
+manage running domains.
+
+
+\section{Command-line Management}
+
+Command line management tasks are also performed using the \path{xm}
+tool. For online help for the commands available, type:
+\begin{quote}
+ \verb_# xm help_
+\end{quote}
+
+You can also type \path{xm help $<$command$>$} for more information on
+a given command.
+
+\subsection{Basic Management Commands}
+
+The most important \path{xm} commands are:
+\begin{quote}
+ \verb_# xm list_: Lists all domains running.\\
+ \verb_# xm consoles_: Gives information about the domain consoles.\\
+ \verb_# xm console_: Opens a console to a domain (e.g.\
+ \verb_# xm console myVM_)
+\end{quote}
+
+\subsection{\tt xm list}
+
+The output of \path{xm list} is in rows of the following format:
+\begin{center} {\tt name domid memory cpu state cputime console}
+\end{center}
+
+\begin{quote}
+ \begin{description}
+ \item[name] The descriptive name of the virtual machine.
+ \item[domid] The number of the domain ID this virtual machine is
+ running in.
+ \item[memory] Memory size in megabytes.
+ \item[cpu] The CPU this domain is running on.
+ \item[state] Domain state consists of 5 fields:
+ \begin{description}
+ \item[r] running
+ \item[b] blocked
+ \item[p] paused
+ \item[s] shutdown
+ \item[c] crashed
+ \end{description}
+ \item[cputime] How much CPU time (in seconds) the domain has used so
+ far.
+ \item[console] TCP port accepting connections to the domain's
+ console.
+ \end{description}
+\end{quote}
+
+The \path{xm list} command also supports a long output format when the
+\path{-l} switch is used. This outputs the full details of the
+running domains in \xend's SXP configuration format.
+
+For example, suppose the system is running the ttylinux domain as
+described earlier. The list command should produce output somewhat
+like the following:
+\begin{verbatim}
+# xm list
+Name Id Mem(MB) CPU State Time(s) Console
+Domain-0 0 251 0 r---- 172.2
+ttylinux 5 63 0 -b--- 3.0 9605
+\end{verbatim}
+
+Here we can see the details for the ttylinux domain, as well as for
+domain~0 (which, of course, is always running). Note that the console
+port for the ttylinux domain is 9605. This can be connected to by TCP
+using a terminal program (e.g. \path{telnet} or, better,
+\path{xencons}). The simplest way to connect is to use the
+\path{xm~console} command, specifying the domain name or ID. To
+connect to the console of the ttylinux domain, we could use any of the
+following:
+\begin{verbatim}
+# xm console ttylinux
+# xm console 5
+# xencons localhost 9605
+\end{verbatim}
+
+\section{Domain Save and Restore}
+
+The administrator of a Xen system may suspend a virtual machine's
+current state into a disk file in domain~0, allowing it to be resumed
+at a later time.
+
+The ttylinux domain described earlier can be suspended to disk using
+the command:
+\begin{verbatim}
+# xm save ttylinux ttylinux.xen
+\end{verbatim}
+
+This will stop the domain named `ttylinux' and save its current state
+into a file called \path{ttylinux.xen}.
+
+To resume execution of this domain, use the \path{xm restore} command:
+\begin{verbatim}
+# xm restore ttylinux.xen
+\end{verbatim}
+
+This will restore the state of the domain and restart it. The domain
+will carry on as before and the console may be reconnected using the
+\path{xm console} command, as above.
+
+\section{Live Migration}
+
+Live migration is used to transfer a domain between physical hosts
+whilst that domain continues to perform its usual activities --- from
+the user's perspective, the migration should be imperceptible.
+
+To perform a live migration, both hosts must be running Xen / \xend\
+and the destination host must have sufficient resources (e.g.\ memory
+capacity) to accommodate the domain after the move. Furthermore we
+currently require both source and destination machines to be on the
+same L2 subnet.
+
+Currently, there is no support for providing automatic remote access
+to filesystems stored on local disk when a domain is migrated.
+Administrators should choose an appropriate storage solution (i.e.\
+SAN, NAS, etc.) to ensure that domain filesystems are also available
+on their destination node. GNBD is a good method for exporting a
+volume from one machine to another. iSCSI can do a similar job, but is
+more complex to set up.
+
+When a domain migrates, its MAC and IP address move with it, thus it
+is only possible to migrate VMs within the same layer-2 network and IP
+subnet. If the destination node is on a different subnet, the
+administrator would need to manually configure a suitable etherip or
+IP tunnel in the domain~0 of the remote node.
+
+A domain may be migrated using the \path{xm migrate} command. To live
+migrate a domain to another machine, we would use the command:
+
+\begin{verbatim}
+# xm migrate --live mydomain destination.ournetwork.com
+\end{verbatim}
+
+Without the \path{--live} flag, \xend\ simply stops the domain and
+copies the memory image over to the new node and restarts it. Since
+domains can have large allocations this can be quite time consuming,
+even on a Gigabit network. With the \path{--live} flag \xend\ attempts
+to keep the domain running while the migration is in progress,
+resulting in typical `downtimes' of just 60--300ms.
+
+For now it will be necessary to reconnect to the domain's console on
+the new machine using the \path{xm console} command. If a migrated
+domain has any open network connections then they will be preserved,
+so SSH connections do not have this limitation.
+
+
+\section{Managing Domain Memory}
+
+XenLinux domains have the ability to relinquish / reclaim machine
+memory at the request of the administrator or the user of the domain.
+
+\subsection{Setting memory footprints from dom0}
+
+The machine administrator can request that a domain alter its memory
+footprint using the \path{xm set-mem} command. For instance, we can
+request that our example ttylinux domain reduce its memory footprint
+to 32 megabytes.
+
+\begin{verbatim}
+# xm set-mem ttylinux 32
+\end{verbatim}
+
+We can now see the result of this in the output of \path{xm list}:
+
+\begin{verbatim}
+# xm list
+Name Id Mem(MB) CPU State Time(s) Console
+Domain-0 0 251 0 r---- 172.2
+ttylinux 5 31 0 -b--- 4.3 9605
+\end{verbatim}
+
+The domain has responded to the request by returning memory to Xen. We
+can restore the domain to its original size using the command line:
+
+\begin{verbatim}
+# xm set-mem ttylinux 64
+\end{verbatim}
+
+\subsection{Setting memory footprints from within a domain}
+
+The virtual file \path{/proc/xen/balloon} allows the owner of a domain
+to adjust their own memory footprint. Reading the file (e.g.\
+\path{cat /proc/xen/balloon}) prints out the current memory footprint
+of the domain. Writing the file (e.g.\ \path{echo new\_target >
+ /proc/xen/balloon}) requests that the kernel adjust the domain's
+memory footprint to a new value.
+
+\subsection{Setting memory limits}
+
+Xen associates a memory size limit with each domain. By default, this
+is the amount of memory the domain is originally started with,
+preventing the domain from ever growing beyond this size. To permit a
+domain to grow beyond its original allocation or to prevent a domain
+you've shrunk from reclaiming the memory it relinquished, use the
+\path{xm maxmem} command.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/glossary.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/glossary.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,79 @@
+\chapter{Glossary of Terms}
+
+\begin{description}
+
+\item[Atropos] One of the CPU schedulers provided by Xen. Atropos
+ provides domains with absolute shares of the CPU, with timeliness
+ guarantees and a mechanism for sharing out `slack time'.
+
+\item[BVT] The BVT scheduler is used to give proportional fair shares
+ of the CPU to domains.
+
+\item[Exokernel] A minimal piece of privileged code, similar to a {\bf
+ microkernel} but providing a more `hardware-like' interface to the
+ tasks it manages. This is similar to a paravirtualising VMM like
+ {\bf Xen} but was designed as a new operating system structure,
+ rather than specifically to run multiple conventional OSs.
+
+\item[Domain] A domain is the execution context that contains a
+ running {\bf virtual machine}. The relationship between virtual
+ machines and domains on Xen is similar to that between programs and
+ processes in an operating system: a virtual machine is a persistent
+ entity that resides on disk (somewhat like a program). When it is
+ loaded for execution, it runs in a domain. Each domain has a {\bf
+ domain ID}.
+
+\item[Domain 0] The first domain to be started on a Xen machine.
+ Domain 0 is responsible for managing the system.
+
+\item[Domain ID] A unique identifier for a {\bf domain}, analogous to
+ a process ID in an operating system.
+
+\item[Full virtualisation] An approach to virtualisation which
+ requires no modifications to the hosted operating system, providing
+ the illusion of a complete system of real hardware devices.
+
+\item[Hypervisor] An alternative term for {\bf VMM}, used because it
+ means `beyond supervisor', since it is responsible for managing
+ multiple `supervisor' kernels.
+
+\item[Live migration] A technique for moving a running virtual machine
+ to another physical host, without stopping it or the services
+ running on it.
+
+\item[Microkernel] A small base of code running at the highest
+ hardware privilege level. A microkernel is responsible for sharing
+ CPU and memory (and sometimes other devices) between less privileged
+ tasks running on the system. This is similar to a VMM, particularly
+ a {\bf paravirtualising} VMM but typically addressing a different
+  problem space and providing a different kind of interface.
+
+\item[NetBSD/Xen] A port of NetBSD to the Xen architecture.
+
+\item[Paravirtualisation] An approach to virtualisation which requires
+ modifications to the operating system in order to run in a virtual
+ machine. Xen uses paravirtualisation but preserves binary
+ compatibility for user space applications.
+
+\item[Shadow pagetables] A technique for hiding the layout of machine
+ memory from a virtual machine's operating system. Used in some {\bf
+ VMMs} to provide the illusion of contiguous physical memory, in
+ Xen this is used during {\bf live migration}.
+
+\item[Virtual Machine] The environment in which a hosted operating
+ system runs, providing the abstraction of a dedicated machine. A
+ virtual machine may be identical to the underlying hardware (as in
+  {\bf full virtualisation}), or it may differ (as in {\bf
+    paravirtualisation}).
+
+\item[VMM] Virtual Machine Monitor - the software that allows multiple
+ virtual machines to be multiplexed on a single physical machine.
+
+\item[Xen] Xen is a paravirtualising virtual machine monitor,
+ developed primarily by the Systems Research Group at the University
+ of Cambridge Computer Laboratory.
+
+\item[XenLinux] Official name for the port of the Linux kernel that
+ runs on Xen.
+
+\end{description}
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/installation.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/installation.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,394 @@
+\chapter{Installation}
+
+The Xen distribution includes three main components: Xen itself, ports
+of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the userspace
+tools required to manage a Xen-based system. This chapter describes
+how to install the Xen~2.0 distribution from source. Alternatively,
+there may be pre-built packages available as part of your operating
+system distribution.
+
+
+\section{Prerequisites}
+\label{sec:prerequisites}
+
+The following is a full list of prerequisites. Items marked `$\dag$'
+are required by the \xend\ control tools, and hence required if you
+want to run more than one virtual machine; items marked `$*$' are only
+required if you wish to build from source.
+\begin{itemize}
+\item A working Linux distribution using the GRUB bootloader and
+ running on a P6-class (or newer) CPU.
+\item [$\dag$] The \path{iproute2} package.
+\item [$\dag$] The Linux bridge-utils\footnote{Available from {\tt
+ http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl})
+\item [$\dag$] An installation of Twisted~v1.3 or
+ above\footnote{Available from {\tt http://www.twistedmatrix.com}}.
+ There may be a binary package available for your distribution;
+ alternatively it can be installed by running `{\sl make
+ install-twisted}' in the root of the Xen source tree.
+\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
+\item [$*$] Development installation of libcurl (e.g., libcurl-devel)
+\item [$*$] Development installation of zlib (e.g., zlib-dev).
+\item [$*$] Development installation of Python v2.2 or later (e.g.,
+ python-dev).
+\item [$*$] \LaTeX\ and transfig are required to build the
+ documentation.
+\end{itemize}
+
+Once you have satisfied the relevant prerequisites, you can now
+install either a binary or source distribution of Xen.
+
+
+\section{Installing from Binary Tarball}
+
+Pre-built tarballs are available for download from the Xen download
+page
+\begin{quote} {\tt http://xen.sf.net}
+\end{quote}
+
+Once you've downloaded the tarball, simply unpack and install:
+\begin{verbatim}
+# tar zxvf xen-2.0-install.tgz
+# cd xen-2.0-install
+# sh ./install.sh
+\end{verbatim}
+
+Once you've installed the binaries you need to configure your system
+as described in Section~\ref{s:configure}.
+
+
+\section{Installing from Source}
+
+This section describes how to obtain, build, and install Xen from
+source.
+
+\subsection{Obtaining the Source}
+
+The Xen source tree is available as either a compressed source tar
+ball or as a clone of our master BitKeeper repository.
+
+\begin{description}
+\item[Obtaining the Source Tarball]\mbox{} \\
+ Stable versions (and daily snapshots) of the Xen source tree are
+ available as compressed tarballs from the Xen download page
+ \begin{quote} {\tt http://xen.sf.net}
+ \end{quote}
+
+\item[Using BitKeeper]\mbox{} \\
+ If you wish to install Xen from a clone of our latest BitKeeper
+ repository then you will need to install the BitKeeper tools.
+ Download instructions for BitKeeper can be obtained by filling out
+ the form at:
+ \begin{quote} {\tt http://www.bitmover.com/cgi-bin/download.cgi}
+\end{quote}
+The public master BK repository for the 2.0 release lives at:
+\begin{quote} {\tt bk://xen.bkbits.net/xen-2.0.bk}
+\end{quote}
+You can use BitKeeper to download it and keep it updated with the
+latest features and fixes.
+
+Change to the directory in which you want to put the source code, then
+run:
+\begin{verbatim}
+# bk clone bk://xen.bkbits.net/xen-2.0.bk
+\end{verbatim}
+
+Under your current directory, a new directory named \path{xen-2.0.bk}
+has been created, which contains all the source code for Xen, the OS
+ports, and the control tools. You can update your repository with the
+latest changes at any time by running:
+\begin{verbatim}
+# cd xen-2.0.bk # to change into the local repository
+# bk pull # to update the repository
+\end{verbatim}
+\end{description}
+
+% \section{The distribution}
+%
+% The Xen source code repository is structured as follows:
+%
+% \begin{description}
+% \item[\path{tools/}] Xen node controller daemon (Xend), command line
+% tools, control libraries
+% \item[\path{xen/}] The Xen VMM.
+% \item[\path{linux-*-xen-sparse/}] Xen support for Linux.
+% \item[\path{linux-*-patches/}] Experimental patches for Linux.
+% \item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
+% \item[\path{docs/}] Various documentation files for users and
+% developers.
+% \item[\path{extras/}] Bonus extras.
+% \end{description}
+
+\subsection{Building from Source}
+
+The top-level Xen Makefile includes a target `world' that will do the
+following:
+
+\begin{itemize}
+\item Build Xen.
+\item Build the control tools, including \xend.
+\item Download (if necessary) and unpack the Linux 2.6 source code,
+ and patch it for use with Xen.
+\item Build a Linux kernel to use in domain 0 and a smaller
+ unprivileged kernel, which can optionally be used for unprivileged
+ virtual machines.
+\end{itemize}
+
+After the build has completed you should have a top-level directory
+called \path{dist/} in which all resulting targets will be placed; of
+particular interest are the two XenLinux kernel images, one
+with a `-xen0' extension which contains hardware device drivers and
+drivers for Xen's virtual devices, and one with a `-xenU' extension
+that just contains the virtual ones. These are found in
+\path{dist/install/boot/} along with the image for Xen itself and the
+configuration files used during the build.
+
+The NetBSD port can be built using:
+\begin{quote}
+\begin{verbatim}
+# make netbsd20
+\end{verbatim}
+\end{quote}
+The NetBSD port is built using a snapshot of the netbsd-2-0 cvs branch.
+The snapshot is downloaded as part of the build process, if it is not
+yet present in the \path{NETBSD\_SRC\_PATH} search path. The build
+process also downloads a toolchain which includes all the tools
+necessary to build the NetBSD kernel under Linux.
+
+To further customize the set of kernels built, you need to edit the
+top-level Makefile. Look for the line:
+
+\begin{quote}
+\begin{verbatim}
+KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
+\end{verbatim}
+\end{quote}
+
+You can edit this line to include any set of operating system kernels
+which have configurations in the top-level \path{buildconfigs/}
+directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
+kernel containing only virtual device drivers.
+
+%% Inspect the Makefile if you want to see what goes on during a
+%% build. Building Xen and the tools is straightforward, but XenLinux
+%% is more complicated. The makefile needs a `pristine' Linux kernel
+%% tree to which it will then add the Xen architecture files. You can
+%% tell the makefile the location of the appropriate Linux compressed
+%% tar file by
+%% setting the LINUX\_SRC environment variable, e.g. \\
+%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
+%% placing the tar file somewhere in the search path of {\tt
+%% LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'. If the
+%% makefile can't find a suitable kernel tar file it attempts to
+%% download it from kernel.org (this won't work if you're behind a
+%% firewall).
+
+%% After untaring the pristine kernel tree, the makefile uses the {\tt
+%% mkbuildtree} script to add the Xen patches to the kernel.
+
+
+%% The procedure is similar to build the Linux 2.4 port: \\
+%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
+
+
+%% \framebox{\parbox{5in}{
+%% {\bf Distro specific:} \\
+%% {\it Gentoo} --- if not using udev (most installations,
+%% currently), you'll need to enable devfs and devfs mount at boot
+%% time in the xen0 config. }}
+
+\subsection{Custom XenLinux Builds}
+
+% If you have an SMP machine you may wish to give the {\tt '-j4'}
+% argument to make to get a parallel build.
+
+If you wish to build a customized XenLinux kernel (e.g. to support
+additional devices or enable distribution-required features), you can
+use the standard Linux configuration mechanisms, specifying that the
+architecture being built for is \path{xen}, e.g:
+\begin{quote}
+\begin{verbatim}
+# cd linux-2.6.11-xen0
+# make ARCH=xen xconfig
+# cd ..
+# make
+\end{verbatim}
+\end{quote}
+
+You can also copy an existing Linux configuration (\path{.config})
+into \path{linux-2.6.11-xen0} and execute:
+\begin{quote}
+\begin{verbatim}
+# make ARCH=xen oldconfig
+\end{verbatim}
+\end{quote}
+
+You may be prompted with some Xen-specific options; we advise
+accepting the defaults for these options.
+
+Note that the only difference between the two types of Linux kernel
+that are built is the configuration file used for each. The `U'
+suffixed (unprivileged) versions don't contain any of the physical
+hardware device drivers, leading to a 30\% reduction in size; hence
+you may prefer these for your non-privileged domains. The `0'
+suffixed privileged versions can be used to boot the system, as well
+as in driver domains and unprivileged domains.
+
+\subsection{Installing the Binaries}
+
+The files produced by the build process are stored under the
+\path{dist/install/} directory. To install them in their default
+locations, do:
+\begin{quote}
+\begin{verbatim}
+# make install
+\end{verbatim}
+\end{quote}
+
+Alternatively, users with special installation requirements may wish
+to install them manually by copying the files to their appropriate
+destinations.
+
+%% Files in \path{install/boot/} include:
+%% \begin{itemize}
+%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
+%% \item \path{install/boot/vmlinuz-2.6-xen0} Link to domain 0
+%% XenLinux kernel
+%% \item \path{install/boot/vmlinuz-2.6-xenU} Link to unprivileged
+%% XenLinux kernel
+%% \end{itemize}
+
+The \path{dist/install/boot} directory will also contain the config
+files used for building the XenLinux kernels, and also versions of Xen
+and XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6}
+and \path{vmlinux-syms-2.6.11.11-xen0}) which are essential for
+interpreting crash dumps. Retain these files as the developers may
+wish to see them if you post on the mailing list.
+
+
+\section{Configuration}
+\label{s:configure}
+
+Once you have built and installed the Xen distribution, it is simple
+to prepare the machine for booting and running Xen.
+
+\subsection{GRUB Configuration}
+
+An entry should be added to \path{grub.conf} (often found under
+\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
+This file is sometimes called \path{menu.lst}, depending on your
+distribution. The entry should look something like the following:
+
+{\small
+\begin{verbatim}
+title Xen 2.0 / XenLinux 2.6
+ kernel /boot/xen-2.0.gz dom0_mem=131072
+ module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
+\end{verbatim}
+}
+
+The kernel line tells GRUB where to find Xen itself and what boot
+parameters should be passed to it (in this case, setting domain 0's
+memory allocation in kilobytes and the settings for the serial port).
+For more details on the various Xen boot parameters see
+Section~\ref{s:xboot}.
+
+The module line of the configuration describes the location of the
+XenLinux kernel that Xen should start and the parameters that should
+be passed to it (these are standard Linux parameters, identifying the
+root device and specifying it be initially mounted read only and
+instructing that console output be sent to the screen). Some
+distributions such as SuSE do not require the \path{ro} parameter.
+
+%% \framebox{\parbox{5in}{
+%% {\bf Distro specific:} \\
+%% {\it SuSE} --- Omit the {\tt ro} option from the XenLinux
+%% kernel command line, since the partition won't be remounted rw
+%% during boot. }}
+
+
+If you want to use an initrd, just add another \path{module} line to
+the configuration, as usual:
+
+{\small
+\begin{verbatim}
+ module /boot/my_initrd.gz
+\end{verbatim}
+}
+
+As always when installing a new kernel, it is recommended that you do
+not delete existing menu options from \path{menu.lst} --- you may want
+to boot your old Linux kernel in future, particularly if you have
+problems.
+
+\subsection{Serial Console (optional)}
+
+%% kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
+%% module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro
+
+
+In order to configure Xen serial console output, it is necessary to
+add a boot option to your GRUB config; e.g.\ replace the above kernel
+line with:
+\begin{quote}
+{\small
+\begin{verbatim}
+ kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
+\end{verbatim}}
+\end{quote}
+
+This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 1
+stop bit and no parity. Modify these parameters for your setup.
+
+One can also configure XenLinux to share the serial console; to
+achieve this append ``\path{console=ttyS0}'' to your module line.
+
+If you wish to be able to log in over the XenLinux serial console it
+is necessary to add a line into \path{/etc/inittab}, just as per
+regular Linux. Simply add the line:
+\begin{quote} {\small {\tt c:2345:respawn:/sbin/mingetty ttyS0}}
+\end{quote}
+
+and you should be able to log in. Note that to successfully log in as
+root over the serial line will require adding \path{ttyS0} to
+\path{/etc/securetty} in most modern distributions.
+
+\subsection{TLS Libraries}
+
+Users of the XenLinux 2.6 kernel should disable Thread Local Storage
+(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
+attempting to run with a XenLinux kernel\footnote{If you boot without
+ first disabling TLS, you will get a warning message during the boot
+ process. In this case, simply perform the rename after the machine
+ is up and then run \texttt{/sbin/ldconfig} to make it take effect.}.
+You can always reenable it by restoring the directory to its original
+location (i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
+
+The reason for this is that the current TLS implementation uses
+segmentation in a way that is not permissible under Xen. If TLS is
+not disabled, an emulation mode is used within Xen which reduces
+performance substantially.
+
+We hope that this issue can be resolved by working with Linux
+distribution vendors to implement a minor backward-compatible change
+to the TLS library.
+
+
+\section{Booting Xen}
+
+It should now be possible to restart the system and use Xen. Reboot
+as usual but choose the new Xen option when the Grub screen appears.
+
+What follows should look much like a conventional Linux boot. The
+first portion of the output comes from Xen itself, supplying low level
+information about itself and the machine it is running on. The
+following portion of the output comes from XenLinux.
+
+You may see some errors during the XenLinux boot. These are not
+necessarily anything to worry about --- they may result from kernel
+configuration differences between your XenLinux kernel and the one you
+usually use.
+
+When the boot completes, you should be able to log into your system as
+usual. If you are unable to log in to your system running Xen, you
+should still be able to reboot with your normal Linux kernel.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/introduction.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/introduction.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,143 @@
+\chapter{Introduction}
+
+
+Xen is a \emph{paravirtualising} virtual machine monitor (VMM), or
+`hypervisor', for the x86 processor architecture. Xen can securely
+execute multiple virtual machines on a single physical system with
+close-to-native performance. The virtual machine technology
+facilitates enterprise-grade functionality, including:
+
+\begin{itemize}
+\item Virtual machines with performance close to native hardware.
+\item Live migration of running virtual machines between physical
+ hosts.
+\item Excellent hardware support (supports most Linux device drivers).
+\item Sandboxed, re-startable device drivers.
+\end{itemize}
+
+Paravirtualisation permits very high performance virtualisation, even
+on architectures like x86 that are traditionally very hard to
+virtualise.
+
+The drawback of this approach is that it requires operating systems to
+be \emph{ported} to run on Xen. Porting an OS to run on Xen is
+similar to supporting a new hardware platform, however the process is
+simplified because the paravirtual machine architecture is very
+similar to the underlying native hardware. Even though operating
+system kernels must explicitly support Xen, a key feature is that user
+space applications and libraries \emph{do not} require modification.
+
+Xen support is available for increasingly many operating systems:
+right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
+A FreeBSD port is undergoing testing and will be incorporated into the
+release soon. Other OS ports, including Plan 9, are in progress. We
+hope that the arch-xen patches will be incorporated into the
+mainstream releases of these operating systems in due course (as has
+already happened for NetBSD).
+
+Possible usage scenarios for Xen include:
+
+\begin{description}
+\item [Kernel development.] Test and debug kernel modifications in a
+ sandboxed virtual machine --- no need for a separate test machine.
+\item [Multiple OS configurations.] Run multiple operating systems
+ simultaneously, for instance for compatibility or QA purposes.
+\item [Server consolidation.] Move multiple servers onto a single
+ physical host with performance and fault isolation provided at
+ virtual machine boundaries.
+\item [Cluster computing.] Management at VM granularity provides more
+ flexibility than separately managing each physical host, but better
+ control and isolation than single-system image solutions,
+ particularly by using live migration for load balancing.
+\item [Hardware support for custom OSes.] Allow development of new
+ OSes while benefiting from the wide-ranging hardware support of
+ existing OSes such as Linux.
+\end{description}
+
+
+\section{Structure of a Xen-Based System}
+
+A Xen system has multiple layers, the lowest and most privileged of
+which is Xen itself.
+
+Xen in turn may host multiple \emph{guest} operating systems, each of
+which is executed within a secure virtual machine (in Xen terminology,
+a \emph{domain}). Domains are scheduled by Xen to make effective use
+of the available physical CPUs. Each guest OS manages its own
+applications, which includes responsibility for scheduling each
+application within the time allotted to the VM by Xen.
+
+The first domain, \emph{domain 0}, is created automatically when the
+system boots and has special management privileges. Domain 0 builds
+other domains and manages their virtual devices. It also performs
+administrative tasks such as suspending, resuming and migrating other
+virtual machines.
+
+Within domain 0, a process called \emph{xend} runs to manage the
+system. \Xend is responsible for managing virtual machines and
+providing access to their consoles. Commands are issued to \xend over
+an HTTP interface, either from a command-line tool or from a web
+browser.
+
+
+\section{Hardware Support}
+
+Xen currently runs only on the x86 architecture, requiring a `P6' or
+newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
+Pentium IV, Xeon, AMD Athlon, AMD Duron). Multiprocessor machines are
+supported, and we also have basic support for HyperThreading (SMT),
+although this remains a topic for ongoing research. A port
+specifically for x86/64 is in progress, although Xen already runs on
+such systems in 32-bit legacy mode. In addition a port to the IA64
+architecture is approaching completion. We hope to add other
+architectures such as PPC and ARM in due course.
+
+Xen can currently use up to 4GB of memory. It is possible for x86
+machines to address up to 64GB of physical memory but there are no
+current plans to support these systems: The x86/64 port is the planned
+route to supporting larger memory sizes.
+
+Xen offloads most of the hardware support issues to the guest OS
+running in Domain~0. Xen itself contains only the code required to
+detect and start secondary processors, set up interrupt routing, and
+perform PCI bus enumeration. Device drivers run within a privileged
+guest OS rather than within Xen itself. This approach provides
+compatibility with the majority of device hardware supported by Linux.
+The default XenLinux build contains support for relatively modern
+server-class network and disk hardware, but you can add support for
+other hardware by configuring your XenLinux kernel in the normal way.
+
+
+\section{History}
+
+Xen was originally developed by the Systems Research Group at the
+University of Cambridge Computer Laboratory as part of the XenoServers
+project, funded by the UK-EPSRC.
+
+XenoServers aim to provide a `public infrastructure for global
+distributed computing', and Xen plays a key part in that, allowing us
+to efficiently partition a single machine to enable multiple
+independent clients to run their operating systems and applications in
+an environment providing protection, resource isolation and
+accounting. The project web page contains further information along
+with pointers to papers and technical reports:
+\path{http://www.cl.cam.ac.uk/xeno}
+
+Xen has since grown into a fully-fledged project in its own right,
+enabling us to investigate interesting research issues regarding the
+best techniques for virtualising resources such as the CPU, memory,
+disk and network. The project has been bolstered by support from
+Intel Research Cambridge, and HP Labs, who are now working closely
+with us.
+
+Xen was first described in a paper presented at SOSP in
+2003\footnote{\tt
+ http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the
+first public release (1.0) was made that October. Since then, Xen has
+significantly matured and is now used in production scenarios on many
+sites.
+
+Xen 2.0 features greatly enhanced hardware support, configuration
+flexibility, usability and a larger complement of supported operating
+systems. This latest release takes Xen a step closer to becoming the
+definitive open source solution for virtualisation.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/redhat.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/redhat.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,61 @@
+\chapter{Installing Xen / XenLinux on Red~Hat or Fedora Core}
+
+When using Xen / XenLinux on a standard Linux distribution there are a
+couple of things to watch out for:
+
+Note that, because domains greater than 0 don't have any privileged
+access at all, certain commands in the default boot sequence will fail
+e.g.\ attempts to update the hwclock, change the console font, update
+the keytable map, start apmd (power management), or gpm (mouse
+cursor). Either ignore the errors (they should be harmless), or
+remove them from the startup scripts. Deleting the following links
+is a good start: {\path{S24pcmcia}}, {\path{S09isdn}},
+{\path{S17keytable}}, {\path{S26apmd}}, {\path{S85gpm}}.
+
+If you want to use a single root file system that works cleanly for
+both domain~0 and unprivileged domains, a useful trick is to use
+different `init' run levels. For example, use run level 3 for
+domain~0, and run level 4 for other domains. This enables different
+startup scripts to be run depending on the run level number passed
+on the kernel command line.
+
+If using NFS root file systems mounted either from an external server
+or from domain0 there are a couple of other gotchas. The default
+{\path{/etc/sysconfig/iptables}} rules block NFS, so part way through
+the boot sequence things will suddenly go dead.
+
+If you're planning on having a separate NFS {\path{/usr}} partition,
+the RH9 boot scripts don't make life easy - they attempt to mount NFS
+file systems way too late in the boot process. The easiest way I found
+to do this was to have a {\path{/linuxrc}} script run ahead of
+{\path{/sbin/init}} that mounts {\path{/usr}}:
+
+\begin{quote}
+ \begin{small}\begin{verbatim}
+ #!/bin/bash
+ /sbin/ifconfig lo 127.0.0.1
+ /sbin/portmap
+ /bin/mount /usr
+ exec /sbin/init "$@" <>/dev/console 2>&1
+\end{verbatim}\end{small}
+\end{quote}
+
+%% $ XXX SMH: font lock fix :-)
+
+The one slight complication with the above is that
+{\path{/sbin/portmap}} is dynamically linked against
+{\path{/usr/lib/libwrap.so.0}}. Since this is in {\path{/usr}}, it
+won't work. This can be solved by copying the file (and link) below
+the {\path{/usr}} mount point, and just let the file be `covered' when
+the mount happens.
+
+In some installations, where a shared read-only {\path{/usr}} is being
+used, it may be desirable to move other large directories over into
+the read-only {\path{/usr}}. For example, you might replace
+{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with links into
+{\path{/usr/root/bin}}, {\path{/usr/root/lib}} and
+{\path{/usr/root/sbin}} respectively. This creates other problems for
+running the {\path{/linuxrc}} script, requiring bash, portmap, mount,
+ifconfig, and a handful of other shared libraries to be copied below
+the mount point --- a simple statically-linked C program would solve
+this problem.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/start_addl_dom.tex
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/start_addl_dom.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,172 @@
+\chapter{Starting Additional Domains}
+
+The first step in creating a new domain is to prepare a root
+filesystem for it to boot from. Typically, this might be stored in a
+normal partition, an LVM or other volume manager partition, a disk
+file or on an NFS server. A simple way to do this is simply to boot
+from your standard OS install CD and install the distribution into
+another partition on your hard drive.
+
+To start the \xend\ control daemon, type
+\begin{quote}
+ \verb!# xend start!
+\end{quote}
+
+If you wish the daemon to start automatically, see the instructions in
+Section~\ref{s:xend}. Once the daemon is running, you can use the
+\path{xm} tool to monitor and maintain the domains running on your
+system. This chapter provides only a brief tutorial. We provide full
+details of the \path{xm} tool in the next chapter.
+
+% \section{From the web interface}
+%
+% Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv}
+% for more details) using the command: \\
+% \verb_# xensv start_ \\
+% This will also start Xend (see Chapter~\ref{cha:xend} for more
+% information).
+%
+% The domain management interface will then be available at {\tt
+% http://your\_machine:8080/}. This provides a user friendly wizard
+% for starting domains and functions for managing running domains.
+%
+% \section{From the command line}
+
+
+\section{Creating a Domain Configuration File}
+
+Before you can start an additional domain, you must create a
+configuration file. We provide two example files which you can use as
+a starting point:
+\begin{itemize}
+\item \path{/etc/xen/xmexample1} is a simple template configuration
+ file for describing a single VM.
+
+\item \path{/etc/xen/xmexample2} file is a template description that
+ is intended to be reused for multiple virtual machines. Setting the
+ value of the \path{vmid} variable on the \path{xm} command line
+ fills in parts of this template.
+\end{itemize}
+
+Copy one of these files and edit it as appropriate. Typical values
+you may wish to edit include:
+
+\begin{quote}
+\begin{description}
+\item[kernel] Set this to the path of the kernel you compiled for use
+ with Xen (e.g.\ \path{kernel = `/boot/vmlinuz-2.6-xenU'})
+\item[memory] Set this to the size of the domain's memory in megabytes
+ (e.g.\ \path{memory = 64})
+\item[disk] Set the first entry in this list to calculate the offset
+ of the domain's root partition, based on the domain ID. Set the
+ second to the location of \path{/usr} if you are sharing it between
+ domains (e.g.\ \path{disk = [`phy:your\_hard\_drive\%d,sda1,w' \%
+ (base\_partition\_number + vmid),
+ `phy:your\_usr\_partition,sda6,r' ]}
+\item[dhcp] Uncomment the dhcp variable, so that the domain will
+ receive its IP address from a DHCP server (e.g.\ \path{dhcp=`dhcp'})
+\end{description}
+\end{quote}
+
+You may also want to edit the {\bf vif} variable in order to choose
+the MAC address of the virtual ethernet interface yourself. For
+example:
+\begin{quote}
+\verb_vif = [`mac=00:06:AA:F6:BB:B3']_
+\end{quote}
+If you do not set this variable, \xend\ will automatically generate a
+random MAC address from an unused range.
+
+
+\section{Booting the Domain}
+
+The \path{xm} tool provides a variety of commands for managing
+domains. Use the \path{create} command to start new domains. Assuming
+you've created a configuration file \path{myvmconf} based around
+\path{/etc/xen/xmexample2}, to start a domain with virtual machine
+ID~1 you should type:
+
+\begin{quote}
+\begin{verbatim}
+# xm create -c myvmconf vmid=1
+\end{verbatim}
+\end{quote}
+
+The \path{-c} switch causes \path{xm} to turn into the domain's
+console after creation. The \path{vmid=1} sets the \path{vmid}
+variable used in the \path{myvmconf} file.
+
+You should see the console boot messages from the new domain appearing
+in the terminal in which you typed the command, culminating in a login
+prompt.
+
+
+\section{Example: ttylinux}
+
+Ttylinux is a very small Linux distribution, designed to require very
+few resources. We will use it as a concrete example of how to start a
+Xen domain. Most users will probably want to install a full-featured
+distribution once they have mastered the basics\footnote{ttylinux is
+ maintained by Pascal Schmidt. You can download source packages from
+ the distribution's home page: {\tt
+ http://www.minimalinux.org/ttylinux/}}.
+
+\begin{enumerate}
+\item Download and extract the ttylinux disk image from the Files
+ section of the project's SourceForge site (see
+ \path{http://sf.net/projects/xen/}).
+\item Create a configuration file like the following:
+\begin{verbatim}
+kernel = "/boot/vmlinuz-2.6-xenU"
+memory = 64
+name = "ttylinux"
+nics = 1
+ip = "1.2.3.4"
+disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
+root = "/dev/sda1 ro"
+\end{verbatim}
+\item Now start the domain and connect to its console:
+\begin{verbatim}
+xm create configfile -c
+\end{verbatim}
+\item Login as root, password root.
+\end{enumerate}
+
+
+\section{Starting / Stopping Domains Automatically}
+
+It is possible to have certain domains start automatically at boot
+time and to have dom0 wait for all running domains to shutdown before
+it shuts down the system.
+
+To specify that a domain should start at boot-time, place its configuration
+file (or a link to it) under \path{/etc/xen/auto/}.
+
+A Sys-V style init script for Red Hat and LSB-compliant systems is
+provided and will be automatically copied to \path{/etc/init.d/}
+during install. You can then enable it in the appropriate way for
+your distribution.
+
+For instance, on Red Hat:
+
+\begin{quote}
+ \verb_# chkconfig --add xendomains_
+\end{quote}
+
+By default, this will start the boot-time domains in runlevels 3, 4
+and 5.
+
+You can also use the \path{service} command to run this script
+manually, e.g:
+
+\begin{quote}
+ \verb_# service xendomains start_
+
+ Starts all the domains with config files under /etc/xen/auto/.
+\end{quote}
+
+\begin{quote}
+ \verb_# service xendomains stop_
+
+ Shuts down ALL running Xen domains.
+\end{quote}
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/util.c
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/util.c Thu Sep 22 17:42:01 2005
@@ -0,0 +1,73 @@
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <asm-xen/driver_util.h>
+
+static int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+ /* generic_page_range() does all the hard work. */
+ return 0;
+}
+
+struct vm_struct *alloc_vm_area(unsigned long size)
+{
+ struct vm_struct *area;
+
+ area = get_vm_area(size, VM_IOREMAP);
+ if (area == NULL)
+ return NULL;
+
+ /*
+ * This ensures that page tables are constructed for this region
+ * of kernel virtual address space and mapped into init_mm.
+ */
+ if (generic_page_range(&init_mm, (unsigned long)area->addr,
+ area->size, f, NULL)) {
+ free_vm_area(area);
+ return NULL;
+ }
+
+ return area;
+}
+
+void free_vm_area(struct vm_struct *area)
+{
+ BUG_ON(remove_vm_area(area->addr) != area);
+ kfree(area);
+}
+
+void lock_vm_area(struct vm_struct *area)
+{
+ unsigned long i;
+ char c;
+
+ /*
+ * Prevent context switch to a lazy mm that doesn't have this area
+ * mapped into its page tables.
+ */
+ preempt_disable();
+
+ /*
+ * Ensure that the page tables are mapped into the current mm. The
+ * page-fault path will copy the page directory pointers from init_mm.
+ */
+ for (i = 0; i < area->size; i += PAGE_SIZE)
+ (void)__get_user(c, (char *)area->addr + i);
+}
+
+void unlock_vm_area(struct vm_struct *area)
+{
+ preempt_enable();
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/include/asm-xen/driver_util.h
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/driver_util.h Thu Sep 22
17:42:01 2005
@@ -0,0 +1,16 @@
+
+#ifndef __ASM_XEN_DRIVER_UTIL_H__
+#define __ASM_XEN_DRIVER_UTIL_H__
+
+#include <linux/config.h>
+#include <linux/vmalloc.h>
+
+/* Allocate/destroy a 'vmalloc' VM area. */
+extern struct vm_struct *alloc_vm_area(unsigned long size);
+extern void free_vm_area(struct vm_struct *area);
+
+/* Lock an area so that PTEs are accessible in the current address space. */
+extern void lock_vm_area(struct vm_struct *area);
+extern void unlock_vm_area(struct vm_struct *area);
+
+#endif /* __ASM_XEN_DRIVER_UTIL_H__ */
diff -r 97dbd9524a7e -r 06d84bf87159 patches/linux-2.6.12/tpm_partial_read.patch
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/patches/linux-2.6.12/tpm_partial_read.patch Thu Sep 22 17:42:01 2005
@@ -0,0 +1,74 @@
+--- ref-linux-2.6.12/drivers/char/tpm/tpm.c 2005-06-17 15:48:29.000000000
-0400
++++ linux-2.6-xen-sparse/drivers/char/tpm/tpm.c 2005-09-15
14:56:05.000000000 -0400
+@@ -473,6 +401,7 @@ ssize_t tpm_write(struct file * file, co
+ out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
+
+ atomic_set(&chip->data_pending, out_size);
++ atomic_set(&chip->data_position, 0);
+ up(&chip->buffer_mutex);
+
+ /* Set a timeout by which the reader must come claim the result */
+@@ -494,29 +423,34 @@ ssize_t tpm_read(struct file * file, cha
+ {
+ struct tpm_chip *chip = file->private_data;
+ int ret_size = -ENODATA;
++ int pos, pending = 0;
+
+- if (atomic_read(&chip->data_pending) != 0) { /* Result available */
++ down(&chip->buffer_mutex);
++ ret_size = atomic_read(&chip->data_pending);
++ if ( ret_size > 0 ) { /* Result available */
++ if (size < ret_size)
++ ret_size = size;
++
++ pos = atomic_read(&chip->data_position);
++
++ if (copy_to_user((void __user *) buf,
++ &chip->data_buffer[pos], ret_size)) {
++ ret_size = -EFAULT;
++ } else {
++ pending = atomic_read(&chip->data_pending) - ret_size;
++ if ( pending ) {
++ atomic_set( &chip->data_pending, pending );
++ atomic_set( &chip->data_position, pos+ret_size
);
++ }
++ }
++ }
++ up(&chip->buffer_mutex);
++
++ if ( ret_size <= 0 || pending == 0 ) {
++ atomic_set( &chip->data_pending, 0 );
+ down(&chip->timer_manipulation_mutex);
+ del_singleshot_timer_sync(&chip->user_read_timer);
+ up(&chip->timer_manipulation_mutex);
+-
+- down(&chip->buffer_mutex);
+-
+- ret_size = atomic_read(&chip->data_pending);
+- atomic_set(&chip->data_pending, 0);
+-
+- if (ret_size == 0) /* timeout just occurred */
+- ret_size = -ETIME;
+- else if (ret_size > 0) { /* relay data */
+- if (size < ret_size)
+- ret_size = size;
+-
+- if (copy_to_user((void __user *) buf,
+- chip->data_buffer, ret_size)) {
+- ret_size = -EFAULT;
+- }
+- }
+- up(&chip->buffer_mutex);
+ }
+
+ return ret_size;
+--- ref-linux-2.6.12/drivers/char/tpm/tpm.h 2005-06-17 15:48:29.000000000
-0400
++++ linux-2.6-xen-sparse/drivers/char/tpm/tpm.h 2005-09-15
14:56:05.000000000 -0400
+@@ -54,6 +54,7 @@ struct tpm_chip {
+ /* Data passed to and from the tpm via the read/write calls */
+ u8 *data_buffer;
+ atomic_t data_pending;
++ atomic_t data_position;
+ struct semaphore buffer_mutex;
+
+ struct timer_list user_read_timer; /* user needs to claim result */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/README
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/README Thu Sep 22 17:42:01 2005
@@ -0,0 +1,29 @@
+
+DomU GDB server for 32-bit (PAE and non-PAE) systems
+----------------------------------------------------
+
+Lines marked below with [*] are optional, if you want full
+source-level debugging of your kernel image.
+
+To build the GDB server:
+ 1. Run ./gdbbuild from within this directory.
+ 2. Copy ./gdb-6.2.1-linux-i386-xen/gdb/gdbserver/gdbserver-xen
+ to your test machine.
+
+To build a debuggable guest kernel image:
+ 1. cd linux-2.6.12-xenU
+ 2. ARCH=xen make menuconfig
+ 3. From within the configurator, enable the following options:
+ # Kernel hacking -> Compile the kernel with debug info [*]
+ -> Compile the kernel with frame pointers
+ 4. (Re)build and (re)install your xenU kernel image.
+
+To debug a running guest:
+ 1. Use 'xm list' to discover its domain id ($domid).
+ 2. Run 'gdbserver-xen 127.0.0.1:9999 --attach $domid'
+ 3. Run 'gdb /path/to/vmlinux-syms-2.6.xx-xenU'
+ 4. From within the gdb client session:
+ # directory /path/to/linux-2.6.xx-xenU [*]
+ # target remote 127.0.0.1:9999
+ # bt
+ # disass
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/vmxassist/acpi_madt.c
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/vmxassist/acpi_madt.c Thu Sep 22 17:42:01 2005
@@ -0,0 +1,145 @@
+/*
+ * acpi_madt.c: Update ACPI MADT table for multiple processor guest.
+ *
+ * Yu Ke, ke.yu@xxxxxxxxx
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "../acpi/acpi2_0.h"
+#include "../acpi/acpi_madt.h"
+
+#define NULL ((void*)0)
+
+extern int puts(const char *s);
+
+#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+
+/* xc_vmx_builder wrote vcpu block at 0x9F800. Return it. */
+static int
+get_vcpus(void)
+{
+ unsigned long *vcpus;
+
+ vcpus = (unsigned long *)0x9F800;
+ if (vcpus[0] != VCPU_MAGIC) {
+ puts("Bad vcpus magic, set vcpu number=1\n");
+ return 1;
+ }
+
+ return vcpus[1];
+}
+
+static void *
+acpi_madt_get_madt(unsigned char *acpi_start)
+{
+ ACPI_2_0_RSDP *rsdp=NULL;
+ ACPI_2_0_RSDT *rsdt=NULL;
+ ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
+
+ rsdp = (ACPI_2_0_RSDP *)(acpi_start + sizeof(ACPI_2_0_FACS));
+ if (rsdp->Signature != ACPI_2_0_RSDP_SIGNATURE) {
+ puts("Bad RSDP signature\n");
+ return NULL;
+ }
+
+ rsdt= (ACPI_2_0_RSDT *)
+ (acpi_start + rsdp->RsdtAddress - ACPI_PHYSICAL_ADDRESS);
+ if (rsdt->Header.Signature != ACPI_2_0_RSDT_SIGNATURE) {
+ puts("Bad RSDT signature\n");
+ return NULL;
+ }
+
+ madt = (ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *)
+ ( acpi_start+ rsdt->Entry[1] - ACPI_PHYSICAL_ADDRESS);
+ if (madt->Header.Header.Signature !=
+ ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE_SIGNATURE) {
+ puts("Bad MADT signature \n");
+ return NULL;
+ }
+
+ return madt;
+}
+
+static void
+set_checksum(void *start, int checksum_offset, int len)
+{
+ unsigned char sum = 0;
+ unsigned char *ptr;
+
+ ptr = start;
+ ptr[checksum_offset] = 0;
+ while (len--)
+ sum += *ptr++;
+
+ ptr = start;
+ ptr[checksum_offset] = -sum;
+}
+
+static int
+acpi_madt_set_local_apics(
+ int nr_vcpu,
+ ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
+{
+ int i;
+
+ if ((nr_vcpu > MAX_VIRT_CPUS) || (nr_vcpu < 0) || !madt)
+ return -1;
+
+ for (i = 0; i < nr_vcpu; i++) {
+ madt->LocalApic[i].Type = ACPI_PROCESSOR_LOCAL_APIC;
+ madt->LocalApic[i].Length = sizeof
(ACPI_LOCAL_APIC_STRUCTURE);
+ madt->LocalApic[i].AcpiProcessorId = i;
+ madt->LocalApic[i].ApicId = i;
+ madt->LocalApic[i].Flags = 1;
+ }
+
+ madt->Header.Header.Length =
+ sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
+ (MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
+
+ return 0;
+}
+
+#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
+
+int acpi_madt_update(unsigned char *acpi_start)
+{
+ int rc;
+ ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
+
+ madt = acpi_madt_get_madt(acpi_start);
+ if (!madt)
+ return -1;
+
+ rc = acpi_madt_set_local_apics(get_vcpus(), madt);
+ if (rc != 0)
+ return rc;
+
+ set_checksum(
+ madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
+ madt->Header.Header.Length);
+
+ return 0;
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159
tools/python/xen/xend/server/DevController.py
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/DevController.py Thu Sep 22 17:42:01 2005
@@ -0,0 +1,203 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+
+from xen.xend import sxp
+from xen.xend.XendError import VmError
+from xen.xend.XendLogging import log
+from xen.xend.xenstore.xstransact import xstransact
+
+
+class DevController:
+ """Abstract base class for a device controller. Device controllers create
+ appropriate entries in the store to trigger the creation, reconfiguration,
+ and destruction of devices in guest domains. Each subclass of
+ DevController is responsible for a particular device-class, and
+ understands the details of configuration specific to that device-class.
+
+ DevController itself provides the functionality common to all device
+ creation tasks, as well as providing an interface to XendDomainInfo for
+ triggering those events themselves.
+ """
+
+ # Set when registered.
+ deviceClass = None
+
+
+ ## public:
+
+ def __init__(self, vm):
+ self.vm = vm
+
+
+ def createDevice(self, config):
+ """Trigger the creation of a device with the given configuration.
+
+ @return The ID for the newly created device.
+ """
+ (devid, back, front) = self.getDeviceDetails(config)
+
+ self.writeDetails(config, devid, back, front)
+
+ return devid
+
+
+ def reconfigureDevice(self, devid, config):
+ """Reconfigure the specified device.
+
+ The implementation here just raises VmError. This may be overridden
+ by those subclasses that can reconfigure their devices.
+ """
+ raise VmError('%s devices may not be reconfigured' % self.deviceClass)
+
+
+ def destroyDevice(self, devid):
+ """Destroy the specified device.
+
+ The implementation here simply deletes the appropriate paths from
+ the store. This may be overridden by subclasses who need to perform
+ other tasks on destruction.
+ """
+
+ frontpath = self.frontendPath(devid)
+ backpath = xstransact.Read("%s/backend" % frontpath)
+
+ xstransact.Remove(frontpath)
+ xstransact.Remove(backpath)
+
+
+ def sxpr(self, devid):
+ """@return an s-expression describing the specified device.
+ """
+ return [self.deviceClass, ['dom', self.vm.getDomid(),
+ 'id', devid]]
+
+
+ ## protected:
+
+ def getDeviceDetails(self, config):
+ """Compute the details for creation of a device corresponding to the
+ given configuration. These details consist of a tuple of (devID,
+ backDetails, frontDetails), where devID is the ID for the new device,
+ and backDetails and frontDetails are the device configuration
+ specifics for the backend and frontend respectively.
+
+ backDetails and frontDetails should be dictionaries, the keys and
+ values of which will be used as paths in the store. There is no need
+ for these dictionaries to include the references from frontend to
+ backend, nor vice versa, as these will be handled by DevController.
+
+ Abstract; must be implemented by every subclass.
+
+ @return (devID, backDetails, frontDetails), as specified above.
+ """
+
+ raise NotImplementedError()
+
+
+ def getDomid(self):
+ """Stub to {@link XendDomainInfo.getDomid}, for use by our
+ subclasses.
+ """
+ return self.vm.getDomid()
+
+
+ def allocateDeviceID(self):
+ """Allocate a device ID, allocating them consecutively on a
+ per-domain, per-device-class basis, and using the store to record the
+ next available ID.
+
+ This method is available to our subclasses, though it is not
+ compulsory to use it; subclasses may prefer to allocate IDs based upon
+ the device configuration instead.
+ """
+ path = self.frontendMiscPath()
+ t = xstransact(path)
+ try:
+ result = t.read("nextDeviceID")
+ if result:
+ result = int(result)
+ else:
+ result = 1
+ t.write("nextDeviceID", str(result + 1))
+ t.commit()
+ return result
+ except:
+ t.abort()
+ raise
+
+
+ ## private:
+
+ def writeDetails(self, config, devid, backDetails, frontDetails):
+ """Write the details in the store to trigger creation of a device.
+ The backend domain ID is taken from the given config, paths for
+ frontend and backend are computed, and these are written to the store
+ appropriately, including references from frontend to backend and vice
+ versa.
+
+ @param config The configuration of the device, as given to
+ {@link #createDevice}.
+ @param devid As returned by {@link #getDeviceDetails}.
+ @param backDetails As returned by {@link #getDeviceDetails}.
+ @param frontDetails As returned by {@link #getDeviceDetails}.
+ """
+
+ import xen.xend.XendDomain
+ backdom = xen.xend.XendDomain.instance().domain_lookup_by_name(
+ sxp.child_value(config, 'backend', '0'))
+
+ frontpath = self.frontendPath(devid)
+ backpath = self.backendPath(backdom, devid)
+
+ frontDetails.update({
+ 'backend' : backpath,
+ 'backend-id' : "%i" % backdom.getDomid()
+ })
+
+
+ backDetails.update({
+ 'domain' : self.vm.getName(),
+ 'frontend' : frontpath,
+ 'frontend-id' : "%i" % self.vm.getDomid()
+ })
+
+ log.debug('DevController: writing %s to %s.', str(frontDetails),
+ frontpath)
+ log.debug('DevController: writing %s to %s.', str(backDetails),
+ backpath)
+
+ xstransact.Write(frontpath, frontDetails)
+ xstransact.Write(backpath, backDetails)
+
+
+ def backendPath(self, backdom, devid):
+ """@param backdom [XendDomainInfo] The backend domain info."""
+
+ return "%s/backend/%s/%s/%d" % (backdom.getPath(),
+ self.deviceClass,
+ self.vm.getUuid(), devid)
+
+
+ def frontendPath(self, devid):
+ return "%s/device/%s/%d" % (self.vm.getPath(), self.deviceClass,
+ devid)
+
+
+ def frontendMiscPath(self):
+ return "%s/device-misc/%s" % (self.vm.getPath(), self.deviceClass)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/tpm_emulator-0.2b-x86_64.patch
--- /dev/null Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/tpm_emulator-0.2b-x86_64.patch Thu Sep 22 17:42:01 2005
@@ -0,0 +1,499 @@
+diff -uprN tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c
tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c
+--- tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c 2005-08-15
00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c 2005-09-19
14:10:29.000000000 -0700
+@@ -79,7 +79,7 @@ void __attribute__ ((regparm(0))) *kerne
+ {
+ void *ret = (void*)kmalloc(size, GFP_KERNEL);
+ if (!ret) panic(KERN_CRIT TPM_MODULE_NAME
+- "GMP: cannot allocate memory (size=%u)\n", size);
++ "GMP: cannot allocate memory (size=%Zu)\n", size);
+ return ret;
+ }
+
+@@ -88,7 +88,7 @@ void __attribute__ ((regparm(0))) *kerne
+ {
+ void *ret = (void*)kmalloc(new_size, GFP_KERNEL);
+ if (!ret) panic(KERN_CRIT TPM_MODULE_NAME "GMP: Cannot reallocate memory "
+- "(old_size=%u new_size=%u)\n", old_size, new_size);
++ "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
+ memcpy(ret, oldptr, old_size);
+ kfree(oldptr);
+ return ret;
+diff -uprN tpm_emulator-0.2/linux_module.c
tpm_emulator-0.2-x86_64/linux_module.c
+--- tpm_emulator-0.2/linux_module.c 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/linux_module.c 2005-09-19 14:10:29.000000000
-0700
+@@ -66,7 +66,7 @@ static int tpm_release(struct inode *ino
+
+ static ssize_t tpm_read(struct file *file, char *buf, size_t count, loff_t
*ppos)
+ {
+- debug("%s(%d)", __FUNCTION__, count);
++ debug("%s(%Zu)", __FUNCTION__, count);
+ down(&tpm_mutex);
+ if (tpm_response.data != NULL) {
+ count = min(count, (size_t)tpm_response.size - (size_t)*ppos);
+@@ -81,7 +81,7 @@ static ssize_t tpm_read(struct file *fil
+
+ static ssize_t tpm_write(struct file *file, const char *buf, size_t count,
loff_t *ppos)
+ {
+- debug("%s(%d)", __FUNCTION__, count);
++ debug("%s(%Zu)", __FUNCTION__, count);
+ down(&tpm_mutex);
+ *ppos = 0;
+ if (tpm_response.data != NULL) kfree(tpm_response.data);
+diff -uprN tpm_emulator-0.2/linux_module.h
tpm_emulator-0.2-x86_64/linux_module.h
+--- tpm_emulator-0.2/linux_module.h 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/linux_module.h 2005-09-19 14:10:29.000000000
-0700
+@@ -28,8 +28,10 @@
+
+ /* module settings */
+
++#ifndef STR
+ #define STR(s) __STR__(s)
+ #define __STR__(s) #s
++#endif
+ #include "tpm_version.h"
+
+ #define TPM_DEVICE_MINOR 224
+diff -uprN tpm_emulator-0.2/Makefile tpm_emulator-0.2-x86_64/Makefile
+--- tpm_emulator-0.2/Makefile 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/Makefile 2005-09-19 14:10:29.000000000 -0700
+@@ -7,6 +7,7 @@
+ KERNEL_RELEASE := $(shell uname -r)
+ KERNEL_BUILD := /lib/modules/$(KERNEL_RELEASE)/build
+ MOD_SUBDIR := misc
++COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/)
+
+ # module settings
+ MODULE_NAME := tpm_emulator
+@@ -17,8 +18,14 @@ VERSION_BUILD := $(shell date +"%s")
+ # enable/disable DEBUG messages
+ EXTRA_CFLAGS += -DDEBUG -g
+
++ifeq ($(COMPILE_ARCH),x86_64)
++LIBDIR = lib64
++else
++LIBDIR = lib
++endif
++
+ # GNU MP configuration
+-GMP_LIB := /usr/lib/libgmp.a
++GMP_LIB := /usr/$(LIBDIR)/libgmp.a
+ GMP_HEADER := /usr/include/gmp.h
+
+ # sources and objects
+diff -uprN tpm_emulator-0.2/README tpm_emulator-0.2-x86_64/README
+--- tpm_emulator-0.2/README 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/README 2005-09-19 14:21:43.000000000 -0700
+@@ -45,6 +45,12 @@ Example:
+ GMP_LIB := /usr/lib/libgmp.a
+ GMP_HEADER := /usr/include/gmp.h
+
++GNU MP Library on 64 bit Systems
++--------------------------------------------------------------------------
++Some 64-bit kernels have problems with importing the user-space gmp
++library (/usr/lib*/libgmp.a) into kernel space. These kernels will require
++that the gmp library be recompiled for kernel space with -mcmodel=kernel.
++
+ Installation
+ --------------------------------------------------------------------------
+ The compilation and installation process uses the build environment for
+diff -uprN tpm_emulator-0.2/tpm/tpm_credentials.c
tpm_emulator-0.2-x86_64/tpm/tpm_credentials.c
+--- tpm_emulator-0.2/tpm/tpm_credentials.c 2005-08-15 00:58:57.000000000
-0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_credentials.c 2005-09-19
14:10:29.000000000 -0700
+@@ -47,16 +47,16 @@ int tpm_compute_pubkey_checksum(TPM_NONC
+
+ TPM_RESULT tpm_get_pubek(TPM_PUBKEY *pubEndorsementKey)
+ {
+- UINT32 key_length;
++ size_t key_length;
+ if (!tpmData.permanent.data.endorsementKey.size) return TPM_NO_ENDORSEMENT;
+ /* setup TPM_PUBKEY structure */
+- key_length = tpmData.permanent.data.endorsementKey.size;
+- pubEndorsementKey->pubKey.keyLength = key_length >> 3;
++ pubEndorsementKey->pubKey.keyLength =
tpmData.permanent.data.endorsementKey.size >> 3;
+ pubEndorsementKey->pubKey.key =
tpm_malloc(pubEndorsementKey->pubKey.keyLength);
+ if (pubEndorsementKey->pubKey.key == NULL) return TPM_FAIL;
+ rsa_export_modulus(&tpmData.permanent.data.endorsementKey,
+- pubEndorsementKey->pubKey.key,
+- &pubEndorsementKey->pubKey.keyLength);
++ pubEndorsementKey->pubKey.key,
++ &key_length);
++ pubEndorsementKey->pubKey.keyLength = key_length;
+ pubEndorsementKey->algorithmParms.algorithmID = TPM_ALG_RSA;
+ pubEndorsementKey->algorithmParms.encScheme = TPM_ES_RSAESOAEP_SHA1_MGF1;
+ pubEndorsementKey->algorithmParms.sigScheme = TPM_SS_NONE;
+@@ -169,6 +169,7 @@ TPM_RESULT TPM_OwnerReadInternalPub(TPM_
+ {
+ TPM_RESULT res;
+ TPM_KEY_DATA *srk = &tpmData.permanent.data.srk;
++ size_t key_length;
+ info("TPM_OwnerReadInternalPub()");
+ /* verify authorization */
+ res = tpm_verify_auth(auth1, tpmData.permanent.data.ownerAuth,
TPM_KH_OWNER);
+@@ -180,7 +181,8 @@ TPM_RESULT TPM_OwnerReadInternalPub(TPM_
+ publicPortion->pubKey.key = tpm_malloc(publicPortion->pubKey.keyLength);
+ if (publicPortion->pubKey.key == NULL) return TPM_FAIL;
+ rsa_export_modulus(&srk->key, publicPortion->pubKey.key,
+- &publicPortion->pubKey.keyLength);
++ &key_length);
++ publicPortion->pubKey.keyLength = key_length;
+ publicPortion->algorithmParms.algorithmID = TPM_ALG_RSA;
+ publicPortion->algorithmParms.encScheme = srk->encScheme;
+ publicPortion->algorithmParms.sigScheme = srk->sigScheme;
+diff -uprN tpm_emulator-0.2/tpm/tpm_crypto.c
tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c
+--- tpm_emulator-0.2/tpm/tpm_crypto.c 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c 2005-09-19 14:10:29.000000000
-0700
+@@ -182,7 +182,8 @@ TPM_RESULT TPM_CertifyKey(TPM_KEY_HANDLE
+ TPM_KEY_DATA *cert, *key;
+ sha1_ctx_t sha1_ctx;
+ BYTE *buf, *p;
+- UINT32 length;
++ UINT32 length32;
++ size_t length;
+ info("TPM_CertifyKey()");
+ /* get keys */
+ cert = tpm_get_key(certHandle);
+@@ -264,14 +265,15 @@ TPM_RESULT TPM_CertifyKey(TPM_KEY_HANDLE
+ /* compute the digest of the CERTIFY_INFO[2] structure and sign it */
+ length = sizeof_TPM_CERTIFY_INFO((*certifyInfo));
+ p = buf = tpm_malloc(length);
++ length32=(UINT32) length;
+ if (buf == NULL
+- || tpm_marshal_TPM_CERTIFY_INFO(&p, &length, certifyInfo)) {
++ || tpm_marshal_TPM_CERTIFY_INFO(&p, &length32, certifyInfo)) {
+ free_TPM_KEY_PARMS(certifyInfo->algorithmParms);
+ return TPM_FAIL;
+ }
+ length = sizeof_TPM_CERTIFY_INFO((*certifyInfo));
+ sha1_init(&sha1_ctx);
+- sha1_update(&sha1_ctx, buf, length);
++ sha1_update(&sha1_ctx, buf, (size_t) length);
+ sha1_final(&sha1_ctx, buf);
+ res = tpm_sign(cert, auth1, FALSE, buf, SHA1_DIGEST_LENGTH, outData,
outDataSize);
+ tpm_free(buf);
+@@ -292,7 +294,8 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
+ TPM_KEY_DATA *cert, *key;
+ sha1_ctx_t sha1_ctx;
+ BYTE *buf, *p;
+- UINT32 length;
++ size_t length;
++ UINT32 length32;
+ info("TPM_CertifyKey2()");
+ /* get keys */
+ cert = tpm_get_key(certHandle);
+@@ -362,8 +365,9 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
+ /* compute the digest of the CERTIFY_INFO[2] structure and sign it */
+ length = sizeof_TPM_CERTIFY_INFO((*certifyInfo));
+ p = buf = tpm_malloc(length);
++ length32 = (UINT32) length;
+ if (buf == NULL
+- || tpm_marshal_TPM_CERTIFY_INFO(&p, &length, certifyInfo)) {
++ || tpm_marshal_TPM_CERTIFY_INFO(&p, &length32, certifyInfo)) {
+ free_TPM_KEY_PARMS(certifyInfo->algorithmParms);
+ return TPM_FAIL;
+ }
+diff -uprN tpm_emulator-0.2/tpm/tpm_data.c
tpm_emulator-0.2-x86_64/tpm/tpm_data.c
+--- tpm_emulator-0.2/tpm/tpm_data.c 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_data.c 2005-09-19 14:10:29.000000000
-0700
+@@ -179,7 +179,7 @@ static int read_from_file(uint8_t **data
+ int tpm_store_permanent_data(void)
+ {
+ uint8_t *buf, *ptr;
+- size_t buf_length, len;
++ UINT32 buf_length, len;
+
+ /* marshal data */
+ buf_length = len = sizeof_TPM_STCLEAR_FLAGS(tpmData.stclear.flags)
+@@ -207,13 +207,14 @@ int tpm_store_permanent_data(void)
+ int tpm_restore_permanent_data(void)
+ {
+ uint8_t *buf, *ptr;
+- size_t buf_length, len;
++ size_t buf_length;
++ UINT32 len;
+ TPM_VERSION ver;
+
+ /* read data */
+ if (read_from_file(&buf, &buf_length)) return -1;
+ ptr = buf;
+- len = buf_length;
++ len = (uint32_t) buf_length;
+ /* unmarshal data */
+ if (tpm_unmarshal_TPM_VERSION(&ptr, &len, &ver)
+ || memcmp(&ver, &tpmData.permanent.data.version, sizeof(TPM_VERSION))
+diff -uprN tpm_emulator-0.2/tpm/tpm_marshalling.c
tpm_emulator-0.2-x86_64/tpm/tpm_marshalling.c
+--- tpm_emulator-0.2/tpm/tpm_marshalling.c 2005-08-15 00:58:57.000000000
-0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_marshalling.c 2005-09-19
14:10:29.000000000 -0700
+@@ -981,7 +981,7 @@ int tpm_unmarshal_TPM_STANY_FLAGS(BYTE *
+
+ int tpm_marshal_RSA(BYTE **ptr, UINT32 *length, rsa_private_key_t *v)
+ {
+- UINT32 m_len, e_len, q_len;
++ size_t m_len, e_len, q_len;
+ if (*length < sizeof_RSA((*v))) return -1;
+ if (v->size > 0) {
+ rsa_export_modulus(v, &(*ptr)[6], &m_len);
+diff -uprN tpm_emulator-0.2/tpm/tpm_owner.c
tpm_emulator-0.2-x86_64/tpm/tpm_owner.c
+--- tpm_emulator-0.2/tpm/tpm_owner.c 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_owner.c 2005-09-19 14:10:29.000000000
-0700
+@@ -108,7 +108,7 @@ TPM_RESULT TPM_TakeOwnership(TPM_PROTOCO
+ TPM_RESULT res;
+ rsa_private_key_t *ek = &tpmData.permanent.data.endorsementKey;
+ TPM_KEY_DATA *srk = &tpmData.permanent.data.srk;
+- UINT32 buf_size = ek->size >> 3;
++ size_t buf_size = ek->size >> 3, key_length;
+ BYTE buf[buf_size];
+
+ info("TPM_TakeOwnership()");
+@@ -172,7 +172,8 @@ TPM_RESULT TPM_TakeOwnership(TPM_PROTOCO
+ return TPM_FAIL;
+ }
+ rsa_export_modulus(&srk->key, srkPub->pubKey.key,
+- &srkPub->pubKey.keyLength);
++ &key_length);
++ srkPub->pubKey.keyLength = (UINT32) key_length;
+ /* setup tpmProof and set state to owned */
+ tpm_get_random_bytes(tpmData.permanent.data.tpmProof.nonce,
+ sizeof(tpmData.permanent.data.tpmProof.nonce));
+diff -uprN tpm_emulator-0.2/tpm/tpm_storage.c
tpm_emulator-0.2-x86_64/tpm/tpm_storage.c
+--- tpm_emulator-0.2/tpm/tpm_storage.c 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_storage.c 2005-09-19 14:10:29.000000000
-0700
+@@ -58,6 +58,7 @@ int encrypt_sealed_data(TPM_KEY_DATA *ke
+ BYTE *enc, UINT32 *enc_size)
+ {
+ UINT32 len;
++ size_t enc_size32 = *enc_size;
+ BYTE *buf, *ptr;
+ rsa_public_key_t pub_key;
+ int scheme;
+@@ -72,7 +73,7 @@ int encrypt_sealed_data(TPM_KEY_DATA *ke
+ if (buf == NULL
+ || tpm_marshal_TPM_SEALED_DATA(&ptr, &len, seal)
+ || rsa_encrypt(&pub_key, scheme, buf, sizeof_TPM_SEALED_DATA((*seal)),
+- enc, enc_size)) {
++ enc, &enc_size32)) {
+ tpm_free(buf);
+ rsa_release_public_key(&pub_key);
+ return -1;
+@@ -85,7 +86,8 @@ int encrypt_sealed_data(TPM_KEY_DATA *ke
+ int decrypt_sealed_data(TPM_KEY_DATA *key, BYTE *enc, UINT32 enc_size,
+ TPM_SEALED_DATA *seal, BYTE **buf)
+ {
+- UINT32 len;
++ size_t len;
++ UINT32 len32;
+ BYTE *ptr;
+ int scheme;
+ switch (key->encScheme) {
+@@ -96,8 +98,12 @@ int decrypt_sealed_data(TPM_KEY_DATA *ke
+ len = enc_size;
+ *buf = ptr = tpm_malloc(len);
+ if (*buf == NULL
+- || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len)
+- || tpm_unmarshal_TPM_SEALED_DATA(&ptr, &len, seal)) {
++ || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len) ){
++ tpm_free(*buf);
++ return -1;
++ }
++ len32 = len;
++ if (tpm_unmarshal_TPM_SEALED_DATA(&ptr, &len32, seal)) {
+ tpm_free(*buf);
+ return -1;
+ }
+@@ -237,11 +243,12 @@ TPM_RESULT TPM_Unseal(TPM_KEY_HANDLE par
+
+ TPM_RESULT TPM_UnBind(TPM_KEY_HANDLE keyHandle, UINT32 inDataSize,
+ BYTE *inData, TPM_AUTH *auth1,
+- UINT32 *outDataSize, BYTE **outData)
++ UINT32 *outDataSize32, BYTE **outData)
+ {
+ TPM_RESULT res;
+ TPM_KEY_DATA *key;
+ int scheme;
++ size_t outDataSize;
+ info("TPM_UnBind()");
+ /* get key */
+ key = tpm_get_key(keyHandle);
+@@ -258,8 +265,8 @@ TPM_RESULT TPM_UnBind(TPM_KEY_HANDLE key
+ /* the size of the input data muss be greater than zero */
+ if (inDataSize == 0) return TPM_BAD_PARAMETER;
+ /* decrypt data */
+- *outDataSize = inDataSize;
+- *outData = tpm_malloc(*outDataSize);
++ outDataSize = inDataSize;
++ *outData = tpm_malloc(outDataSize);
+ if (*outData == NULL) return TPM_FAIL;
+ switch (key->encScheme) {
+ case TPM_ES_RSAESOAEP_SHA1_MGF1: scheme = RSA_ES_OAEP_SHA1; break;
+@@ -267,20 +274,21 @@ TPM_RESULT TPM_UnBind(TPM_KEY_HANDLE key
+ default: tpm_free(*outData); return TPM_DECRYPT_ERROR;
+ }
+ if (rsa_decrypt(&key->key, scheme, inData, inDataSize,
+- *outData, outDataSize)) {
++ *outData, &outDataSize) ) {
+ tpm_free(*outData);
+ return TPM_DECRYPT_ERROR;
+ }
+ /* verify data if it is of type TPM_BOUND_DATA */
+ if (key->encScheme == TPM_ES_RSAESOAEP_SHA1_MGF1
+ || key->keyUsage != TPM_KEY_LEGACY) {
+- if (*outDataSize < 5 || memcmp(*outData, "\x01\x01\00\x00\x02", 5) != 0) {
++ if (outDataSize < 5 || memcmp(*outData, "\x01\x01\00\x00\x02", 5) != 0) {
+ tpm_free(*outData);
+ return TPM_DECRYPT_ERROR;
+ }
+- *outDataSize -= 5;
+- memmove(*outData, &(*outData)[5], *outDataSize);
+- }
++ outDataSize -= 5;
++ memmove(*outData, &(*outData)[5], outDataSize);
++ }
++ *outDataSize32 = (UINT32) outDataSize;
+ return TPM_SUCCESS;
+ }
+
+@@ -311,12 +319,13 @@ static int verify_key_digest(TPM_KEY *ke
+ }
+
+ int encrypt_private_key(TPM_KEY_DATA *key, TPM_STORE_ASYMKEY *store,
+- BYTE *enc, UINT32 *enc_size)
++ BYTE *enc, UINT32 *enc_size32)
+ {
+ UINT32 len;
+ BYTE *buf, *ptr;
+ rsa_public_key_t pub_key;
+ int scheme;
++ size_t enc_size;
+ switch (key->encScheme) {
+ case TPM_ES_RSAESOAEP_SHA1_MGF1: scheme = RSA_ES_OAEP_SHA1; break;
+ case TPM_ES_RSAESPKCSv15: scheme = RSA_ES_PKCSV15; break;
+@@ -328,11 +337,12 @@ int encrypt_private_key(TPM_KEY_DATA *ke
+ if (buf == NULL
+ || tpm_marshal_TPM_STORE_ASYMKEY(&ptr, &len, store)
+ || rsa_encrypt(&pub_key, scheme, buf,
sizeof_TPM_STORE_ASYMKEY((*store)),
+- enc, enc_size)) {
++ enc, &enc_size)) {
+ tpm_free(buf);
+ rsa_release_public_key(&pub_key);
+ return -1;
+ }
++ *enc_size32 = (UINT32) enc_size;
+ tpm_free(buf);
+ rsa_release_public_key(&pub_key);
+ return 0;
+@@ -341,7 +351,8 @@ int encrypt_private_key(TPM_KEY_DATA *ke
+ int decrypt_private_key(TPM_KEY_DATA *key, BYTE *enc, UINT32 enc_size,
+ TPM_STORE_ASYMKEY *store, BYTE **buf)
+ {
+- UINT32 len;
++ UINT32 len32;
++ size_t len;
+ BYTE *ptr;
+ int scheme;
+ switch (key->encScheme) {
+@@ -352,11 +363,16 @@ int decrypt_private_key(TPM_KEY_DATA *ke
+ len = enc_size;
+ *buf = ptr = tpm_malloc(len);
+ if (*buf == NULL
+- || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len)
+- || tpm_unmarshal_TPM_STORE_ASYMKEY(&ptr, &len, store)) {
++ || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len) ) {
++ tpm_free(*buf);
++ return -1;
++ }
++ len32 = (UINT32) len;
++ if (tpm_unmarshal_TPM_STORE_ASYMKEY(&ptr, &len32, store)) {
+ tpm_free(*buf);
+ return -1;
+ }
++
+ return 0;
+ }
+
+@@ -371,7 +387,7 @@ TPM_RESULT TPM_CreateWrapKey(TPM_KEY_HAN
+ TPM_SESSION_DATA *session;
+ TPM_STORE_ASYMKEY store;
+ rsa_private_key_t rsa;
+- UINT32 key_length;
++ size_t key_length;
+
+ info("TPM_CreateWrapKey()");
+ /* get parent key */
+@@ -428,11 +444,11 @@ TPM_RESULT TPM_CreateWrapKey(TPM_KEY_HAN
+ }
+ if (compute_key_digest(wrappedKey, &store.pubDataDigest)) return TPM_FAIL;
+ /* generate key and store it */
+- key_length = keyInfo->algorithmParms.parms.rsa.keyLength;
+- if (rsa_generate_key(&rsa, key_length)) return TPM_FAIL;
+- wrappedKey->pubKey.keyLength = key_length >> 3;
++ if (rsa_generate_key(&rsa, keyInfo->algorithmParms.parms.rsa.keyLength))
++ return TPM_FAIL;
++ wrappedKey->pubKey.keyLength = keyInfo->algorithmParms.parms.rsa.keyLength
>> 3;
+ wrappedKey->pubKey.key = tpm_malloc(wrappedKey->pubKey.keyLength);
+- store.privKey.keyLength = key_length >> 4;
++ store.privKey.keyLength = keyInfo->algorithmParms.parms.rsa.keyLength >> 4;
+ store.privKey.key = tpm_malloc(store.privKey.keyLength);
+ wrappedKey->encDataSize = parent->key.size >> 3;
+ wrappedKey->encData = tpm_malloc(wrappedKey->encDataSize);
+@@ -444,9 +460,11 @@ TPM_RESULT TPM_CreateWrapKey(TPM_KEY_HAN
+ tpm_free(wrappedKey->encData);
+ return TPM_FAIL;
+ }
+- rsa_export_modulus(&rsa, wrappedKey->pubKey.key,
+- &wrappedKey->pubKey.keyLength);
+- rsa_export_prime1(&rsa, store.privKey.key, &store.privKey.keyLength);
++ rsa_export_modulus(&rsa, wrappedKey->pubKey.key,
++ &key_length);
++ wrappedKey->pubKey.keyLength = (UINT32) key_length;
++ rsa_export_prime1(&rsa, store.privKey.key, &key_length);
++ store.privKey.keyLength = (UINT32) key_length;
+ rsa_release_private_key(&rsa);
+ /* encrypt private key data */
+ if (encrypt_private_key(parent, &store, wrappedKey->encData,
+@@ -560,6 +578,7 @@ TPM_RESULT TPM_LoadKey(TPM_KEY_HANDLE pa
+
+ int tpm_setup_key_parms(TPM_KEY_DATA *key, TPM_KEY_PARMS *parms)
+ {
++ size_t key_length;
+ parms->algorithmID = TPM_ALG_RSA;
+ parms->encScheme = key->encScheme;
+ parms->sigScheme = key->sigScheme;
+@@ -569,7 +588,8 @@ int tpm_setup_key_parms(TPM_KEY_DATA *ke
+ parms->parms.rsa.exponent = tpm_malloc(parms->parms.rsa.exponentSize);
+ if (parms->parms.rsa.exponent == NULL) return -1;
+ rsa_export_exponent(&key->key, parms->parms.rsa.exponent,
+- &parms->parms.rsa.exponentSize);
++ &key_length);
++ parms->parms.rsa.exponentSize = (UINT32) key_length;
+ parms->parmSize = 12 + parms->parms.rsa.exponentSize;
+ return 0;
+ }
+@@ -580,6 +600,7 @@ TPM_RESULT TPM_GetPubKey(TPM_KEY_HANDLE
+ TPM_RESULT res;
+ TPM_KEY_DATA *key;
+ TPM_DIGEST digest;
++ size_t key_length;
+ info("TPM_GetPubKey()");
+ /* get key */
+ if (keyHandle == TPM_KH_SRK) return TPM_BAD_PARAMETER;
+@@ -607,8 +628,8 @@ TPM_RESULT TPM_GetPubKey(TPM_KEY_HANDLE
+ pubKey->pubKey.keyLength = key->key.size >> 3;
+ pubKey->pubKey.key = tpm_malloc(pubKey->pubKey.keyLength);
+ if (pubKey->pubKey.key == NULL) return TPM_FAIL;
+- rsa_export_modulus(&key->key, pubKey->pubKey.key,
+- &pubKey->pubKey.keyLength);
++ rsa_export_modulus(&key->key, pubKey->pubKey.key, &key_length);
++ pubKey->pubKey.keyLength = (UINT32) key_length;
+ if (tpm_setup_key_parms(key, &pubKey->algorithmParms) != 0) {
+ tpm_free(pubKey->pubKey.key);
+ return TPM_FAIL;
+diff -uprN tpm_emulator-0.2/tpm_version.h tpm_emulator-0.2-x86_64/tpm_version.h
+--- tpm_emulator-0.2/tpm_version.h 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm_version.h 1969-12-31 16:00:00.000000000
-0800
+@@ -1,6 +0,0 @@
+-#ifndef _TPM_VERSION_H_
+-#define _TPM_VERSION_H_
+-#define VERSION_MAJOR 0
+-#define VERSION_MINOR 2
+-#define VERSION_BUILD 1123950310
+-#endif /* _TPM_VERSION_H_ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/char/tpm/tpm.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,627 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- * Note, the TPM chip is not interrupt driven (only polling)
- * and can have very long timeouts (minutes!). Hence the unusual
- * calls to schedule_timeout.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/poll.h>
-#include <linux/spinlock.h>
-#include "tpm.h"
-
-#define TPM_MINOR 224 /* officially assigned
*/
-
-#define TPM_BUFSIZE 2048
-
-static LIST_HEAD(tpm_chip_list);
-static DEFINE_SPINLOCK(driver_lock);
-static int dev_mask[32];
-
-static void user_reader_timeout(unsigned long ptr)
-{
- struct tpm_chip *chip = (struct tpm_chip *) ptr;
-
- down(&chip->buffer_mutex);
- atomic_set(&chip->data_pending, 0);
- memset(chip->data_buffer, 0, TPM_BUFSIZE);
- up(&chip->buffer_mutex);
-}
-
-void tpm_time_expired(unsigned long ptr)
-{
- int *exp = (int *) ptr;
- *exp = 1;
-}
-
-EXPORT_SYMBOL_GPL(tpm_time_expired);
-
-/*
- * Internal kernel interface to transmit TPM commands
- */
-static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
- size_t bufsiz)
-{
- ssize_t len;
- u32 count;
- __be32 *native_size;
-
- native_size = (__force __be32 *) (buf + 2);
- count = be32_to_cpu(*native_size);
-
- if (count == 0)
- return -ENODATA;
- if (count > bufsiz) {
- dev_err(&chip->pci_dev->dev,
- "invalid count value %x %zx \n", count, bufsiz);
- return -E2BIG;
- }
-
- down(&chip->tpm_mutex);
-
- if ((len = chip->vendor->send(chip, (u8 *) buf, count)) < 0) {
- dev_err(&chip->pci_dev->dev,
- "tpm_transmit: tpm_send: error %zd\n", len);
- return len;
- }
-
- down(&chip->timer_manipulation_mutex);
- chip->time_expired = 0;
- init_timer(&chip->device_timer);
- chip->device_timer.function = tpm_time_expired;
- chip->device_timer.expires = jiffies + 2 * 60 * HZ;
- chip->device_timer.data = (unsigned long) &chip->time_expired;
- add_timer(&chip->device_timer);
- up(&chip->timer_manipulation_mutex);
-
- do {
- u8 status = inb(chip->vendor->base + 1);
- if ((status & chip->vendor->req_complete_mask) ==
- chip->vendor->req_complete_val) {
- down(&chip->timer_manipulation_mutex);
- del_singleshot_timer_sync(&chip->device_timer);
- up(&chip->timer_manipulation_mutex);
- goto out_recv;
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(TPM_TIMEOUT);
- rmb();
- } while (!chip->time_expired);
-
-
- chip->vendor->cancel(chip);
- dev_err(&chip->pci_dev->dev, "Time expired\n");
- up(&chip->tpm_mutex);
- return -EIO;
-
-out_recv:
- len = chip->vendor->recv(chip, (u8 *) buf, bufsiz);
- if (len < 0)
- dev_err(&chip->pci_dev->dev,
- "tpm_transmit: tpm_recv: error %zd\n", len);
- up(&chip->tpm_mutex);
- return len;
-}
-
-#define TPM_DIGEST_SIZE 20
-#define CAP_PCR_RESULT_SIZE 18
-static u8 cap_pcr[] = {
- 0, 193, /* TPM_TAG_RQU_COMMAND */
- 0, 0, 0, 22, /* length */
- 0, 0, 0, 101, /* TPM_ORD_GetCapability */
- 0, 0, 0, 5,
- 0, 0, 0, 4,
- 0, 0, 1, 1
-};
-
-#define READ_PCR_RESULT_SIZE 30
-static u8 pcrread[] = {
- 0, 193, /* TPM_TAG_RQU_COMMAND */
- 0, 0, 0, 14, /* length */
- 0, 0, 0, 21, /* TPM_ORD_PcrRead */
- 0, 0, 0, 0 /* PCR index */
-};
-
-static ssize_t show_pcrs(struct device *dev, char *buf)
-{
- u8 data[READ_PCR_RESULT_SIZE];
- ssize_t len;
- int i, j, index, num_pcrs;
- char *str = buf;
-
- struct tpm_chip *chip =
- pci_get_drvdata(container_of(dev, struct pci_dev, dev));
- if (chip == NULL)
- return -ENODEV;
-
- memcpy(data, cap_pcr, sizeof(cap_pcr));
- if ((len = tpm_transmit(chip, data, sizeof(data)))
- < CAP_PCR_RESULT_SIZE)
- return len;
-
- num_pcrs = be32_to_cpu(*((__force __be32 *) (data + 14)));
-
- for (i = 0; i < num_pcrs; i++) {
- memcpy(data, pcrread, sizeof(pcrread));
- index = cpu_to_be32(i);
- memcpy(data + 10, &index, 4);
- if ((len = tpm_transmit(chip, data, sizeof(data)))
- < READ_PCR_RESULT_SIZE)
- return len;
- str += sprintf(str, "PCR-%02d: ", i);
- for (j = 0; j < TPM_DIGEST_SIZE; j++)
- str += sprintf(str, "%02X ", *(data + 10 + j));
- str += sprintf(str, "\n");
- }
- return str - buf;
-}
-
-static DEVICE_ATTR(pcrs, S_IRUGO, show_pcrs, NULL);
-
-#define READ_PUBEK_RESULT_SIZE 314
-static u8 readpubek[] = {
- 0, 193, /* TPM_TAG_RQU_COMMAND */
- 0, 0, 0, 30, /* length */
- 0, 0, 0, 124, /* TPM_ORD_ReadPubek */
-};
-
-static ssize_t show_pubek(struct device *dev, char *buf)
-{
- u8 data[READ_PUBEK_RESULT_SIZE];
- ssize_t len;
- __be32 *native_val;
- int i;
- char *str = buf;
-
- struct tpm_chip *chip =
- pci_get_drvdata(container_of(dev, struct pci_dev, dev));
- if (chip == NULL)
- return -ENODEV;
-
- memcpy(data, readpubek, sizeof(readpubek));
- memset(data + sizeof(readpubek), 0, 20); /* zero nonce */
-
- if ((len = tpm_transmit(chip, data, sizeof(data))) <
- READ_PUBEK_RESULT_SIZE)
- return len;
-
- /*
- ignore header 10 bytes
- algorithm 32 bits (1 == RSA )
- encscheme 16 bits
- sigscheme 16 bits
- parameters (RSA 12->bytes: keybit, #primes, expbit)
- keylenbytes 32 bits
- 256 byte modulus
- ignore checksum 20 bytes
- */
-
- native_val = (__force __be32 *) (data + 34);
-
- str +=
- sprintf(str,
- "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
- "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
- " %02X %02X %02X %02X %02X %02X %02X %02X\n"
- "Modulus length: %d\nModulus: \n",
- data[10], data[11], data[12], data[13], data[14],
- data[15], data[16], data[17], data[22], data[23],
- data[24], data[25], data[26], data[27], data[28],
- data[29], data[30], data[31], data[32], data[33],
- be32_to_cpu(*native_val)
- );
-
- for (i = 0; i < 256; i++) {
- str += sprintf(str, "%02X ", data[i + 39]);
- if ((i + 1) % 16 == 0)
- str += sprintf(str, "\n");
- }
- return str - buf;
-}
-
-static DEVICE_ATTR(pubek, S_IRUGO, show_pubek, NULL);
-
-#define CAP_VER_RESULT_SIZE 18
-static u8 cap_version[] = {
- 0, 193, /* TPM_TAG_RQU_COMMAND */
- 0, 0, 0, 18, /* length */
- 0, 0, 0, 101, /* TPM_ORD_GetCapability */
- 0, 0, 0, 6,
- 0, 0, 0, 0
-};
-
-#define CAP_MANUFACTURER_RESULT_SIZE 18
-static u8 cap_manufacturer[] = {
- 0, 193, /* TPM_TAG_RQU_COMMAND */
- 0, 0, 0, 22, /* length */
- 0, 0, 0, 101, /* TPM_ORD_GetCapability */
- 0, 0, 0, 5,
- 0, 0, 0, 4,
- 0, 0, 1, 3
-};
-
-static ssize_t show_caps(struct device *dev, char *buf)
-{
- u8 data[READ_PUBEK_RESULT_SIZE];
- ssize_t len;
- char *str = buf;
-
- struct tpm_chip *chip =
- pci_get_drvdata(container_of(dev, struct pci_dev, dev));
- if (chip == NULL)
- return -ENODEV;
-
- memcpy(data, cap_manufacturer, sizeof(cap_manufacturer));
-
- if ((len = tpm_transmit(chip, data, sizeof(data))) <
- CAP_MANUFACTURER_RESULT_SIZE)
- return len;
-
- str += sprintf(str, "Manufacturer: 0x%x\n",
- be32_to_cpu(*(data + 14)));
-
- memcpy(data, cap_version, sizeof(cap_version));
-
- if ((len = tpm_transmit(chip, data, sizeof(data))) <
- CAP_VER_RESULT_SIZE)
- return len;
-
- str +=
- sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n",
- (int) data[14], (int) data[15], (int) data[16],
- (int) data[17]);
-
- return str - buf;
-}
-
-static DEVICE_ATTR(caps, S_IRUGO, show_caps, NULL);
-
-/*
- * Device file system interface to the TPM
- */
-int tpm_open(struct inode *inode, struct file *file)
-{
- int rc = 0, minor = iminor(inode);
- struct tpm_chip *chip = NULL, *pos;
-
- spin_lock(&driver_lock);
-
- list_for_each_entry(pos, &tpm_chip_list, list) {
- if (pos->vendor->miscdev.minor == minor) {
- chip = pos;
- break;
- }
- }
-
- if (chip == NULL) {
- rc = -ENODEV;
- goto err_out;
- }
-
- if (chip->num_opens) {
- dev_dbg(&chip->pci_dev->dev,
- "Another process owns this TPM\n");
- rc = -EBUSY;
- goto err_out;
- }
-
- chip->num_opens++;
- pci_dev_get(chip->pci_dev);
-
- spin_unlock(&driver_lock);
-
- chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
- if (chip->data_buffer == NULL) {
- chip->num_opens--;
- pci_dev_put(chip->pci_dev);
- return -ENOMEM;
- }
-
- atomic_set(&chip->data_pending, 0);
-
- file->private_data = chip;
- return 0;
-
-err_out:
- spin_unlock(&driver_lock);
- return rc;
-}
-
-EXPORT_SYMBOL_GPL(tpm_open);
-
-int tpm_release(struct inode *inode, struct file *file)
-{
- struct tpm_chip *chip = file->private_data;
-
- file->private_data = NULL;
-
- spin_lock(&driver_lock);
- chip->num_opens--;
- spin_unlock(&driver_lock);
-
- down(&chip->timer_manipulation_mutex);
- if (timer_pending(&chip->user_read_timer))
- del_singleshot_timer_sync(&chip->user_read_timer);
- else if (timer_pending(&chip->device_timer))
- del_singleshot_timer_sync(&chip->device_timer);
- up(&chip->timer_manipulation_mutex);
-
- kfree(chip->data_buffer);
- atomic_set(&chip->data_pending, 0);
-
- pci_dev_put(chip->pci_dev);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_release);
-
-ssize_t tpm_write(struct file * file, const char __user * buf,
- size_t size, loff_t * off)
-{
- struct tpm_chip *chip = file->private_data;
- int in_size = size, out_size;
-
- /* cannot perform a write until the read has cleared
- either via tpm_read or a user_read_timer timeout */
- while (atomic_read(&chip->data_pending) != 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(TPM_TIMEOUT);
- }
-
- down(&chip->buffer_mutex);
-
- if (in_size > TPM_BUFSIZE)
- in_size = TPM_BUFSIZE;
-
- if (copy_from_user
- (chip->data_buffer, (void __user *) buf, in_size)) {
- up(&chip->buffer_mutex);
- return -EFAULT;
- }
-
- /* atomic tpm command send and result receive */
- out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
-
- atomic_set(&chip->data_pending, out_size);
- atomic_set(&chip->data_position, 0);
- up(&chip->buffer_mutex);
-
- /* Set a timeout by which the reader must come claim the result */
- down(&chip->timer_manipulation_mutex);
- init_timer(&chip->user_read_timer);
- chip->user_read_timer.function = user_reader_timeout;
- chip->user_read_timer.data = (unsigned long) chip;
- chip->user_read_timer.expires = jiffies + (60 * HZ);
- add_timer(&chip->user_read_timer);
- up(&chip->timer_manipulation_mutex);
-
- return in_size;
-}
-
-EXPORT_SYMBOL_GPL(tpm_write);
-
-ssize_t tpm_read(struct file * file, char __user * buf,
- size_t size, loff_t * off)
-{
- struct tpm_chip *chip = file->private_data;
- int ret_size = -ENODATA;
- int pos, pending = 0;
-
- down(&chip->buffer_mutex);
- ret_size = atomic_read(&chip->data_pending);
- if ( ret_size > 0 ) { /* Result available */
- if (size < ret_size)
- ret_size = size;
-
- pos = atomic_read(&chip->data_position);
-
- if (copy_to_user((void __user *) buf,
- &chip->data_buffer[pos], ret_size)) {
- ret_size = -EFAULT;
- } else {
- pending = atomic_read(&chip->data_pending) - ret_size;
- if ( pending ) {
- atomic_set( &chip->data_pending, pending );
- atomic_set( &chip->data_position, pos+ret_size
);
- }
- }
- }
- up(&chip->buffer_mutex);
-
- if ( ret_size <= 0 || pending == 0 ) {
- atomic_set( &chip->data_pending, 0 );
- down(&chip->timer_manipulation_mutex);
- del_singleshot_timer_sync(&chip->user_read_timer);
- up(&chip->timer_manipulation_mutex);
- }
-
- return ret_size;
-}
-
-EXPORT_SYMBOL_GPL(tpm_read);
-
-void __devexit tpm_remove(struct pci_dev *pci_dev)
-{
- struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-
- if (chip == NULL) {
- dev_err(&pci_dev->dev, "No device data found\n");
- return;
- }
-
- spin_lock(&driver_lock);
-
- list_del(&chip->list);
-
- spin_unlock(&driver_lock);
-
- pci_set_drvdata(pci_dev, NULL);
- misc_deregister(&chip->vendor->miscdev);
-
- device_remove_file(&pci_dev->dev, &dev_attr_pubek);
- device_remove_file(&pci_dev->dev, &dev_attr_pcrs);
- device_remove_file(&pci_dev->dev, &dev_attr_caps);
-
- pci_disable_device(pci_dev);
-
- dev_mask[chip->dev_num / 32] &= !(1 << (chip->dev_num % 32));
-
- kfree(chip);
-
- pci_dev_put(pci_dev);
-}
-
-EXPORT_SYMBOL_GPL(tpm_remove);
-
-static u8 savestate[] = {
- 0, 193, /* TPM_TAG_RQU_COMMAND */
- 0, 0, 0, 10, /* blob length (in bytes) */
- 0, 0, 0, 152 /* TPM_ORD_SaveState */
-};
-
-/*
- * We are about to suspend. Save the TPM state
- * so that it can be restored.
- */
-int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state)
-{
- struct tpm_chip *chip = pci_get_drvdata(pci_dev);
- if (chip == NULL)
- return -ENODEV;
-
- tpm_transmit(chip, savestate, sizeof(savestate));
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_pm_suspend);
-
-/*
- * Resume from a power safe. The BIOS already restored
- * the TPM state.
- */
-int tpm_pm_resume(struct pci_dev *pci_dev)
-{
- struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-
- if (chip == NULL)
- return -ENODEV;
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_pm_resume);
-
-/*
- * Called from tpm_<specific>.c probe function only for devices
- * the driver has determined it should claim. Prior to calling
- * this function the specific probe function has called pci_enable_device
- * upon errant exit from this function specific probe function should call
- * pci_disable_device
- */
-int tpm_register_hardware(struct pci_dev *pci_dev,
- struct tpm_vendor_specific *entry)
-{
- char devname[7];
- struct tpm_chip *chip;
- int i, j;
-
- /* Driver specific per-device data */
- chip = kmalloc(sizeof(*chip), GFP_KERNEL);
- if (chip == NULL)
- return -ENOMEM;
-
- memset(chip, 0, sizeof(struct tpm_chip));
-
- init_MUTEX(&chip->buffer_mutex);
- init_MUTEX(&chip->tpm_mutex);
- init_MUTEX(&chip->timer_manipulation_mutex);
- INIT_LIST_HEAD(&chip->list);
-
- chip->vendor = entry;
-
- chip->dev_num = -1;
-
- for (i = 0; i < 32; i++)
- for (j = 0; j < 8; j++)
- if ((dev_mask[i] & (1 << j)) == 0) {
- chip->dev_num = i * 32 + j;
- dev_mask[i] |= 1 << j;
- goto dev_num_search_complete;
- }
-
-dev_num_search_complete:
- if (chip->dev_num < 0) {
- dev_err(&pci_dev->dev,
- "No available tpm device numbers\n");
- kfree(chip);
- return -ENODEV;
- } else if (chip->dev_num == 0)
- chip->vendor->miscdev.minor = TPM_MINOR;
- else
- chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR;
-
- snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num);
- chip->vendor->miscdev.name = devname;
-
- chip->vendor->miscdev.dev = &(pci_dev->dev);
- chip->pci_dev = pci_dev_get(pci_dev);
-
- if (misc_register(&chip->vendor->miscdev)) {
- dev_err(&chip->pci_dev->dev,
- "unable to misc_register %s, minor %d\n",
- chip->vendor->miscdev.name,
- chip->vendor->miscdev.minor);
- pci_dev_put(pci_dev);
- kfree(chip);
- dev_mask[i] &= !(1 << j);
- return -ENODEV;
- }
-
- pci_set_drvdata(pci_dev, chip);
-
- list_add(&chip->list, &tpm_chip_list);
-
- device_create_file(&pci_dev->dev, &dev_attr_pubek);
- device_create_file(&pci_dev->dev, &dev_attr_pcrs);
- device_create_file(&pci_dev->dev, &dev_attr_caps);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_register_hardware);
-
-static int __init init_tpm(void)
-{
- return 0;
-}
-
-static void __exit cleanup_tpm(void)
-{
-
-}
-
-module_init(init_tpm);
-module_exit(cleanup_tpm);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/char/tpm/tpm.h
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- */
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-
-#define TPM_TIMEOUT msecs_to_jiffies(5)
-
-/* TPM addresses */
-#define TPM_ADDR 0x4E
-#define TPM_DATA 0x4F
-
-struct tpm_chip;
-
-struct tpm_vendor_specific {
- u8 req_complete_mask;
- u8 req_complete_val;
- u16 base; /* TPM base address */
-
- int (*recv) (struct tpm_chip *, u8 *, size_t);
- int (*send) (struct tpm_chip *, u8 *, size_t);
- void (*cancel) (struct tpm_chip *);
- struct miscdevice miscdev;
-};
-
-struct tpm_chip {
- struct pci_dev *pci_dev; /* PCI device stuff */
-
- int dev_num; /* /dev/tpm# */
- int num_opens; /* only one allowed */
- int time_expired;
-
- /* Data passed to and from the tpm via the read/write calls */
- u8 *data_buffer;
- atomic_t data_pending;
- atomic_t data_position;
- struct semaphore buffer_mutex;
-
- struct timer_list user_read_timer; /* user needs to claim result */
- struct semaphore tpm_mutex; /* tpm is processing */
- struct timer_list device_timer; /* tpm is processing */
- struct semaphore timer_manipulation_mutex;
-
- struct tpm_vendor_specific *vendor;
-
- struct list_head list;
-};
-
-static inline int tpm_read_index(int index)
-{
- outb(index, TPM_ADDR);
- return inb(TPM_DATA) & 0xFF;
-}
-
-static inline void tpm_write_index(int index, int value)
-{
- outb(index, TPM_ADDR);
- outb(value & 0xFF, TPM_DATA);
-}
-
-extern void tpm_time_expired(unsigned long);
-extern int tpm_register_hardware(struct pci_dev *,
- struct tpm_vendor_specific *);
-extern int tpm_open(struct inode *, struct file *);
-extern int tpm_release(struct inode *, struct file *);
-extern ssize_t tpm_write(struct file *, const char __user *, size_t,
- loff_t *);
-extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *);
-extern void __devexit tpm_remove(struct pci_dev *);
-extern int tpm_pm_suspend(struct pci_dev *, pm_message_t);
-extern int tpm_pm_resume(struct pci_dev *);
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- */
-
-#include "tpm.h"
-
-/* Atmel definitions */
-enum tpm_atmel_addr {
- TPM_ATMEL_BASE_ADDR_LO = 0x08,
- TPM_ATMEL_BASE_ADDR_HI = 0x09
-};
-
-/* write status bits */
-#define ATML_STATUS_ABORT 0x01
-#define ATML_STATUS_LASTBYTE 0x04
-
-/* read status bits */
-#define ATML_STATUS_BUSY 0x01
-#define ATML_STATUS_DATA_AVAIL 0x02
-#define ATML_STATUS_REWRITE 0x04
-
-
-static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
-{
- u8 status, *hdr = buf;
- u32 size;
- int i;
- __be32 *native_size;
-
- /* start reading header */
- if (count < 6)
- return -EIO;
-
- for (i = 0; i < 6; i++) {
- status = inb(chip->vendor->base + 1);
- if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(&chip->pci_dev->dev,
- "error reading header\n");
- return -EIO;
- }
- *buf++ = inb(chip->vendor->base);
- }
-
- /* size of the data received */
- native_size = (__force __be32 *) (hdr + 2);
- size = be32_to_cpu(*native_size);
-
- if (count < size) {
- dev_err(&chip->pci_dev->dev,
- "Recv size(%d) less than available space\n", size);
- for (; i < size; i++) { /* clear the waiting data anyway */
- status = inb(chip->vendor->base + 1);
- if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(&chip->pci_dev->dev,
- "error reading data\n");
- return -EIO;
- }
- }
- return -EIO;
- }
-
- /* read all the data available */
- for (; i < size; i++) {
- status = inb(chip->vendor->base + 1);
- if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(&chip->pci_dev->dev,
- "error reading data\n");
- return -EIO;
- }
- *buf++ = inb(chip->vendor->base);
- }
-
- /* make sure data available is gone */
- status = inb(chip->vendor->base + 1);
- if (status & ATML_STATUS_DATA_AVAIL) {
- dev_err(&chip->pci_dev->dev, "data available is stuck\n");
- return -EIO;
- }
-
- return size;
-}
-
-static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count)
-{
- int i;
-
- dev_dbg(&chip->pci_dev->dev, "tpm_atml_send: ");
- for (i = 0; i < count; i++) {
- dev_dbg(&chip->pci_dev->dev, "0x%x(%d) ", buf[i], buf[i]);
- outb(buf[i], chip->vendor->base);
- }
-
- return count;
-}
-
-static void tpm_atml_cancel(struct tpm_chip *chip)
-{
- outb(ATML_STATUS_ABORT, chip->vendor->base + 1);
-}
-
-static struct file_operations atmel_ops = {
- .owner = THIS_MODULE,
- .llseek = no_llseek,
- .open = tpm_open,
- .read = tpm_read,
- .write = tpm_write,
- .release = tpm_release,
-};
-
-static struct tpm_vendor_specific tpm_atmel = {
- .recv = tpm_atml_recv,
- .send = tpm_atml_send,
- .cancel = tpm_atml_cancel,
- .req_complete_mask = ATML_STATUS_BUSY | ATML_STATUS_DATA_AVAIL,
- .req_complete_val = ATML_STATUS_DATA_AVAIL,
- .miscdev = { .fops = &atmel_ops, },
-};
-
-static int __devinit tpm_atml_init(struct pci_dev *pci_dev,
- const struct pci_device_id *pci_id)
-{
- u8 version[4];
- int rc = 0;
- int lo, hi;
-
- if (pci_enable_device(pci_dev))
- return -EIO;
-
- lo = tpm_read_index( TPM_ATMEL_BASE_ADDR_LO );
- hi = tpm_read_index( TPM_ATMEL_BASE_ADDR_HI );
-
- tpm_atmel.base = (hi<<8)|lo;
- dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
-
- /* verify that it is an Atmel part */
- if (tpm_read_index(4) != 'A' || tpm_read_index(5) != 'T'
- || tpm_read_index(6) != 'M' || tpm_read_index(7) != 'L') {
- rc = -ENODEV;
- goto out_err;
- }
-
- /* query chip for its version number */
- if ((version[0] = tpm_read_index(0x00)) != 0xFF) {
- version[1] = tpm_read_index(0x01);
- version[2] = tpm_read_index(0x02);
- version[3] = tpm_read_index(0x03);
- } else {
- dev_info(&pci_dev->dev, "version query failed\n");
- rc = -ENODEV;
- goto out_err;
- }
-
- if ((rc = tpm_register_hardware(pci_dev, &tpm_atmel)) < 0)
- goto out_err;
-
- dev_info(&pci_dev->dev,
- "Atmel TPM version %d.%d.%d.%d\n", version[0], version[1],
- version[2], version[3]);
-
- return 0;
-out_err:
- pci_disable_device(pci_dev);
- return rc;
-}
-
-static struct pci_device_id tpm_pci_tbl[] __devinitdata = {
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)},
- {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)},
- {0,}
-};
-
-MODULE_DEVICE_TABLE(pci, tpm_pci_tbl);
-
-static struct pci_driver atmel_pci_driver = {
- .name = "tpm_atmel",
- .id_table = tpm_pci_tbl,
- .probe = tpm_atml_init,
- .remove = __devexit_p(tpm_remove),
- .suspend = tpm_pm_suspend,
- .resume = tpm_pm_resume,
-};
-
-static int __init init_atmel(void)
-{
- return pci_register_driver(&atmel_pci_driver);
-}
-
-static void __exit cleanup_atmel(void)
-{
- pci_unregister_driver(&atmel_pci_driver);
-}
-
-module_init(init_atmel);
-module_exit(cleanup_atmel);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,377 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- */
-
-#include "tpm.h"
-
-/* National definitions */
-#define TPM_NSC_BASE 0x360
-#define TPM_NSC_IRQ 0x07
-#define TPM_NSC_BASE0_HI 0x60
-#define TPM_NSC_BASE0_LO 0x61
-#define TPM_NSC_BASE1_HI 0x62
-#define TPM_NSC_BASE1_LO 0x63
-
-#define NSC_LDN_INDEX 0x07
-#define NSC_SID_INDEX 0x20
-#define NSC_LDC_INDEX 0x30
-#define NSC_DIO_INDEX 0x60
-#define NSC_CIO_INDEX 0x62
-#define NSC_IRQ_INDEX 0x70
-#define NSC_ITS_INDEX 0x71
-
-#define NSC_STATUS 0x01
-#define NSC_COMMAND 0x01
-#define NSC_DATA 0x00
-
-/* status bits */
-#define NSC_STATUS_OBF 0x01 /* output buffer full */
-#define NSC_STATUS_IBF 0x02 /* input buffer full */
-#define NSC_STATUS_F0 0x04 /* F0 */
-#define NSC_STATUS_A2 0x08 /* A2 */
-#define NSC_STATUS_RDY 0x10 /* ready to receive
command */
-#define NSC_STATUS_IBR 0x20 /* ready to receive
data */
-
-/* command bits */
-#define NSC_COMMAND_NORMAL 0x01 /* normal mode */
-#define NSC_COMMAND_EOC 0x03
-#define NSC_COMMAND_CANCEL 0x22
-
-/*
- * Wait for a certain status to appear
- */
-static int wait_for_stat(struct tpm_chip *chip, u8 mask, u8 val, u8 * data)
-{
- int expired = 0;
- struct timer_list status_timer =
- TIMER_INITIALIZER(tpm_time_expired, jiffies + 10 * HZ,
- (unsigned long) &expired);
-
- /* status immediately available check */
- *data = inb(chip->vendor->base + NSC_STATUS);
- if ((*data & mask) == val)
- return 0;
-
- /* wait for status */
- add_timer(&status_timer);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(TPM_TIMEOUT);
- *data = inb(chip->vendor->base + 1);
- if ((*data & mask) == val) {
- del_singleshot_timer_sync(&status_timer);
- return 0;
- }
- }
- while (!expired);
-
- return -EBUSY;
-}
-
-static int nsc_wait_for_ready(struct tpm_chip *chip)
-{
- int status;
- int expired = 0;
- struct timer_list status_timer =
- TIMER_INITIALIZER(tpm_time_expired, jiffies + 100,
- (unsigned long) &expired);
-
- /* status immediately available check */
- status = inb(chip->vendor->base + NSC_STATUS);
- if (status & NSC_STATUS_OBF)
- status = inb(chip->vendor->base + NSC_DATA);
- if (status & NSC_STATUS_RDY)
- return 0;
-
- /* wait for status */
- add_timer(&status_timer);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(TPM_TIMEOUT);
- status = inb(chip->vendor->base + NSC_STATUS);
- if (status & NSC_STATUS_OBF)
- status = inb(chip->vendor->base + NSC_DATA);
- if (status & NSC_STATUS_RDY) {
- del_singleshot_timer_sync(&status_timer);
- return 0;
- }
- }
- while (!expired);
-
- dev_info(&chip->pci_dev->dev, "wait for ready failed\n");
- return -EBUSY;
-}
-
-
-static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count)
-{
- u8 *buffer = buf;
- u8 data, *p;
- u32 size;
- __be32 *native_size;
-
- if (count < 6)
- return -EIO;
-
- if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) {
- dev_err(&chip->pci_dev->dev, "F0 timeout\n");
- return -EIO;
- }
- if ((data =
- inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_NORMAL) {
- dev_err(&chip->pci_dev->dev, "not in normal mode (0x%x)\n",
- data);
- return -EIO;
- }
-
- /* read the whole packet */
- for (p = buffer; p < &buffer[count]; p++) {
- if (wait_for_stat
- (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) {
- dev_err(&chip->pci_dev->dev,
- "OBF timeout (while reading data)\n");
- return -EIO;
- }
- if (data & NSC_STATUS_F0)
- break;
- *p = inb(chip->vendor->base + NSC_DATA);
- }
-
- if ((data & NSC_STATUS_F0) == 0) {
- dev_err(&chip->pci_dev->dev, "F0 not set\n");
- return -EIO;
- }
- if ((data = inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_EOC) {
- dev_err(&chip->pci_dev->dev,
- "expected end of command(0x%x)\n", data);
- return -EIO;
- }
-
- native_size = (__force __be32 *) (buf + 2);
- size = be32_to_cpu(*native_size);
-
- if (count < size)
- return -EIO;
-
- return size;
-}
-
-static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count)
-{
- u8 data;
- int i;
-
- /*
- * If we hit the chip with back to back commands it locks up
- * and never set IBF. Hitting it with this "hammer" seems to
- * fix it. Not sure why this is needed, we followed the flow
- * chart in the manual to the letter.
- */
- outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND);
-
- if (nsc_wait_for_ready(chip) != 0)
- return -EIO;
-
- if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
- dev_err(&chip->pci_dev->dev, "IBF timeout\n");
- return -EIO;
- }
-
- outb(NSC_COMMAND_NORMAL, chip->vendor->base + NSC_COMMAND);
- if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) {
- dev_err(&chip->pci_dev->dev, "IBR timeout\n");
- return -EIO;
- }
-
- for (i = 0; i < count; i++) {
- if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
- dev_err(&chip->pci_dev->dev,
- "IBF timeout (while writing data)\n");
- return -EIO;
- }
- outb(buf[i], chip->vendor->base + NSC_DATA);
- }
-
- if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
- dev_err(&chip->pci_dev->dev, "IBF timeout\n");
- return -EIO;
- }
- outb(NSC_COMMAND_EOC, chip->vendor->base + NSC_COMMAND);
-
- return count;
-}
-
-static void tpm_nsc_cancel(struct tpm_chip *chip)
-{
- outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND);
-}
-
-static struct file_operations nsc_ops = {
- .owner = THIS_MODULE,
- .llseek = no_llseek,
- .open = tpm_open,
- .read = tpm_read,
- .write = tpm_write,
- .release = tpm_release,
-};
-
-static struct tpm_vendor_specific tpm_nsc = {
- .recv = tpm_nsc_recv,
- .send = tpm_nsc_send,
- .cancel = tpm_nsc_cancel,
- .req_complete_mask = NSC_STATUS_OBF,
- .req_complete_val = NSC_STATUS_OBF,
- .miscdev = { .fops = &nsc_ops, },
-
-};
-
-static int __devinit tpm_nsc_init(struct pci_dev *pci_dev,
- const struct pci_device_id *pci_id)
-{
- int rc = 0;
- int lo, hi;
-
- hi = tpm_read_index(TPM_NSC_BASE0_HI);
- lo = tpm_read_index(TPM_NSC_BASE0_LO);
-
- tpm_nsc.base = (hi<<8) | lo;
-
- if (pci_enable_device(pci_dev))
- return -EIO;
-
- /* verify that it is a National part (SID) */
- if (tpm_read_index(NSC_SID_INDEX) != 0xEF) {
- rc = -ENODEV;
- goto out_err;
- }
-
- dev_dbg(&pci_dev->dev, "NSC TPM detected\n");
- dev_dbg(&pci_dev->dev,
- "NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n",
- tpm_read_index(0x07), tpm_read_index(0x20),
- tpm_read_index(0x27));
- dev_dbg(&pci_dev->dev,
- "NSC SIOCF1 0x%x SIOCF5 0x%x SIOCF6 0x%x SIOCF8 0x%x\n",
- tpm_read_index(0x21), tpm_read_index(0x25),
- tpm_read_index(0x26), tpm_read_index(0x28));
- dev_dbg(&pci_dev->dev, "NSC IO Base0 0x%x\n",
- (tpm_read_index(0x60) << 8) | tpm_read_index(0x61));
- dev_dbg(&pci_dev->dev, "NSC IO Base1 0x%x\n",
- (tpm_read_index(0x62) << 8) | tpm_read_index(0x63));
- dev_dbg(&pci_dev->dev, "NSC Interrupt number and wakeup 0x%x\n",
- tpm_read_index(0x70));
- dev_dbg(&pci_dev->dev, "NSC IRQ type select 0x%x\n",
- tpm_read_index(0x71));
- dev_dbg(&pci_dev->dev,
- "NSC DMA channel select0 0x%x, select1 0x%x\n",
- tpm_read_index(0x74), tpm_read_index(0x75));
- dev_dbg(&pci_dev->dev,
- "NSC Config "
- "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
- tpm_read_index(0xF0), tpm_read_index(0xF1),
- tpm_read_index(0xF2), tpm_read_index(0xF3),
- tpm_read_index(0xF4), tpm_read_index(0xF5),
- tpm_read_index(0xF6), tpm_read_index(0xF7),
- tpm_read_index(0xF8), tpm_read_index(0xF9));
-
- dev_info(&pci_dev->dev,
- "NSC PC21100 TPM revision %d\n",
- tpm_read_index(0x27) & 0x1F);
-
- if (tpm_read_index(NSC_LDC_INDEX) == 0)
- dev_info(&pci_dev->dev, ": NSC TPM not active\n");
-
- /* select PM channel 1 */
- tpm_write_index(NSC_LDN_INDEX, 0x12);
- tpm_read_index(NSC_LDN_INDEX);
-
- /* disable the DPM module */
- tpm_write_index(NSC_LDC_INDEX, 0);
- tpm_read_index(NSC_LDC_INDEX);
-
- /* set the data register base addresses */
- tpm_write_index(NSC_DIO_INDEX, TPM_NSC_BASE >> 8);
- tpm_write_index(NSC_DIO_INDEX + 1, TPM_NSC_BASE);
- tpm_read_index(NSC_DIO_INDEX);
- tpm_read_index(NSC_DIO_INDEX + 1);
-
- /* set the command register base addresses */
- tpm_write_index(NSC_CIO_INDEX, (TPM_NSC_BASE + 1) >> 8);
- tpm_write_index(NSC_CIO_INDEX + 1, (TPM_NSC_BASE + 1));
- tpm_read_index(NSC_DIO_INDEX);
- tpm_read_index(NSC_DIO_INDEX + 1);
-
- /* set the interrupt number to be used for the host interface */
- tpm_write_index(NSC_IRQ_INDEX, TPM_NSC_IRQ);
- tpm_write_index(NSC_ITS_INDEX, 0x00);
- tpm_read_index(NSC_IRQ_INDEX);
-
- /* enable the DPM module */
- tpm_write_index(NSC_LDC_INDEX, 0x01);
- tpm_read_index(NSC_LDC_INDEX);
-
- if ((rc = tpm_register_hardware(pci_dev, &tpm_nsc)) < 0)
- goto out_err;
-
- return 0;
-
-out_err:
- pci_disable_device(pci_dev);
- return rc;
-}
-
-static struct pci_device_id tpm_pci_tbl[] __devinitdata = {
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)},
- {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)},
- {0,}
-};
-
-MODULE_DEVICE_TABLE(pci, tpm_pci_tbl);
-
-static struct pci_driver nsc_pci_driver = {
- .name = "tpm_nsc",
- .id_table = tpm_pci_tbl,
- .probe = tpm_nsc_init,
- .remove = __devexit_p(tpm_remove),
- .suspend = tpm_pm_suspend,
- .resume = tpm_pm_resume,
-};
-
-static int __init init_nsc(void)
-{
- return pci_register_driver(&nsc_pci_driver);
-}
-
-static void __exit cleanup_nsc(void)
-{
- pci_unregister_driver(&nsc_pci_driver);
-}
-
-module_init(init_nsc);
-module_exit(cleanup_nsc);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,229 +0,0 @@
-/*
- * blktap.h
- *
- * Interfaces for the Xen block tap driver.
- *
- * (c) 2004, Andrew Warfield, University of Cambridge
- *
- */
-
-#ifndef __BLKTAP_H__
-#define __BLKTAP_H__
-
-#include <linux/version.h>
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm/hypervisor.h>
-#include <asm-xen/xen-public/io/blkif.h>
-#include <asm-xen/xen-public/io/ring.h>
-
-/* Used to signal to the backend that this is a tap domain. */
-#define BLKTAP_COOKIE 0xbeadfeed
-
-/* -------[ debug / pretty printing ]--------------------------------- */
-
-#define PRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#if 0
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
-
-
-/* -------[ state descriptors ]--------------------------------------- */
-
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
-
-/* -------[ connection tracking ]------------------------------------- */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-extern spinlock_t blkif_io_lock;
-
-typedef struct blkif_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- /* Comms information. */
- blkif_back_ring_t blk_ring;
-
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
- struct blkif_st *hash_next;
- struct list_head blkdev_list;
- spinlock_t blk_ring_lock;
- atomic_t refcnt;
- struct work_struct work;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- u16 shmem_handle;
- unsigned long shmem_vaddr;
- grant_ref_t shmem_ref;
-#endif
-} blkif_t;
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-void blkif_disconnect_complete(blkif_t *blkif);
-#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- blkif_disconnect_complete(_b); \
- } while (0)
-
-
-/* -------[ active request tracking ]--------------------------------- */
-
-typedef struct {
- blkif_t *blkif;
- unsigned long id;
- int nr_pages;
- int next_free;
-} active_req_t;
-
-typedef unsigned int ACTIVE_RING_IDX;
-
-active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
-
-extern inline unsigned int ID_TO_IDX(unsigned long id)
-{
- return ( id & 0x0000ffff );
-}
-
-extern inline domid_t ID_TO_DOM(unsigned long id)
-{
- return (id >> 16);
-}
-
-void active_reqs_init(void);
-
-/* -------[ interposition -> character device interface ]------------- */
-
-/* /dev/xen/blktap resides at device number major=10, minor=200 */
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
-
-/* blktap IOCTLs: */
-#define BLKTAP_IOCTL_KICK_FE 1
-#define BLKTAP_IOCTL_KICK_BE 2
-#define BLKTAP_IOCTL_SETMODE 3
-#define BLKTAP_IOCTL_PRINT_IDXS 100
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
-#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
-#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
-#define BLKTAP_MODE_COPY_FE 0x00000004
-#define BLKTAP_MODE_COPY_BE 0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
- (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
- (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
- (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
- );
-}
-
-
-
-/* -------[ Mappings to User VMA ]------------------------------------ */
-#define BATCH_PER_DOMAIN 16
-
-/* -------[ Here be globals ]----------------------------------------- */
-extern unsigned long blktap_mode;
-
-/* Connection to a single backend domain. */
-extern blkif_front_ring_t blktap_be_ring;
-extern unsigned int blktap_be_evtchn;
-extern unsigned int blktap_be_state;
-
-/* User ring status. */
-extern unsigned long blktap_ring_ok;
-
-/* -------[ ...and function prototypes. ]----------------------------- */
-
-/* init function for character device interface. */
-int blktap_init(void);
-
-/* init function for the blkif cache. */
-void __init blkif_interface_init(void);
-void __init blkdev_schedule_init(void);
-void blkif_deschedule(blkif_t *blkif);
-
-/* interfaces to the char driver, passing messages to and from apps. */
-void blktap_kick_user(void);
-
-/* user ring access functions: */
-int blktap_write_fe_ring(blkif_request_t *req);
-int blktap_write_be_ring(blkif_response_t *rsp);
-int blktap_write_ctrl_ring(ctrl_msg_t *msg);
-
-/* fe/be ring access functions: */
-int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
-int write_req_to_be_ring(blkif_request_t *req);
-
-/* event notification functions */
-void kick_fe_domain(blkif_t *blkif);
-void kick_be_domain(void);
-
-/* Interrupt handlers. */
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
- struct pt_regs *ptregs);
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
-
-/* Control message receiver. */
-extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
-
-/* debug */
-void print_fe_ring_idxs(void);
-void print_be_ring_idxs(void);
-
-#define __BLKINT_H__
-#endif
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/usbback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,83 +0,0 @@
-
-#ifndef __USBIF__BACKEND__COMMON_H__
-#define __USBIF__BACKEND__COMMON_H__
-
-#include <linux/config.h>
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/rbtree.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm/hypervisor.h>
-
-#include <asm-xen/xen-public/io/usbif.h>
-
-#if 0
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-typedef struct usbif_priv_st usbif_priv_t;
-
-struct usbif_priv_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- /* Comms Information */
- usbif_back_ring_t usb_ring;
- /* Private fields. */
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
- usbif_priv_t *hash_next;
- struct list_head usbif_list;
- spinlock_t usb_ring_lock;
- atomic_t refcnt;
-
- struct work_struct work;
-};
-
-void usbif_create(usbif_be_create_t *create);
-void usbif_destroy(usbif_be_destroy_t *destroy);
-void usbif_connect(usbif_be_connect_t *connect);
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
-void usbif_disconnect_complete(usbif_priv_t *up);
-
-void usbif_release_port(usbif_be_release_port_t *msg);
-int usbif_claim_port(usbif_be_claim_port_t *msg);
-void usbif_release_ports(usbif_priv_t *up);
-
-usbif_priv_t *usbif_find(domid_t domid);
-#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define usbif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- usbif_disconnect_complete(_b); \
- } while (0)
-
-
-void usbif_interface_init(void);
-void usbif_ctrlif_init(void);
-
-void usbif_deschedule(usbif_priv_t *up);
-void remove_from_usbif_list(usbif_priv_t *up);
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-
-#endif /* __USBIF__BACKEND__COMMON_H__ */
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/usbback/control.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/control.c Thu Sep 22
17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,61 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/control.c
- *
- * Routines for interfacing with the control plane.
- *
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
-
- switch ( msg->subtype )
- {
- case CMSG_USBIF_BE_CREATE:
- usbif_create((usbif_be_create_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_DESTROY:
- usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_CONNECT:
- usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_DISCONNECT:
- if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
- return; /* Sending the response is deferred until later. */
- break;
- case CMSG_USBIF_BE_CLAIM_PORT:
- usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_RELEASE_PORT:
- usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
- break;
- default:
- DPRINTK("Parse error while reading message subtype %d, len %d\n",
- msg->subtype, msg->length);
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-void usbif_ctrlif_init(void)
-{
- ctrl_msg_t cmsg;
- usbif_be_driver_status_changed_t st;
-
- (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_USBIF_BE;
- cmsg.subtype = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
- cmsg.length = sizeof(usbif_be_driver_status_changed_t);
- st.status = USBIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/usbback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c Thu Sep 22
17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,242 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/interface.c
- *
- * USB device interface management.
- *
- * by Mark Williamson, Copyright (c) 2004
- */
-
-#include "common.h"
-
-#define USBIF_HASHSZ 1024
-#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
-
-static kmem_cache_t *usbif_priv_cachep;
-static usbif_priv_t *usbif_priv_hash[USBIF_HASHSZ];
-
-usbif_priv_t *usbif_find(domid_t domid)
-{
- usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)];
- while ( (up != NULL ) && ( up->domid != domid ) )
- up = up->hash_next;
- return up;
-}
-
-static void __usbif_disconnect_complete(void *arg)
-{
- usbif_priv_t *usbif = (usbif_priv_t *)arg;
- ctrl_msg_t cmsg;
- usbif_be_disconnect_t disc;
-
- /*
- * These can't be done in usbif_disconnect() because at that point there
- * may be outstanding requests at the device whose asynchronous responses
- * must still be notified to the remote driver.
- */
- vfree(usbif->usb_ring.sring);
-
- /* Construct the deferred response message. */
- cmsg.type = CMSG_USBIF_BE;
- cmsg.subtype = CMSG_USBIF_BE_DISCONNECT;
- cmsg.id = usbif->disconnect_rspid;
- cmsg.length = sizeof(usbif_be_disconnect_t);
- disc.domid = usbif->domid;
- disc.status = USBIF_BE_STATUS_OKAY;
- memcpy(cmsg.msg, &disc, sizeof(disc));
-
- /*
- * Make sure message is constructed /before/ status change, because
- * after the status change the 'usbif' structure could be deallocated at
- * any time. Also make sure we send the response /after/ status change,
- * as otherwise a subsequent CONNECT request could spuriously fail if
- * another CPU doesn't see the status change yet.
- */
- mb();
- if ( usbif->status != DISCONNECTING )
- BUG();
- usbif->status = DISCONNECTED;
- mb();
-
- /* Send the successful response. */
- ctrl_if_send_response(&cmsg);
-}
-
-void usbif_disconnect_complete(usbif_priv_t *up)
-{
- INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
- schedule_work(&up->work);
-}
-
-void usbif_create(usbif_be_create_t *create)
-{
- domid_t domid = create->domid;
- usbif_priv_t **pup, *up;
-
- if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
- {
- DPRINTK("Could not create usbif: out of memory\n");
- create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- memset(up, 0, sizeof(*up));
- up->domid = domid;
- up->status = DISCONNECTED;
- spin_lock_init(&up->usb_ring_lock);
- atomic_set(&up->refcnt, 0);
-
- pup = &usbif_priv_hash[USBIF_HASH(domid)];
- while ( *pup != NULL )
- {
- if ( (*pup)->domid == domid )
- {
- create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
- kmem_cache_free(usbif_priv_cachep, up);
- return;
- }
- pup = &(*pup)->hash_next;
- }
-
- up->hash_next = *pup;
- *pup = up;
-
- create->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_destroy(usbif_be_destroy_t *destroy)
-{
- domid_t domid = destroy->domid;
- usbif_priv_t **pup, *up;
-
- pup = &usbif_priv_hash[USBIF_HASH(domid)];
- while ( (up = *pup) != NULL )
- {
- if ( up->domid == domid )
- {
- if ( up->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
- }
- pup = &up->hash_next;
- }
-
- destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
- *pup = up->hash_next;
- usbif_release_ports(up);
- kmem_cache_free(usbif_priv_cachep, up);
- destroy->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_connect(usbif_be_connect_t *connect)
-{
- domid_t domid = connect->domid;
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
- struct vm_struct *vma;
- pgprot_t prot;
- int error;
- usbif_priv_t *up;
- usbif_sring_t *sring;
-
- up = usbif_find(domid);
- if ( unlikely(up == NULL) )
- {
- DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n",
- connect->domid);
- connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- {
- connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- prot = __pgprot(_KERNPG_TABLE);
- error = direct_remap_pfn_range(&init_mm, VMALLOC_VMADDR(vma->addr),
- shmem_frame, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- if ( error == -ENOMEM )
- connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT )
- connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
- else
- connect->status = USBIF_BE_STATUS_ERROR;
- vfree(vma->addr);
- return;
- }
-
- if ( up->status != DISCONNECTED )
- {
- connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
- vfree(vma->addr);
- return;
- }
-
- sring = (usbif_sring_t *)vma->addr;
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&up->usb_ring, sring, PAGE_SIZE);
-
- up->evtchn = evtchn;
- up->shmem_frame = shmem_frame;
- up->status = CONNECTED;
- usbif_get(up);
-
- (void)bind_evtchn_to_irqhandler(
- evtchn, usbif_be_int, 0, "usbif-backend", up);
-
- connect->status = USBIF_BE_STATUS_OKAY;
-}
-
-/* Remove URBs for this interface before destroying it. */
-void usbif_deschedule(usbif_priv_t *up)
-{
- remove_from_usbif_list(up);
-}
-
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
-{
- domid_t domid = disconnect->domid;
- usbif_priv_t *up;
-
- up = usbif_find(domid);
- if ( unlikely(up == NULL) )
- {
- DPRINTK("usbif_disconnect attempted for non-existent usbif"
- " (%u)\n", disconnect->domid);
- disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return 1; /* Caller will send response error message. */
- }
-
- if ( up->status == CONNECTED )
- {
- up->status = DISCONNECTING;
- up->disconnect_rspid = rsp_id;
- wmb(); /* Let other CPUs see the status change. */
- unbind_evtchn_from_irqhandler(up->evtchn, up);
- usbif_deschedule(up);
- usbif_put(up);
- return 0; /* Caller should not send response message. */
- }
-
- disconnect->status = USBIF_BE_STATUS_OKAY;
- return 1;
-}
-
-void __init usbif_interface_init(void)
-{
- usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
- sizeof(usbif_priv_t),
- 0, 0, NULL, NULL);
- memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
-}
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Thu Sep 22
17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,1068 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/main.c
- *
- * Backend for the Xen virtual USB driver - provides an abstraction of a
- * USB host controller to the corresponding frontend driver.
- *
- * by Mark Williamson
- * Copyright (c) 2004 Intel Research Cambridge
- * Copyright (c) 2004, 2005 Mark Williamson
- *
- * Based on arch/xen/drivers/blkif/backend/main.c
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/tqueue.h>
-
-/*
- * This is rather arbitrary.
- */
-#define MAX_PENDING_REQS 4
-#define BATCH_PER_DOMAIN 1
-
-static unsigned long mmap_vstart;
-
-/* Needs to be sufficiently large that we can map the (large) buffers
- * the USB mass storage driver wants. */
-#define MMAP_PAGES_PER_REQUEST \
- (128)
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
-
-
-static spinlock_t owned_ports_lock;
-LIST_HEAD(owned_ports);
-
-/* A list of these structures is used to track ownership of physical USB
- * ports. */
-typedef struct
-{
- usbif_priv_t *usbif_priv;
- char path[16];
- int guest_port;
- int enabled;
- struct list_head list;
- unsigned long guest_address; /* The USB device address that has been
- * assigned by the guest. */
- int dev_present; /* Is there a device present? */
- struct usb_device * dev;
- unsigned long ifaces; /* What interfaces are present on this device? */
-} owned_port_t;
-
-
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. The request is complete, the specified
- * domain has a response queued for it, with the saved 'id' passed back.
- */
-typedef struct {
- usbif_priv_t *usbif_priv;
- unsigned long id;
- int nr_pages;
- unsigned short operation;
- int status;
-} pending_req_t;
-
-/*
- * We can't allocate pending_req's in order, since they may complete out of
- * order. We therefore maintain an allocation ring. This ring also indicates
- * when enough work has been passed down -- at that point the allocation ring
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock;
-
-/* NB. We use a different index type to differentiate from shared usb rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
-static void make_response(usbif_priv_t *usbif, unsigned long id,
- unsigned short op, int st, int inband,
- unsigned long actual_length);
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned
long port);
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
-static owned_port_t *usbif_find_port(char *);
-
-/******************************************************************
- * PRIVATE DEBUG FUNCTIONS
- */
-
-#undef DEBUG
-#ifdef DEBUG
-
-static void dump_port(owned_port_t *p)
-{
- printk(KERN_DEBUG "owned_port_t @ %p\n"
- " usbif_priv @ %p\n"
- " path: %s\n"
- " guest_port: %d\n"
- " guest_address: %ld\n"
- " dev_present: %d\n"
- " dev @ %p\n"
- " ifaces: 0x%lx\n",
- p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
- p->dev_present, p->dev, p->ifaces);
-}
-
-
-static void dump_request(usbif_request_t *req)
-{
- printk(KERN_DEBUG "id = 0x%lx\n"
- "devnum %d\n"
- "endpoint 0x%x\n"
- "direction %d\n"
- "speed %d\n"
- "pipe_type 0x%x\n"
- "transfer_buffer 0x%lx\n"
- "length 0x%lx\n"
- "transfer_flags 0x%lx\n"
- "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
- "iso_schedule = 0x%lx\n"
- "num_iso %ld\n",
- req->id, req->devnum, req->endpoint, req->direction, req->speed,
- req->pipe_type, req->transfer_buffer, req->length,
- req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
- req->setup[3], req->setup[4], req->setup[5], req->setup[6],
- req->setup[7], req->iso_schedule, req->num_iso);
-}
-
-static void dump_urb(struct urb *urb)
-{
- printk(KERN_DEBUG "dumping urb @ %p\n", urb);
-
-#define DUMP_URB_FIELD(name, format) \
- printk(KERN_DEBUG " " # name " " format "\n", urb-> name)
-
- DUMP_URB_FIELD(pipe, "0x%x");
- DUMP_URB_FIELD(status, "%d");
- DUMP_URB_FIELD(transfer_flags, "0x%x");
- DUMP_URB_FIELD(transfer_buffer, "%p");
- DUMP_URB_FIELD(transfer_buffer_length, "%d");
- DUMP_URB_FIELD(actual_length, "%d");
-}
-
-static void dump_response(usbif_response_t *resp)
-{
- printk(KERN_DEBUG "usbback: Sending response:\n"
- " id = 0x%x\n"
- " op = %d\n"
- " status = %d\n"
- " data = %d\n"
- " length = %d\n",
- resp->id, resp->op, resp->status, resp->data, resp->length);
-}
-
-#else /* DEBUG */
-
-#define dump_port(blah) ((void)0)
-#define dump_request(blah) ((void)0)
-#define dump_urb(blah) ((void)0)
-#define dump_response(blah) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************
- * MEMORY MANAGEMENT
- */
-
-static void fast_flush_area(int idx, int nr_pages)
-{
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
- int i;
-
- for ( i = 0; i < nr_pages; i++ )
- {
- MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
- __pte(0), 0);
- }
-
- mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
- BUG();
-}
-
-
-/******************************************************************
- * USB INTERFACE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head usbio_schedule_list;
-static spinlock_t usbio_schedule_list_lock;
-
-static int __on_usbif_list(usbif_priv_t *up)
-{
- return up->usbif_list.next != NULL;
-}
-
-void remove_from_usbif_list(usbif_priv_t *up)
-{
- unsigned long flags;
- if ( !__on_usbif_list(up) ) return;
- spin_lock_irqsave(&usbio_schedule_list_lock, flags);
- if ( __on_usbif_list(up) )
- {
- list_del(&up->usbif_list);
- up->usbif_list.next = NULL;
- usbif_put(up);
- }
- spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-static void add_to_usbif_list_tail(usbif_priv_t *up)
-{
- unsigned long flags;
- if ( __on_usbif_list(up) ) return;
- spin_lock_irqsave(&usbio_schedule_list_lock, flags);
- if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
- {
- list_add_tail(&up->usbif_list, &usbio_schedule_list);
- usbif_get(up);
- }
- spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-void free_pending(int pending_idx)
-{
- unsigned long flags;
-
- /* Free the pending request. */
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
-}
-
-/******************************************************************
- * COMPLETION CALLBACK -- Called as urb->complete()
- */
-
-static void maybe_trigger_usbio_schedule(void);
-
-static void __end_usb_io_op(struct urb *purb)
-{
- pending_req_t *pending_req;
- int pending_idx;
-
- pending_req = purb->context;
-
- pending_idx = pending_req - pending_reqs;
-
- ASSERT(purb->actual_length <= purb->transfer_buffer_length);
- ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
-
- /* An error fails the entire request. */
- if ( purb->status )
- {
- printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb,
purb->status);
- }
-
- if ( usb_pipetype(purb->pipe) == 0 )
- {
- int i;
- usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx,
pending_req->nr_pages - 1);
-
- /* If we're dealing with an iso pipe, we need to copy back the
schedule. */
- for ( i = 0; i < purb->number_of_packets; i++ )
- {
- sched[i].length = purb->iso_frame_desc[i].actual_length;
- ASSERT(sched[i].buffer_offset ==
- purb->iso_frame_desc[i].offset);
- sched[i].status = purb->iso_frame_desc[i].status;
- }
- }
-
- fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
-
- kfree(purb->setup_packet);
-
- make_response(pending_req->usbif_priv, pending_req->id,
- pending_req->operation, pending_req->status, 0,
purb->actual_length);
- usbif_put(pending_req->usbif_priv);
-
- usb_free_urb(purb);
-
- free_pending(pending_idx);
-
- rmb();
-
- /* Check for anything still waiting in the rings, having freed a
request... */
- maybe_trigger_usbio_schedule();
-}
-
-/******************************************************************
- * SCHEDULER FUNCTIONS
- */
-
-static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
-
-static int usbio_schedule(void *arg)
-{
- DECLARE_WAITQUEUE(wq, current);
-
- usbif_priv_t *up;
- struct list_head *ent;
-
- daemonize();
-
- for ( ; ; )
- {
- /* Wait for work to do. */
- add_wait_queue(&usbio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&usbio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&usbio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&usbio_schedule_list) )
- {
- ent = usbio_schedule_list.next;
- up = list_entry(ent, usbif_priv_t, usbif_list);
- usbif_get(up);
- remove_from_usbif_list(up);
- if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
- add_to_usbif_list_tail(up);
- usbif_put(up);
- }
- }
-}
-
-static void maybe_trigger_usbio_schedule(void)
-{
- /*
- * Needed so that two processes, who together make the following predicate
- * true, don't both read stale values and evaluate the predicate
- * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
- */
- smp_mb();
-
- if ( !list_empty(&usbio_schedule_list) )
- wake_up(&usbio_schedule_wait);
-}
-
-
-/******************************************************************************
- * NOTIFICATION FROM GUEST OS.
- */
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-{
- usbif_priv_t *up = dev_id;
-
- smp_mb();
-
- add_to_usbif_list_tail(up);
-
- /* Will in fact /always/ trigger an io schedule in this case. */
- maybe_trigger_usbio_schedule();
-
- return IRQ_HANDLED;
-}
-
-
-
-/******************************************************************
- * DOWNWARD CALLS -- These interface with the usb-device layer proper.
- */
-
-static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
-{
- usbif_back_ring_t *usb_ring = &up->usb_ring;
- usbif_request_t *req;
- RING_IDX i, rp;
- int more_to_do = 0;
-
- rp = usb_ring->sring->req_prod;
- rmb(); /* Ensure we see queued requests up to 'rp'. */
-
- /* Take items off the comms ring, taking care not to overflow. */
- for ( i = usb_ring->req_cons;
- (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
- i++ )
- {
- if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
- {
- more_to_do = 1;
- break;
- }
-
- req = RING_GET_REQUEST(usb_ring, i);
-
- switch ( req->operation )
- {
- case USBIF_OP_PROBE:
- dispatch_usb_probe(up, req->id, req->port);
- break;
-
- case USBIF_OP_IO:
- /* Assemble an appropriate URB. */
- dispatch_usb_io(up, req);
- break;
-
- case USBIF_OP_RESET:
- dispatch_usb_reset(up, req->port);
- break;
-
- default:
- DPRINTK("error: unknown USB io operation [%d]\n",
- req->operation);
- make_response(up, req->id, req->operation, -EINVAL, 0, 0);
- break;
- }
- }
-
- usb_ring->req_cons = i;
-
- return more_to_do;
-}
-
-static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
-{
- unsigned long flags;
- struct list_head *l;
-
- spin_lock_irqsave(&owned_ports_lock, flags);
- list_for_each(l, &owned_ports)
- {
- owned_port_t *p = list_entry(l, owned_port_t, list);
- if(p->usbif_priv == up && p->guest_port == port)
- {
- spin_unlock_irqrestore(&owned_ports_lock, flags);
- return p;
- }
- }
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-
- return NULL;
-}
-
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
-{
- owned_port_t *port = find_guest_port(up, portid);
- int ret = 0;
-
-
- /* Allowing the guest to actually reset the device causes more problems
- * than it's worth. We just fake it out in software but we will do a real
- * reset when the interface is destroyed. */
-
- dump_port(port);
-
- port->guest_address = 0;
- /* If there's an attached device then the port is now enabled. */
- if ( port->dev_present )
- port->enabled = 1;
- else
- port->enabled = 0;
-
- make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
-}
-
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned
long portid)
-{
- owned_port_t *port = find_guest_port(up, portid);
- int ret;
-
- if ( port != NULL )
- ret = port->dev_present;
- else
- {
- ret = -EINVAL;
- printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
- "(port %ld)\n", portid);
- }
-
- /* Probe result is sent back in-band. Probes don't have an associated id
- * right now... */
- make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
-}
-
-/**
- * check_iso_schedule - safety check the isochronous schedule for an URB
- * @purb : the URB in question
- */
-static int check_iso_schedule(struct urb *purb)
-{
- int i;
- unsigned long total_length = 0;
-
- for ( i = 0; i < purb->number_of_packets; i++ )
- {
- struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
-
- if ( desc->offset >= purb->transfer_buffer_length
- || ( desc->offset + desc->length) > purb->transfer_buffer_length )
- return -EINVAL;
-
- total_length += desc->length;
-
- if ( total_length > purb->transfer_buffer_length )
- return -EINVAL;
- }
-
- return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
-
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
-{
- unsigned long buffer_mach;
- int i = 0, offset = 0,
- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
- unsigned long remap_prot;
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
- struct urb *purb = NULL;
- owned_port_t *port;
- unsigned char *setup;
-
- dump_request(req);
-
- if ( NR_PENDING_REQS == MAX_PENDING_REQS )
- {
- printk(KERN_WARNING "usbback: Max requests already queued. "
- "Giving up!\n");
-
- return;
- }
-
- port = find_port_for_request(up, req);
-
- if ( port == NULL )
- {
- printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
- dump_request(req);
-
- make_response(up, req->id, req->operation, -ENODEV, 0, 0);
- return;
- }
- else if ( !port->dev_present )
- {
- /* In normal operation, we'll only get here if a device is unplugged
- * and the frontend hasn't noticed yet. */
- make_response(up, req->id, req->operation, -ENODEV, 0, 0);
- return;
- }
-
-
- setup = kmalloc(8, GFP_KERNEL);
-
- if ( setup == NULL )
- goto no_mem;
-
- /* Copy request out for safety. */
- memcpy(setup, req->setup, 8);
-
- if( setup[0] == 0x0 && setup[1] == 0x5)
- {
- /* To virtualise the USB address space, we need to intercept
- * set_address messages and emulate. From the USB specification:
- * bmRequestType = 0x0;
- * Brequest = SET_ADDRESS (i.e. 0x5)
- * wValue = device address
- * wIndex = 0
- * wLength = 0
- * data = None
- */
- /* Store into the guest transfer buffer using cpu_to_le16 */
- port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
- /* Make a successful response. That was easy! */
-
- make_response(up, req->id, req->operation, 0, 0, 0);
-
- kfree(setup);
- return;
- }
- else if ( setup[0] == 0x0 && setup[1] == 0x9 )
- {
- /* The host kernel needs to know what device configuration is in use
- * because various error checks get confused otherwise. We just do
- * configuration settings here, under controlled conditions.
- */
-
- /* Ignore configuration setting and hope that the host kernel
- did it right. */
- /* usb_set_configuration(port->dev, setup[2]); */
-
- make_response(up, req->id, req->operation, 0, 0, 0);
-
- kfree(setup);
- return;
- }
- else if ( setup[0] == 0x1 && setup[1] == 0xB )
- {
- /* The host kernel needs to know what device interface is in use
- * because various error checks get confused otherwise. We just do
- * configuration settings here, under controlled conditions.
- */
- usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
- (setup[2] | setup[3] << 8) );
-
- make_response(up, req->id, req->operation, 0, 0, 0);
-
- kfree(setup);
- return;
- }
-
- if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
- + req->length )
- > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
- {
- printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
- req->length);
- make_response(up, req->id, req->operation, -EINVAL, 0, 0);
- kfree(setup);
- return;
- }
-
- buffer_mach = req->transfer_buffer;
-
- if( buffer_mach == 0 )
- goto no_remap;
-
- ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
- ASSERT(buffer_mach);
-
- /* Always map writeable for now. */
- remap_prot = _KERNPG_TABLE;
-
- for ( i = 0, offset = 0; offset < req->length;
- i++, offset += PAGE_SIZE )
- {
- MULTI_update_va_mapping_otherdomain(
- mcl+i, MMAP_VADDR(pending_idx, i),
- pfn_pte_ma((buffer_mach + offset) >> PAGE_SHIFT, remap_prot),
- 0, up->domid);
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
-
- ASSERT(virt_to_mfn(MMAP_VADDR(pending_idx, i))
- == ((buffer_mach >> PAGE_SHIFT) + i));
- }
-
- if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
- {
- /* Map in ISO schedule, if necessary. */
- MULTI_update_va_mapping_otherdomain(
- mcl+i, MMAP_VADDR(pending_idx, i),
- pfn_pte_ma(req->iso_schedule >> PAGE_SHIFT, remap_prot),
- 0, up->domid);
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
-
- i++;
- }
-
- if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
- BUG();
-
- {
- int j;
- for ( j = 0; j < i; j++ )
- {
- if ( unlikely(mcl[j].result != 0) )
- {
- printk(KERN_WARNING
- "invalid buffer %d -- could not remap it\n", j);
- fast_flush_area(pending_idx, i);
- goto bad_descriptor;
- }
- }
- }
-
- no_remap:
-
- ASSERT(i <= MMAP_PAGES_PER_REQUEST);
- ASSERT(i * PAGE_SIZE >= req->length);
-
- /* We have to do this because some things might complete out of order. */
- pending_req = &pending_reqs[pending_idx];
- pending_req->usbif_priv= up;
- pending_req->id = req->id;
- pending_req->operation = req->operation;
- pending_req->nr_pages = i;
-
- pending_cons++;
-
- usbif_get(up);
-
- /* Fill out an actual request for the USB layer. */
- purb = usb_alloc_urb(req->num_iso);
-
- if ( purb == NULL )
- {
- usbif_put(up);
- free_pending(pending_idx);
- goto no_mem;
- }
-
- purb->dev = port->dev;
- purb->context = pending_req;
- purb->transfer_buffer =
- (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
- if(buffer_mach == 0)
- purb->transfer_buffer = NULL;
- purb->complete = __end_usb_io_op;
- purb->transfer_buffer_length = req->length;
- purb->transfer_flags = req->transfer_flags;
-
- purb->pipe = 0;
- purb->pipe |= req->direction << 7;
- purb->pipe |= port->dev->devnum << 8;
- purb->pipe |= req->speed << 26;
- purb->pipe |= req->pipe_type << 30;
- purb->pipe |= req->endpoint << 15;
-
- purb->number_of_packets = req->num_iso;
-
- if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
- goto urb_error;
-
- /* Make sure there's always some kind of timeout. */
- purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000
- : 1000;
-
- purb->setup_packet = setup;
-
- if ( req->pipe_type == 0 ) /* ISO */
- {
- int j;
- usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
-
- /* If we're dealing with an iso pipe, we need to copy in a schedule. */
- for ( j = 0; j < purb->number_of_packets; j++ )
- {
- purb->iso_frame_desc[j].length = iso_sched[j].length;
- purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
- iso_sched[j].status = 0;
- }
- }
-
- if ( check_iso_schedule(purb) != 0 )
- goto urb_error;
-
- if ( usb_submit_urb(purb) != 0 )
- goto urb_error;
-
- return;
-
- urb_error:
- dump_urb(purb);
- usbif_put(up);
- free_pending(pending_idx);
-
- bad_descriptor:
- kfree ( setup );
- if ( purb != NULL )
- usb_free_urb(purb);
- make_response(up, req->id, req->operation, -EINVAL, 0, 0);
- return;
-
- no_mem:
- if ( setup != NULL )
- kfree(setup);
- make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
- return;
-}
-
-
-
-/******************************************************************
- * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
- */
-
-
-static void make_response(usbif_priv_t *up, unsigned long id,
- unsigned short op, int st, int inband,
- unsigned long length)
-{
- usbif_response_t *resp;
- unsigned long flags;
- usbif_back_ring_t *usb_ring = &up->usb_ring;
-
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&up->usb_ring_lock, flags);
- resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
- resp->id = id;
- resp->operation = op;
- resp->status = st;
- resp->data = inband;
- resp->length = length;
- wmb(); /* Ensure other side can see the response fields. */
-
- dump_response(resp);
-
- usb_ring->rsp_prod_pvt++;
- RING_PUSH_RESPONSES(usb_ring);
- spin_unlock_irqrestore(&up->usb_ring_lock, flags);
-
- /* Kick the relevant domain. */
- notify_via_evtchn(up->evtchn);
-}
-
-/**
- * usbif_claim_port - claim devices on a port on behalf of guest
- *
- * Once completed, this will ensure that any device attached to that
- * port is claimed by this driver for use by the guest.
- */
-int usbif_claim_port(usbif_be_claim_port_t *msg)
-{
- owned_port_t *o_p;
-
- /* Sanity... */
- if ( usbif_find_port(msg->path) != NULL )
- {
- printk(KERN_WARNING "usbback: Attempted to claim USB port "
- "we already own!\n");
- return -EINVAL;
- }
-
- /* No need for a slab cache - this should be infrequent. */
- o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
-
- if ( o_p == NULL )
- return -ENOMEM;
-
- o_p->enabled = 0;
- o_p->usbif_priv = usbif_find(msg->domid);
- o_p->guest_port = msg->usbif_port;
- o_p->dev_present = 0;
- o_p->guest_address = 0; /* Default address. */
-
- strcpy(o_p->path, msg->path);
-
- spin_lock_irq(&owned_ports_lock);
-
- list_add(&o_p->list, &owned_ports);
-
- spin_unlock_irq(&owned_ports_lock);
-
- printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
- msg->domid, msg->usbif_port);
-
- /* Force a reprobe for unclaimed devices. */
- usb_scan_devices();
-
- return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
-{
- unsigned long flags;
- struct list_head *port;
-
- /* I'm assuming this is not called from IRQ context - correct? I think
- * it's probably only called in response to control messages or plug events
- * in the USB hub kernel thread, so should be OK. */
- spin_lock_irqsave(&owned_ports_lock, flags);
- list_for_each(port, &owned_ports)
- {
- owned_port_t *p = list_entry(port, owned_port_t, list);
- if(p->usbif_priv == up && p->guest_address == req->devnum &&
p->enabled )
- {
- dump_port(p);
-
- spin_unlock_irqrestore(&owned_ports_lock, flags);
- return p;
- }
- }
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-
- return NULL;
-}
-
-owned_port_t *__usbif_find_port(char *path)
-{
- struct list_head *port;
-
- list_for_each(port, &owned_ports)
- {
- owned_port_t *p = list_entry(port, owned_port_t, list);
- if(!strcmp(path, p->path))
- {
- return p;
- }
- }
-
- return NULL;
-}
-
-owned_port_t *usbif_find_port(char *path)
-{
- owned_port_t *ret;
- unsigned long flags;
-
- spin_lock_irqsave(&owned_ports_lock, flags);
- ret = __usbif_find_port(path);
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-
- return ret;
-}
-
-
-static void *probe(struct usb_device *dev, unsigned iface,
- const struct usb_device_id *id)
-{
- owned_port_t *p;
-
- /* We don't care what the device is - if we own the port, we want it. We
- * don't deal with device-specifics in this driver, so we don't care what
- * the device actually is ;-) */
- if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
- {
- printk(KERN_INFO "usbback: claimed device attached to owned port\n");
-
- p->dev_present = 1;
- p->dev = dev;
- set_bit(iface, &p->ifaces);
-
- return p->usbif_priv;
- }
- else
- printk(KERN_INFO "usbback: hotplug for non-owned port (%s),
ignoring\n",
- dev->devpath);
-
-
- return NULL;
-}
-
-static void disconnect(struct usb_device *dev, void *usbif)
-{
- /* Note the device is removed so we can tell the guest when it probes. */
- owned_port_t *port = usbif_find_port(dev->devpath);
- port->dev_present = 0;
- port->dev = NULL;
- port->ifaces = 0;
-}
-
-
-struct usb_driver driver =
-{
- .owner = THIS_MODULE,
- .name = "Xen USB Backend",
- .probe = probe,
- .disconnect = disconnect,
- .id_table = NULL,
-};
-
-/* __usbif_release_port - internal mechanics for releasing a port */
-void __usbif_release_port(owned_port_t *p)
-{
- int i;
-
- for ( i = 0; p->ifaces != 0; i++)
- if ( p->ifaces & 1 << i )
- {
- usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
- clear_bit(i, &p->ifaces);
- }
- list_del(&p->list);
-
- /* Reset the real device. We don't simulate disconnect / probe for other
- * drivers in this kernel because we assume the device is completely under
- * the control of ourselves (i.e. the guest!). This should ensure that the
- * device is in a sane state for the next customer ;-) */
-
- /* MAW NB: we're not resetting the real device here. This looks perfectly
- * valid to me but it causes memory corruption. We seem to get away with
not
- * resetting for now, although it'd be nice to have this tracked down. */
-/* if ( p->dev != NULL) */
-/* usb_reset_device(p->dev); */
-
- kfree(p);
-}
-
-
-/**
- * usbif_release_port - stop claiming devices on a port on behalf of guest
- */
-void usbif_release_port(usbif_be_release_port_t *msg)
-{
- owned_port_t *p;
-
- spin_lock_irq(&owned_ports_lock);
- p = __usbif_find_port(msg->path);
- __usbif_release_port(p);
- spin_unlock_irq(&owned_ports_lock);
-}
-
-void usbif_release_ports(usbif_priv_t *up)
-{
- struct list_head *port, *tmp;
- unsigned long flags;
-
- spin_lock_irqsave(&owned_ports_lock, flags);
- list_for_each_safe(port, tmp, &owned_ports)
- {
- owned_port_t *p = list_entry(port, owned_port_t, list);
- if ( p->usbif_priv == up )
- __usbif_release_port(p);
- }
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-}
-
-static int __init usbif_init(void)
-{
- int i;
- struct page *page;
-
- if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
- !(xen_start_info->flags & SIF_USB_BE_DOMAIN) )
- return 0;
-
- page = balloon_alloc_empty_page_range(MMAP_PAGES);
- BUG_ON(page == NULL);
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
- memset(pending_reqs, 0, sizeof(pending_reqs));
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- pending_ring[i] = i;
-
- spin_lock_init(&pend_prod_lock);
-
- spin_lock_init(&owned_ports_lock);
- INIT_LIST_HEAD(&owned_ports);
-
- spin_lock_init(&usbio_schedule_list_lock);
- INIT_LIST_HEAD(&usbio_schedule_list);
-
- if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
- BUG();
-
- usbif_interface_init();
-
- usbif_ctrlif_init();
-
- usb_register(&driver);
-
- printk(KERN_INFO "Xen USB Backend Initialised");
-
- return 0;
-}
-
-__initcall(usbif_init);
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Thu Sep 22
17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,1735 +0,0 @@
-/*
- * Xen Virtual USB Frontend Driver
- *
- * This file contains the first version of the Xen virtual USB hub
- * that I've managed not to delete by mistake (3rd time lucky!).
- *
- * Based on Linux's uhci.c, original copyright notices are displayed
- * below. Portions also (c) 2004 Intel Research Cambridge
- * and (c) 2004, 2005 Mark Williamson
- *
- * Contact <mark.williamson@xxxxxxxxxxxx> or
- * <xen-devel@xxxxxxxxxxxxxxxxxxxxx> regarding this code.
- *
- * Still to be (maybe) implemented:
- * - migration / backend restart support?
- * - support for building / using as a module
- */
-
-/*
- * Universal Host Controller Interface driver for USB.
- *
- * Maintainer: Johannes Erdfelt <johannes@xxxxxxxxxxx>
- *
- * (C) Copyright 1999 Linus Torvalds
- * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@xxxxxxxxxxx
- * (C) Copyright 1999 Randy Dunlap
- * (C) Copyright 1999 Georg Acher, acher@xxxxxxxxx
- * (C) Copyright 1999 Deti Fliegl, deti@xxxxxxxxx
- * (C) Copyright 1999 Thomas Sailer, sailer@xxxxxxxxxxxxxx
- * (C) Copyright 1999 Roman Weissgaerber, weissg@xxxxxxxxx
- * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
- * support from usb-ohci.c by Adam Richter, adam@xxxxxxxxxxxxx).
- * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- *
- * Intel documents this fairly well, and as far as I know there
- * are no royalties or anything like that, but even so there are
- * people who decided that they want to do the same thing in a
- * completely different way.
- *
- * WARNING! The USB documentation is downright evil. Most of it
- * is just crap, written by a committee. You're better off ignoring
- * most of it, the important stuff is:
- * - the low-level protocol (fairly simple but lots of small details)
- * - working around the horridness of the rest
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#ifdef CONFIG_USB_DEBUG
-#define DEBUG
-#else
-#undef DEBUG
-#endif
-#include <linux/usb.h>
-
-#include <asm/irq.h>
-#include <asm/system.h>
-
-#include "xhci.h"
-
-#include "../../../../../drivers/usb/hcd.h"
-
-#include <asm-xen/xen-public/io/usbif.h>
-#include <asm/xen-public/io/domain_controller.h>
-
-/*
- * Version Information
- */
-#define DRIVER_VERSION "v1.0"
-#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
- "Randy Dunlap, Georg Acher, Deti Fliegl, " \
- "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
-#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
-
-/*
- * debug = 0, no debugging messages
- * debug = 1, dump failed URB's except for stalls
- * debug = 2, dump all failed URB's (including stalls)
- */
-#ifdef DEBUG
-static int debug = 1;
-#else
-static int debug = 0;
-#endif
-MODULE_PARM(debug, "i");
-MODULE_PARM_DESC(debug, "Debug level");
-static char *errbuf;
-#define ERRBUF_LEN (PAGE_SIZE * 8)
-
-static int rh_submit_urb(struct urb *urb);
-static int rh_unlink_urb(struct urb *urb);
-static int xhci_unlink_urb(struct urb *urb);
-static void xhci_call_completion(struct urb *urb);
-static void xhci_drain_ring(void);
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
-static void xhci_finish_completion(void);
-
-#define MAX_URB_LOOP 2048 /* Maximum number of linked URB's */
-
-static kmem_cache_t *xhci_up_cachep; /* urb_priv cache */
-static struct xhci *xhci; /* XHCI structure for the interface */
-
-/******************************************************************************
- * DEBUGGING
- */
-
-#ifdef DEBUG
-
-static void dump_urb(struct urb *urb)
-{
- printk(KERN_DEBUG "dumping urb @ %p\n"
- " hcpriv = %p\n"
- " next = %p\n"
- " dev = %p\n"
- " pipe = 0x%lx\n"
- " status = %d\n"
- " transfer_flags = 0x%lx\n"
- " transfer_buffer = %p\n"
- " transfer_buffer_length = %d\n"
- " actual_length = %d\n"
- " bandwidth = %d\n"
- " setup_packet = %p\n",
- urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
- urb->transfer_flags, urb->transfer_buffer,
- urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
- urb->setup_packet);
- if ( urb->setup_packet != NULL )
- printk(KERN_DEBUG
- "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
- urb->setup_packet[0], urb->setup_packet[1],
- urb->setup_packet[2], urb->setup_packet[3],
- urb->setup_packet[4], urb->setup_packet[5],
- urb->setup_packet[6], urb->setup_packet[7]);
- printk(KERN_DEBUG "complete = %p\n"
- "interval = %d\n", urb->complete, urb->interval);
-
-}
-
-static void xhci_show_resp(usbif_response_t *r)
-{
- printk(KERN_DEBUG "dumping response @ %p\n"
- " id=0x%lx\n"
- " op=0x%x\n"
- " data=0x%x\n"
- " status=0x%x\n"
- " length=0x%lx\n",
- r->id, r->operation, r->data, r->status, r->length);
-}
-
-#define DPRINK(...) printk(KERN_DEBUG __VA_ARGS__)
-
-#else /* DEBUG */
-
-#define dump_urb(blah) ((void)0)
-#define xhci_show_resp(blah) ((void)0)
-#define DPRINTK(blah,...) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************************
- * RING REQUEST HANDLING
- */
-
-#define RING_PLUGGED(_hc) ( RING_FULL(&_hc->usb_ring) || _hc->recovery )
-
-/**
- * xhci_construct_isoc - add isochronous information to a request
- */
-static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
-{
- usbif_iso_t *schedule;
- int i;
- struct urb_priv *urb_priv = urb->hcpriv;
-
- req->num_iso = urb->number_of_packets;
- schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
-
- if ( schedule == NULL )
- return -ENOMEM;
-
- for ( i = 0; i < req->num_iso; i++ )
- {
- schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
- schedule[i].length = urb->iso_frame_desc[i].length;
- }
-
- urb_priv->schedule = schedule;
- req->iso_schedule = virt_to_mfn(schedule) << PAGE_SHIFT;
-
- return 0;
-}
-
-/**
- * xhci_queue_req - construct and queue request for an URB
- */
-static int xhci_queue_req(struct urb *urb)
-{
- unsigned long flags;
- usbif_request_t *req;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
- printk(KERN_DEBUG
- "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons
= %d\n",
- usbif, usbif->req_prod, virt_to_mfn(&usbif->req_prod),
- usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-
- spin_lock_irqsave(&xhci->ring_lock, flags);
-
- if ( RING_PLUGGED(xhci) )
- {
- printk(KERN_WARNING
- "xhci_queue_req(): USB ring plugged, not queuing
request\n");
- spin_unlock_irqrestore(&xhci->ring_lock, flags);
- return -ENOBUFS;
- }
-
- /* Stick something in the shared communications ring. */
- req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
- req->operation = USBIF_OP_IO;
- req->port = 0; /* We don't care what the port is. */
- req->id = (unsigned long) urb->hcpriv;
- req->transfer_buffer = virt_to_mfn(urb->transfer_buffer) << PAGE_SHIFT;
- req->devnum = usb_pipedevice(urb->pipe);
- req->direction = usb_pipein(urb->pipe);
- req->speed = usb_pipeslow(urb->pipe);
- req->pipe_type = usb_pipetype(urb->pipe);
- req->length = urb->transfer_buffer_length;
- req->transfer_flags = urb->transfer_flags;
- req->endpoint = usb_pipeendpoint(urb->pipe);
- req->speed = usb_pipeslow(urb->pipe);
- req->timeout = urb->timeout * (1000 / HZ);
-
- if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
- {
- int ret = xhci_construct_isoc(req, urb);
- if ( ret != 0 )
- return ret;
- }
-
- if(urb->setup_packet != NULL)
- memcpy(req->setup, urb->setup_packet, 8);
- else
- memset(req->setup, 0, 8);
-
- usb_ring->req_prod_pvt++;
- RING_PUSH_REQUESTS(usb_ring);
-
- spin_unlock_irqrestore(&xhci->ring_lock, flags);
-
- notify_via_evtchn(xhci->evtchn);
-
- DPRINTK("Queued request for an URB.\n");
- dump_urb(urb);
-
- return -EINPROGRESS;
-}
-
-/**
- * xhci_queue_probe - queue a probe request for a particular port
- */
-static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
-{
- usbif_request_t *req;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
- printk(KERN_DEBUG
- "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
- "resp_cons = %d\n", usbif->req_prod,
- virt_to_mfn(&usbif->req_prod),
- usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-
- /* This is always called from the timer interrupt. */
- spin_lock(&xhci->ring_lock);
-
- if ( RING_PLUGGED(xhci) )
- {
- printk(KERN_WARNING
- "xhci_queue_probe(): ring full, not queuing request\n");
- spin_unlock(&xhci->ring_lock);
- return NULL;
- }
-
- /* Stick something in the shared communications ring. */
- req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
- memset(req, 0, sizeof(*req));
-
- req->operation = USBIF_OP_PROBE;
- req->port = port;
-
- usb_ring->req_prod_pvt++;
- RING_PUSH_REQUESTS(usb_ring);
-
- spin_unlock(&xhci->ring_lock);
-
- notify_via_evtchn(xhci->evtchn);
-
- return req;
-}
-
-/**
- * xhci_port_reset - queue a reset request for a particular port
- */
-static int xhci_port_reset(usbif_vdev_t port)
-{
- usbif_request_t *req;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
- /* Only ever happens from process context (hub thread). */
- spin_lock_irq(&xhci->ring_lock);
-
- if ( RING_PLUGGED(xhci) )
- {
- printk(KERN_WARNING
- "xhci_port_reset(): ring plugged, not queuing
request\n");
- spin_unlock_irq(&xhci->ring_lock);
- return -ENOBUFS;
- }
-
- /* We only reset one port at a time, so we only need one variable per
- * hub. */
- xhci->awaiting_reset = 1;
-
- /* Stick something in the shared communications ring. */
- req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
- memset(req, 0, sizeof(*req));
-
- req->operation = USBIF_OP_RESET;
- req->port = port;
-
- usb_ring->req_prod_pvt++;
- RING_PUSH_REQUESTS(usb_ring);
-
- spin_unlock_irq(&xhci->ring_lock);
-
- notify_via_evtchn(xhci->evtchn);
-
- while ( xhci->awaiting_reset > 0 )
- {
- mdelay(1);
- xhci_drain_ring();
- }
-
- xhci->rh.ports[port].pe = 1;
- xhci->rh.ports[port].pe_chg = 1;
-
- return xhci->awaiting_reset;
-}
-
-
-/******************************************************************************
- * RING RESPONSE HANDLING
- */
-
-static void receive_usb_reset(usbif_response_t *resp)
-{
- xhci->awaiting_reset = resp->status;
- rmb();
-
-}
-
-static void receive_usb_probe(usbif_response_t *resp)
-{
- spin_lock(&xhci->rh.port_state_lock);
-
- if ( resp->status >= 0 )
- {
- if ( resp->status == 1 )
- {
- /* If theres a device there and there wasn't one before there must
- * have been a connection status change. */
- if( xhci->rh.ports[resp->data].cs == 0 )
- {
- xhci->rh.ports[resp->data].cs = 1;
- xhci->rh.ports[resp->data].cs_chg = 1;
- }
- }
- else if ( resp->status == 0 )
- {
- if(xhci->rh.ports[resp->data].cs == 1 )
- {
- xhci->rh.ports[resp->data].cs = 0;
- xhci->rh.ports[resp->data].cs_chg = 1;
- xhci->rh.ports[resp->data].pe = 0;
- /* According to USB Spec v2.0, 11.24.2.7.2.2, we don't need
- * to set pe_chg since an error has not occurred. */
- }
- }
- else
- printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
- "for port %d\n", resp->status, resp->data);
- }
- else if ( resp->status < 0)
- printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
- resp->status);
-
- spin_unlock(&xhci->rh.port_state_lock);
-}
-
-static void receive_usb_io(usbif_response_t *resp)
-{
- struct urb_priv *urbp = (struct urb_priv *)resp->id;
- struct urb *urb = urbp->urb;
-
- urb->actual_length = resp->length;
- urbp->in_progress = 0;
-
- if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
- {
- int i;
-
- /* Copy ISO schedule results back in. */
- for ( i = 0; i < urb->number_of_packets; i++ )
- {
- urb->iso_frame_desc[i].status
- = urbp->schedule[i].status;
- urb->iso_frame_desc[i].actual_length
- = urbp->schedule[i].length;
- }
- free_page((unsigned long)urbp->schedule);
- }
-
- /* Only set status if it's not been changed since submission. It might
- * have been changed if the URB has been unlinked asynchronously, for
- * instance. */
- if ( urb->status == -EINPROGRESS )
- urbp->status = urb->status = resp->status;
-}
-
-/**
- * xhci_drain_ring - drain responses from the ring, calling handlers
- *
- * This may be called from interrupt context when an event is received from the
- * backend domain, or sometimes in process context whilst waiting for a port
- * reset or URB completion.
- */
-static void xhci_drain_ring(void)
-{
- struct list_head *tmp, *head;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
- usbif_response_t *resp;
- RING_IDX i, rp;
-
- /* Walk the ring here to get responses, updating URBs to show what
- * completed. */
-
- rp = usb_ring->sring->rsp_prod;
- rmb(); /* Ensure we see queued requests up to 'rp'. */
-
- /* Take items off the comms ring, taking care not to overflow. */
- for ( i = usb_ring->rsp_cons; i != rp; i++ )
- {
- resp = RING_GET_RESPONSE(usb_ring, i);
-
- /* May need to deal with batching and with putting a ceiling on
- the number dispatched for performance and anti-dos reasons */
-
- xhci_show_resp(resp);
-
- switch ( resp->operation )
- {
- case USBIF_OP_PROBE:
- receive_usb_probe(resp);
- break;
-
- case USBIF_OP_IO:
- receive_usb_io(resp);
- break;
-
- case USBIF_OP_RESET:
- receive_usb_reset(resp);
- break;
-
- default:
- printk(KERN_WARNING
- "error: unknown USB io operation response [%d]\n",
- resp->operation);
- break;
- }
- }
-
- usb_ring->rsp_cons = i;
-
- /* Walk the list of pending URB's to see which ones completed and do
- * callbacks, etc. */
- spin_lock(&xhci->urb_list_lock);
- head = &xhci->urb_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *urb = list_entry(tmp, struct urb, urb_list);
-
- tmp = tmp->next;
-
- /* Checks the status and does all of the magic necessary */
- xhci_transfer_result(xhci, urb);
- }
- spin_unlock(&xhci->urb_list_lock);
-
- xhci_finish_completion();
-}
-
-
-static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
-{
- xhci_drain_ring();
-}
-
-/******************************************************************************
- * HOST CONTROLLER FUNCTIONALITY
- */
-
-/**
- * no-op implementation of private device alloc / free routines
- */
-static int xhci_do_nothing_dev(struct usb_device *dev)
-{
- return 0;
-}
-
-static inline void xhci_add_complete(struct urb *urb)
-{
- struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
- unsigned long flags;
-
- spin_lock_irqsave(&xhci->complete_list_lock, flags);
- list_add_tail(&urbp->complete_list, &xhci->complete_list);
- spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-/* When this returns, the owner of the URB may free its
- * storage.
- *
- * We spin and wait for the URB to complete before returning.
- *
- * Call with urb->lock acquired.
- */
-static void xhci_delete_urb(struct urb *urb)
-{
- struct urb_priv *urbp;
-
- urbp = urb->hcpriv;
-
- /* If there's no urb_priv structure for this URB then it can't have
- * been submitted at all. */
- if ( urbp == NULL )
- return;
-
- /* For now we just spin until the URB completes. It shouldn't take too
- * long and we don't expect to have to do this very often. */
- while ( urb->status == -EINPROGRESS )
- {
- xhci_drain_ring();
- mdelay(1);
- }
-
- /* Now we know that further transfers to the buffer won't
- * occur, so we can safely return. */
-}
-
-static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
-{
- struct urb_priv *urbp;
-
- urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
- if (!urbp) {
- err("xhci_alloc_urb_priv: couldn't allocate memory for
urb_priv\n");
- return NULL;
- }
-
- memset((void *)urbp, 0, sizeof(*urbp));
-
- urbp->inserttime = jiffies;
- urbp->urb = urb;
- urbp->dev = urb->dev;
-
- INIT_LIST_HEAD(&urbp->complete_list);
-
- urb->hcpriv = urbp;
-
- return urbp;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-/* When is this called? Do we need to stop the transfer (as we
- * currently do)? */
-static void xhci_destroy_urb_priv(struct urb *urb)
-{
- struct urb_priv *urbp;
-
- urbp = (struct urb_priv *)urb->hcpriv;
- if (!urbp)
- return;
-
- if (!list_empty(&urb->urb_list))
- warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
-
- if (!list_empty(&urbp->complete_list))
- warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list",
urb);
-
- kmem_cache_free(xhci_up_cachep, urb->hcpriv);
-
- urb->hcpriv = NULL;
-}
-
-/**
- * Try to find URBs in progress on the same pipe to the same device.
- *
- * MUST be called with xhci->urb_list_lock acquired
- */
-static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
-{
- struct list_head *tmp, *head;
-
- /* We don't match Isoc transfers since they are special */
- if (usb_pipeisoc(urb->pipe))
- return NULL;
-
- head = &xhci->urb_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *u = list_entry(tmp, struct urb, urb_list);
-
- tmp = tmp->next;
-
- if (u->dev == urb->dev && u->pipe == urb->pipe &&
- u->status == -EINPROGRESS)
- return u;
- }
-
- return NULL;
-}
-
-static int xhci_submit_urb(struct urb *urb)
-{
- int ret = -EINVAL;
- unsigned long flags;
- struct urb *eurb;
- int bustime;
-
- DPRINTK("URB submitted to XHCI driver.\n");
- dump_urb(urb);
-
- if (!urb)
- return -EINVAL;
-
- if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
- warn("xhci_submit_urb: urb %p belongs to disconnected device or
bus?", urb);
- return -ENODEV;
- }
-
- if ( urb->dev->devpath == NULL )
- BUG();
-
- usb_inc_dev_use(urb->dev);
-
- spin_lock_irqsave(&xhci->urb_list_lock, flags);
- spin_lock(&urb->lock);
-
- if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
- urb->status == -ECONNABORTED) {
- dbg("xhci_submit_urb: urb not available to submit (status =
%d)", urb->status);
- /* Since we can have problems on the out path */
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
- usb_dec_dev_use(urb->dev);
-
- return ret;
- }
-
- INIT_LIST_HEAD(&urb->urb_list);
- if (!xhci_alloc_urb_priv(urb)) {
- ret = -ENOMEM;
-
- goto out;
- }
-
- ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
-
- eurb = xhci_find_urb_ep(xhci, urb);
- if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
- ret = -ENXIO;
-
- goto out;
- }
-
- /* Short circuit the virtual root hub */
- if (urb->dev == xhci->rh.dev) {
- ret = rh_submit_urb(urb);
-
- goto out;
- }
-
- switch (usb_pipetype(urb->pipe)) {
- case PIPE_CONTROL:
- case PIPE_BULK:
- ret = xhci_queue_req(urb);
- break;
-
- case PIPE_INTERRUPT:
- if (urb->bandwidth == 0) { /* not yet checked/allocated */
- bustime = usb_check_bandwidth(urb->dev, urb);
- if (bustime < 0)
- ret = bustime;
- else {
- ret = xhci_queue_req(urb);
- if (ret == -EINPROGRESS)
- usb_claim_bandwidth(urb->dev, urb,
- bustime, 0);
- }
- } else /* bandwidth is already set */
- ret = xhci_queue_req(urb);
- break;
-
- case PIPE_ISOCHRONOUS:
- if (urb->bandwidth == 0) { /* not yet checked/allocated */
- if (urb->number_of_packets <= 0) {
- ret = -EINVAL;
- break;
- }
- bustime = usb_check_bandwidth(urb->dev, urb);
- if (bustime < 0) {
- ret = bustime;
- break;
- }
-
- ret = xhci_queue_req(urb);
- if (ret == -EINPROGRESS)
- usb_claim_bandwidth(urb->dev, urb, bustime, 1);
- } else /* bandwidth is already set */
- ret = xhci_queue_req(urb);
- break;
- }
-out:
- urb->status = ret;
-
- if (ret == -EINPROGRESS) {
- /* We use _tail to make find_urb_ep more efficient */
- list_add_tail(&urb->urb_list, &xhci->urb_list);
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- return 0;
- }
-
- xhci_delete_urb(urb);
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- /* Only call completion if it was successful */
- if (!ret)
- xhci_call_completion(urb);
-
- return ret;
-}
-
-/*
- * Return the result of a transfer
- *
- * MUST be called with urb_list_lock acquired
- */
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
-{
- int ret = 0;
- unsigned long flags;
- struct urb_priv *urbp;
-
- /* The root hub is special */
- if (urb->dev == xhci->rh.dev)
- return;
-
- spin_lock_irqsave(&urb->lock, flags);
-
- urbp = (struct urb_priv *)urb->hcpriv;
-
- if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
- ret = -EINPROGRESS;
-
- if (urb->actual_length < urb->transfer_buffer_length) {
- if (urb->transfer_flags & USB_DISABLE_SPD) {
- ret = -EREMOTEIO;
- }
- }
-
- if (urb->status == -EPIPE)
- {
- ret = urb->status;
- /* endpoint has stalled - mark it halted */
- usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
- usb_pipeout(urb->pipe));
- }
-
- if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
- (ret != 0 && debug > 1)) {
- /* Some debugging code */
- dbg("xhci_result_interrupt/bulk() failed with status %x",
- status);
- }
-
- if (ret == -EINPROGRESS)
- goto out;
-
- switch (usb_pipetype(urb->pipe)) {
- case PIPE_CONTROL:
- case PIPE_BULK:
- case PIPE_ISOCHRONOUS:
- /* Release bandwidth for Interrupt or Isoc. transfers */
- /* Spinlock needed ? */
- if (urb->bandwidth)
- usb_release_bandwidth(urb->dev, urb, 1);
- xhci_delete_urb(urb);
- break;
- case PIPE_INTERRUPT:
- /* Interrupts are an exception */
- if (urb->interval)
- goto out_complete;
-
- /* Release bandwidth for Interrupt or Isoc. transfers */
- /* Spinlock needed ? */
- if (urb->bandwidth)
- usb_release_bandwidth(urb->dev, urb, 0);
- xhci_delete_urb(urb);
- break;
- default:
- info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
- usb_pipetype(urb->pipe), urb);
- }
-
- /* Remove it from xhci->urb_list */
- list_del_init(&urb->urb_list);
-
-out_complete:
- xhci_add_complete(urb);
-
-out:
- spin_unlock_irqrestore(&urb->lock, flags);
-}
-
-static int xhci_unlink_urb(struct urb *urb)
-{
- unsigned long flags;
- struct urb_priv *urbp = urb->hcpriv;
-
- if (!urb)
- return -EINVAL;
-
- if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
- return -ENODEV;
-
- spin_lock_irqsave(&xhci->urb_list_lock, flags);
- spin_lock(&urb->lock);
-
- /* Release bandwidth for Interrupt or Isoc. transfers */
- /* Spinlock needed ? */
- if (urb->bandwidth) {
- switch (usb_pipetype(urb->pipe)) {
- case PIPE_INTERRUPT:
- usb_release_bandwidth(urb->dev, urb, 0);
- break;
- case PIPE_ISOCHRONOUS:
- usb_release_bandwidth(urb->dev, urb, 1);
- break;
- default:
- break;
- }
- }
-
- if (urb->status != -EINPROGRESS) {
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
- return 0;
- }
-
- list_del_init(&urb->urb_list);
-
- /* Short circuit the virtual root hub */
- if (urb->dev == xhci->rh.dev) {
- rh_unlink_urb(urb);
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- xhci_call_completion(urb);
- } else {
- if (urb->transfer_flags & USB_ASYNC_UNLINK) {
- /* We currently don't currently attempt to cancel URBs
- * that have been queued in the ring. We handle async
- * unlinked URBs when they complete. */
- urbp->status = urb->status = -ECONNABORTED;
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
- } else {
- urb->status = -ENOENT;
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- if (in_interrupt()) { /* wait at least 1 frame */
- static int errorcount = 10;
-
- if (errorcount--)
- dbg("xhci_unlink_urb called from
interrupt for urb %p", urb);
- udelay(1000);
- } else
- schedule_timeout(1+1*HZ/1000);
-
- xhci_delete_urb(urb);
-
- xhci_call_completion(urb);
- }
- }
-
- return 0;
-}
-
-static void xhci_call_completion(struct urb *urb)
-{
- struct urb_priv *urbp;
- struct usb_device *dev = urb->dev;
- int is_ring = 0, killed, resubmit_interrupt, status;
- struct urb *nurb;
- unsigned long flags;
-
- spin_lock_irqsave(&urb->lock, flags);
-
- urbp = (struct urb_priv *)urb->hcpriv;
- if (!urbp || !urb->dev) {
- spin_unlock_irqrestore(&urb->lock, flags);
- return;
- }
-
- killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
- urb->status == -ECONNRESET);
- resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
- urb->interval);
-
- nurb = urb->next;
- if (nurb && !killed) {
- int count = 0;
-
- while (nurb && nurb != urb && count < MAX_URB_LOOP) {
- if (nurb->status == -ENOENT ||
- nurb->status == -ECONNABORTED ||
- nurb->status == -ECONNRESET) {
- killed = 1;
- break;
- }
-
- nurb = nurb->next;
- count++;
- }
-
- if (count == MAX_URB_LOOP)
- err("xhci_call_completion: too many linked URB's, loop?
(first loop)");
-
- /* Check to see if chain is a ring */
- is_ring = (nurb == urb);
- }
-
- status = urbp->status;
- if (!resubmit_interrupt || killed)
- /* We don't need urb_priv anymore */
- xhci_destroy_urb_priv(urb);
-
- if (!killed)
- urb->status = status;
-
- spin_unlock_irqrestore(&urb->lock, flags);
-
- if (urb->complete)
- urb->complete(urb);
-
- if (resubmit_interrupt)
- /* Recheck the status. The completion handler may have */
- /* unlinked the resubmitting interrupt URB */
- killed = (urb->status == -ENOENT ||
- urb->status == -ECONNABORTED ||
- urb->status == -ECONNRESET);
-
- if (resubmit_interrupt && !killed) {
- if ( urb->dev != xhci->rh.dev )
- xhci_queue_req(urb); /* XXX What if this fails? */
- /* Don't need to resubmit URBs for the virtual root dev. */
- } else {
- if (is_ring && !killed) {
- urb->dev = dev;
- xhci_submit_urb(urb);
- } else {
- /* We decrement the usage count after we're done */
- /* with everything */
- usb_dec_dev_use(dev);
- }
- }
-}
-
-static void xhci_finish_completion(void)
-{
- struct list_head *tmp, *head;
- unsigned long flags;
-
- spin_lock_irqsave(&xhci->complete_list_lock, flags);
- head = &xhci->complete_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
- complete_list);
- struct urb *urb = urbp->urb;
-
- list_del_init(&urbp->complete_list);
- spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-
- xhci_call_completion(urb);
-
- spin_lock_irqsave(&xhci->complete_list_lock, flags);
- head = &xhci->complete_list;
- tmp = head->next;
- }
- spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-static struct usb_operations xhci_device_operations = {
- .allocate = xhci_do_nothing_dev,
- .deallocate = xhci_do_nothing_dev,
- /* It doesn't look like any drivers actually care what the frame number
- * is at the moment! If necessary, we could approximate the current
- * frame nubmer by passing it from the backend in response messages. */
- .get_frame_number = NULL,
- .submit_urb = xhci_submit_urb,
- .unlink_urb = xhci_unlink_urb
-};
-
-/******************************************************************************
- * VIRTUAL ROOT HUB EMULATION
- */
-
-static __u8 root_hub_dev_des[] =
-{
- 0x12, /* __u8 bLength; */
- 0x01, /* __u8 bDescriptorType; Device */
- 0x00, /* __u16 bcdUSB; v1.0 */
- 0x01,
- 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */
- 0x00, /* __u8 bDeviceSubClass; */
- 0x00, /* __u8 bDeviceProtocol; */
- 0x08, /* __u8 bMaxPacketSize0; 8 Bytes */
- 0x00, /* __u16 idVendor; */
- 0x00,
- 0x00, /* __u16 idProduct; */
- 0x00,
- 0x00, /* __u16 bcdDevice; */
- 0x00,
- 0x00, /* __u8 iManufacturer; */
- 0x02, /* __u8 iProduct; */
- 0x01, /* __u8 iSerialNumber; */
- 0x01 /* __u8 bNumConfigurations; */
-};
-
-
-/* Configuration descriptor */
-static __u8 root_hub_config_des[] =
-{
- 0x09, /* __u8 bLength; */
- 0x02, /* __u8 bDescriptorType; Configuration */
- 0x19, /* __u16 wTotalLength; */
- 0x00,
- 0x01, /* __u8 bNumInterfaces; */
- 0x01, /* __u8 bConfigurationValue; */
- 0x00, /* __u8 iConfiguration; */
- 0x40, /* __u8 bmAttributes;
- Bit 7: Bus-powered, 6: Self-powered,
- Bit 5 Remote-wakeup, 4..0: resvd */
- 0x00, /* __u8 MaxPower; */
-
- /* interface */
- 0x09, /* __u8 if_bLength; */
- 0x04, /* __u8 if_bDescriptorType; Interface */
- 0x00, /* __u8 if_bInterfaceNumber; */
- 0x00, /* __u8 if_bAlternateSetting; */
- 0x01, /* __u8 if_bNumEndpoints; */
- 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */
- 0x00, /* __u8 if_bInterfaceSubClass; */
- 0x00, /* __u8 if_bInterfaceProtocol; */
- 0x00, /* __u8 if_iInterface; */
-
- /* endpoint */
- 0x07, /* __u8 ep_bLength; */
- 0x05, /* __u8 ep_bDescriptorType; Endpoint */
- 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */
- 0x03, /* __u8 ep_bmAttributes; Interrupt */
- 0x08, /* __u16 ep_wMaxPacketSize; 8 Bytes */
- 0x00,
- 0xff /* __u8 ep_bInterval; 255 ms */
-};
-
-static __u8 root_hub_hub_des[] =
-{
- 0x09, /* __u8 bLength; */
- 0x29, /* __u8 bDescriptorType; Hub-descriptor */
- 0x02, /* __u8 bNbrPorts; */
- 0x00, /* __u16 wHubCharacteristics; */
- 0x00,
- 0x01, /* __u8 bPwrOn2pwrGood; 2ms */
- 0x00, /* __u8 bHubContrCurrent; 0 mA */
- 0x00, /* __u8 DeviceRemovable; *** 7 Ports max ***
*/
- 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max ***
*/
-};
-
-/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */
-static int rh_send_irq(struct urb *urb)
-{
- struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
- xhci_port_t *ports = xhci->rh.ports;
- unsigned long flags;
- int i, len = 1;
- __u16 data = 0;
-
- spin_lock_irqsave(&urb->lock, flags);
- for (i = 0; i < xhci->rh.numports; i++) {
- /* Set a bit if anything at all has changed on the port, as per
- * USB spec 11.12 */
- data |= (ports[i].cs_chg || ports[i].pe_chg )
- ? (1 << (i + 1))
- : 0;
-
- len = (i + 1) / 8 + 1;
- }
-
- *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
- urb->actual_length = len;
- urbp->status = 0;
-
- spin_unlock_irqrestore(&urb->lock, flags);
-
- if ((data > 0) && (xhci->rh.send != 0)) {
- dbg("root-hub INT complete: data: %x", data);
- xhci_call_completion(urb);
- }
-
- return 0;
-}
-
-/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
-static int rh_init_int_timer(struct urb *urb);
-
-static void rh_int_timer_do(unsigned long ptr)
-{
- struct urb *urb = (struct urb *)ptr;
- struct list_head list, *tmp, *head;
- unsigned long flags;
- int i;
-
- for ( i = 0; i < xhci->rh.numports; i++)
- xhci_queue_probe(i);
-
- if (xhci->rh.send)
- rh_send_irq(urb);
-
- INIT_LIST_HEAD(&list);
-
- spin_lock_irqsave(&xhci->urb_list_lock, flags);
- head = &xhci->urb_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *u = list_entry(tmp, struct urb, urb_list);
- struct urb_priv *up = (struct urb_priv *)u->hcpriv;
-
- tmp = tmp->next;
-
- spin_lock(&u->lock);
-
- /* Check if the URB timed out */
- if (u->timeout && time_after_eq(jiffies,
- up->inserttime + u->timeout)) {
- list_del(&u->urb_list);
- list_add_tail(&u->urb_list, &list);
- }
-
- spin_unlock(&u->lock);
- }
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- head = &list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *u = list_entry(tmp, struct urb, urb_list);
-
- tmp = tmp->next;
-
- u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
- xhci_unlink_urb(u);
- }
-
- rh_init_int_timer(urb);
-}
-
-/* Root Hub INTs are polled by this timer */
-static int rh_init_int_timer(struct urb *urb)
-{
- xhci->rh.interval = urb->interval;
- init_timer(&xhci->rh.rh_int_timer);
- xhci->rh.rh_int_timer.function = rh_int_timer_do;
- xhci->rh.rh_int_timer.data = (unsigned long)urb;
- xhci->rh.rh_int_timer.expires = jiffies
- + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
- add_timer(&xhci->rh.rh_int_timer);
-
- return 0;
-}
-
-#define OK(x) len = (x); break
-
-/* Root Hub Control Pipe */
-static int rh_submit_urb(struct urb *urb)
-{
- unsigned int pipe = urb->pipe;
- struct usb_ctrlrequest *cmd =
- (struct usb_ctrlrequest *)urb->setup_packet;
- void *data = urb->transfer_buffer;
- int leni = urb->transfer_buffer_length;
- int len = 0;
- xhci_port_t *status;
- int stat = 0;
- int i;
- int retstatus;
- unsigned long flags;
-
- __u16 cstatus;
- __u16 bmRType_bReq;
- __u16 wValue;
- __u16 wIndex;
- __u16 wLength;
-
- if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
- xhci->rh.urb = urb;
- xhci->rh.send = 1;
- xhci->rh.interval = urb->interval;
- rh_init_int_timer(urb);
-
- return -EINPROGRESS;
- }
-
- bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
- wValue = le16_to_cpu(cmd->wValue);
- wIndex = le16_to_cpu(cmd->wIndex);
- wLength = le16_to_cpu(cmd->wLength);
-
- for (i = 0; i < 8; i++)
- xhci->rh.c_p_r[i] = 0;
-
- status = &xhci->rh.ports[wIndex - 1];
-
- spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
-
- switch (bmRType_bReq) {
- /* Request Destination:
- without flags: Device,
- RH_INTERFACE: interface,
- RH_ENDPOINT: endpoint,
- RH_CLASS means HUB here,
- RH_OTHER | RH_CLASS almost ever means HUB_PORT here
- */
-
- case RH_GET_STATUS:
- *(__u16 *)data = cpu_to_le16(1);
- OK(2);
- case RH_GET_STATUS | RH_INTERFACE:
- *(__u16 *)data = cpu_to_le16(0);
- OK(2);
- case RH_GET_STATUS | RH_ENDPOINT:
- *(__u16 *)data = cpu_to_le16(0);
- OK(2);
- case RH_GET_STATUS | RH_CLASS:
- *(__u32 *)data = cpu_to_le32(0);
- OK(4); /* hub power */
- case RH_GET_STATUS | RH_OTHER | RH_CLASS:
- cstatus = (status->cs_chg) |
- (status->pe_chg << 1) |
- (xhci->rh.c_p_r[wIndex - 1] << 4);
- retstatus = (status->cs) |
- (status->pe << 1) |
- (status->susp << 2) |
- (1 << 8) | /* power on */
- (status->lsda << 9);
- *(__u16 *)data = cpu_to_le16(retstatus);
- *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
- OK(4);
- case RH_CLEAR_FEATURE | RH_ENDPOINT:
- switch (wValue) {
- case RH_ENDPOINT_STALL:
- OK(0);
- }
- break;
- case RH_CLEAR_FEATURE | RH_CLASS:
- switch (wValue) {
- case RH_C_HUB_OVER_CURRENT:
- OK(0); /* hub power over current */
- }
- break;
- case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
- switch (wValue) {
- case RH_PORT_ENABLE:
- status->pe = 0;
- OK(0);
- case RH_PORT_SUSPEND:
- status->susp = 0;
- OK(0);
- case RH_PORT_POWER:
- OK(0); /* port power */
- case RH_C_PORT_CONNECTION:
- status->cs_chg = 0;
- OK(0);
- case RH_C_PORT_ENABLE:
- status->pe_chg = 0;
- OK(0);
- case RH_C_PORT_SUSPEND:
- /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
- OK(0);
- case RH_C_PORT_OVER_CURRENT:
- OK(0); /* port power over current */
- case RH_C_PORT_RESET:
- xhci->rh.c_p_r[wIndex - 1] = 0;
- OK(0);
- }
- break;
- case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
- switch (wValue) {
- case RH_PORT_SUSPEND:
- status->susp = 1;
- OK(0);
- case RH_PORT_RESET:
- {
- int ret;
- xhci->rh.c_p_r[wIndex - 1] = 1;
- status->pr = 0;
- status->pe = 1;
- ret = xhci_port_reset(wIndex - 1);
- /* XXX MAW: should probably cancel queued transfers
during reset... *\/ */
- if ( ret == 0 ) { OK(0); }
- else { return ret; }
- }
- break;
- case RH_PORT_POWER:
- OK(0); /* port power ** */
- case RH_PORT_ENABLE:
- status->pe = 1;
- OK(0);
- }
- break;
- case RH_SET_ADDRESS:
- xhci->rh.devnum = wValue;
- OK(0);
- case RH_GET_DESCRIPTOR:
- switch ((wValue & 0xff00) >> 8) {
- case 0x01: /* device descriptor */
- len = min_t(unsigned int, leni,
- min_t(unsigned int,
- sizeof(root_hub_dev_des), wLength));
- memcpy(data, root_hub_dev_des, len);
- OK(len);
- case 0x02: /* configuration descriptor */
- len = min_t(unsigned int, leni,
- min_t(unsigned int,
- sizeof(root_hub_config_des), wLength));
- memcpy (data, root_hub_config_des, len);
- OK(len);
- case 0x03: /* string descriptors */
- len = usb_root_hub_string (wValue & 0xff,
- 0, "XHCI-alt",
- data, wLength);
- if (len > 0) {
- OK(min_t(int, leni, len));
- } else
- stat = -EPIPE;
- }
- break;
- case RH_GET_DESCRIPTOR | RH_CLASS:
- root_hub_hub_des[2] = xhci->rh.numports;
- len = min_t(unsigned int, leni,
- min_t(unsigned int, sizeof(root_hub_hub_des),
wLength));
- memcpy(data, root_hub_hub_des, len);
- OK(len);
- case RH_GET_CONFIGURATION:
- *(__u8 *)data = 0x01;
- OK(1);
- case RH_SET_CONFIGURATION:
- OK(0);
- case RH_GET_INTERFACE | RH_INTERFACE:
- *(__u8 *)data = 0x00;
- OK(1);
- case RH_SET_INTERFACE | RH_INTERFACE:
- OK(0);
- default:
- stat = -EPIPE;
- }
-
- spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
-
- urb->actual_length = len;
-
- return stat;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-static int rh_unlink_urb(struct urb *urb)
-{
- if (xhci->rh.urb == urb) {
- urb->status = -ENOENT;
- xhci->rh.send = 0;
- xhci->rh.urb = NULL;
- del_timer(&xhci->rh.rh_int_timer);
- }
- return 0;
-}
-
-/******************************************************************************
- * CONTROL PLANE FUNCTIONALITY
- */
-
-/**
- * alloc_xhci - initialise a new virtual root hub for a new USB device channel
- */
-static int alloc_xhci(void)
-{
- int retval;
- struct usb_bus *bus;
-
- retval = -EBUSY;
-
- xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
- if (!xhci) {
- err("couldn't allocate xhci structure");
- retval = -ENOMEM;
- goto err_alloc_xhci;
- }
-
- xhci->state = USBIF_STATE_CLOSED;
-
- spin_lock_init(&xhci->urb_list_lock);
- INIT_LIST_HEAD(&xhci->urb_list);
-
- spin_lock_init(&xhci->complete_list_lock);
- INIT_LIST_HEAD(&xhci->complete_list);
-
- spin_lock_init(&xhci->frame_list_lock);
-
- bus = usb_alloc_bus(&xhci_device_operations);
-
- if (!bus) {
- err("unable to allocate bus");
- goto err_alloc_bus;
- }
-
- xhci->bus = bus;
- bus->bus_name = "XHCI";
- bus->hcpriv = xhci;
-
- usb_register_bus(xhci->bus);
-
- /* Initialize the root hub */
-
- xhci->rh.numports = 0;
-
- xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
- if (!xhci->rh.dev) {
- err("unable to allocate root hub");
- goto err_alloc_root_hub;
- }
-
- xhci->state = 0;
-
- return 0;
-
-/*
- * error exits:
- */
-err_alloc_root_hub:
- usb_deregister_bus(xhci->bus);
- usb_free_bus(xhci->bus);
- xhci->bus = NULL;
-
-err_alloc_bus:
- kfree(xhci);
-
-err_alloc_xhci:
- return retval;
-}
-
-/**
- * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message
- */
-static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
-{
- ctrl_msg_t cmsg;
- usbif_fe_interface_connect_t up;
- long rc;
- usbif_sring_t *sring;
-
- switch ( status->status )
- {
- case USBIF_INTERFACE_STATUS_DESTROYED:
- printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
- xhci->state);
- break;
-
- case USBIF_INTERFACE_STATUS_DISCONNECTED:
- if ( xhci->state != USBIF_STATE_CLOSED )
- {
- printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
- " in state %d\n", xhci->state);
- break;
- /* Not bothering to do recovery here for now. Keep things
- * simple. */
-
- spin_lock_irq(&xhci->ring_lock);
-
- /* Clean up resources. */
- free_page((unsigned long)xhci->usb_ring.sring);
- unbind_evtchn_from_irqhandler(xhci->evtchn, xhci);
-
- /* Plug the ring. */
- xhci->recovery = 1;
- wmb();
-
- spin_unlock_irq(&xhci->ring_lock);
- }
-
- /* Move from CLOSED to DISCONNECTED state. */
- sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&xhci->usb_ring, sring, PAGE_SIZE);
- xhci->state = USBIF_STATE_DISCONNECTED;
-
- /* Construct an interface-CONNECT message for the domain controller. */
- cmsg.type = CMSG_USBIF_FE;
- cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT;
- cmsg.length = sizeof(usbif_fe_interface_connect_t);
- up.shmem_frame = virt_to_mfn(sring);
- memcpy(cmsg.msg, &up, sizeof(up));
-
- /* Tell the controller to bring up the interface. */
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
- break;
-
- case USBIF_INTERFACE_STATUS_CONNECTED:
- if ( xhci->state == USBIF_STATE_CLOSED )
- {
- printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
- " in state %d\n", xhci->state);
- break;
- }
-
- xhci->evtchn = status->evtchn;
- xhci->bandwidth = status->bandwidth;
- xhci->rh.numports = status->num_ports;
-
- xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports,
GFP_KERNEL);
-
- if ( xhci->rh.ports == NULL )
- goto alloc_ports_nomem;
-
- memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
-
- usb_connect(xhci->rh.dev);
-
- if (usb_new_device(xhci->rh.dev) != 0) {
- err("unable to start root hub");
- }
-
- /* Allocate the appropriate USB bandwidth here... Need to
- * somehow know what the total available is thought to be so we
- * can calculate the reservation correctly. */
- usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
- 1000 - xhci->bandwidth, 0);
-
- if ( (rc = bind_evtchn_to_irqhandler(xhci->evtchn, xhci_interrupt,
- SA_SAMPLE_RANDOM, "usbif", xhci)) )
- printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
-
- DPRINTK(KERN_INFO __FILE__
- ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d\n",
- xhci->usb_ring.sring, virt_to_mfn(xhci->usbif),
- xhci->evtchn);
-
- xhci->state = USBIF_STATE_CONNECTED;
-
- break;
-
- default:
- printk(KERN_WARNING "Status change to unknown value %d\n",
- status->status);
- break;
- }
-
- return;
-
- alloc_ports_nomem:
- printk(KERN_WARNING "Failed to allocate port memory, XHCI failed to
connect.\n");
- return;
-}
-
-/**
- * usbif_ctrlif_rx - demux control messages by subtype
- */
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
- usbif_status_change((usbif_fe_interface_status_changed_t *)
- &msg->msg[0]);
- break;
-
- /* New interface...? */
- default:
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-static void send_driver_up(void)
-{
- control_msg_t cmsg;
- usbif_fe_interface_status_changed_t st;
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_USBIF_FE;
- cmsg.subtype = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
- cmsg.length = sizeof(usbif_fe_driver_status_changed_t);
- st.status = USBIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-void usbif_resume(void)
-{
- int i;
-
- /* Fake disconnection on all virtual USB ports (suspending / migrating
- * will destroy hard state associated will the USB devices anyhow). */
- /* No need to lock here. */
- for ( i = 0; i < xhci->rh.numports; i++ )
- {
- xhci->rh.ports[i].cs = 0;
- xhci->rh.ports[i].cs_chg = 1;
- xhci->rh.ports[i].pe = 0;
- }
-
- send_driver_up();
-}
-
-static int __init xhci_hcd_init(void)
-{
- int retval = -ENOMEM, i;
-
- if ( (xen_start_info->flags & SIF_INITDOMAIN) ||
- (xen_start_info->flags & SIF_USB_BE_DOMAIN) )
- return 0;
-
- info(DRIVER_DESC " " DRIVER_VERSION);
-
- if (debug) {
- errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
- if (!errbuf)
- goto errbuf_failed;
- }
-
- xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
- sizeof(struct urb_priv), 0, 0, NULL, NULL);
- if (!xhci_up_cachep)
- goto up_failed;
-
- /* Let the domain controller know we're here. For now we wait until
- * connection, as for the block and net drivers. This is only strictly
- * necessary if we're going to boot off a USB device. */
- printk(KERN_INFO "Initialising Xen virtual USB hub\n");
-
- (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- alloc_xhci();
-
- send_driver_up();
-
- /*
- * We should read 'nr_interfaces' from response message and wait
- * for notifications before proceeding. For now we assume that we
- * will be notified of exactly one interface.
- */
- for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- if (xhci->state != USBIF_STATE_CONNECTED)
- printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
-
- return 0;
-
-up_failed:
- if (errbuf)
- kfree(errbuf);
-
-errbuf_failed:
- return retval;
-}
-
-module_init(xhci_hcd_init);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-
diff -r 97dbd9524a7e -r 06d84bf87159
linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,182 +0,0 @@
-/******************************************************************************
- * xhci.h
- *
- * Private definitions for the Xen Virtual USB Controller. Based on
- * drivers/usb/host/uhci.h from Linux. Copyright for the imported content is
- * retained by the original authors.
- *
- * Modifications are:
- * Copyright (C) 2004 Intel Research Cambridge
- * Copyright (C) 2004, 2005 Mark Williamson
- */
-
-#ifndef __LINUX_XHCI_H
-#define __LINUX_XHCI_H
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <asm-xen/xen-public/io/usbif.h>
-#include <linux/spinlock.h>
-
-/* xhci_port_t - current known state of a virtual hub ports */
-typedef struct {
- unsigned int cs :1; /* Connection status. */
- unsigned int cs_chg :1; /* Connection status change. */
- unsigned int pe :1; /* Port enable. */
- unsigned int pe_chg :1; /* Port enable change. */
- unsigned int susp :1; /* Suspended. */
- unsigned int lsda :1; /* Low speed device attached. */
- unsigned int pr :1; /* Port reset. */
-} xhci_port_t;
-
-/* struct virt_root_hub - state related to the virtual root hub */
-struct virt_root_hub {
- struct usb_device *dev;
- int devnum; /* Address of Root Hub endpoint */
- struct urb *urb;
- void *int_addr;
- int send;
- int interval;
- int numports;
- int c_p_r[8];
- struct timer_list rh_int_timer;
- spinlock_t port_state_lock;
- xhci_port_t *ports;
-};
-
-/* struct xhci - contains the state associated with a single USB interface */
-struct xhci {
-
-#ifdef CONFIG_PROC_FS
- /* procfs */
- int num;
- struct proc_dir_entry *proc_entry;
-#endif
-
- int evtchn; /* Interdom channel to backend */
- enum {
- USBIF_STATE_CONNECTED = 2,
- USBIF_STATE_DISCONNECTED = 1,
- USBIF_STATE_CLOSED = 0
- } state; /* State of this USB interface */
- unsigned long recovery; /* boolean recovery in progress flag */
-
- unsigned long bandwidth;
-
- struct usb_bus *bus;
-
- /* Main list of URB's currently controlled by this HC */
- spinlock_t urb_list_lock;
- struct list_head urb_list; /* P: xhci->urb_list_lock */
-
- /* List of URB's awaiting completion callback */
- spinlock_t complete_list_lock;
- struct list_head complete_list; /* P: xhci->complete_list_lock
*/
-
- struct virt_root_hub rh; /* private data of the virtual root hub
*/
-
- spinlock_t ring_lock;
- usbif_front_ring_t usb_ring;
-
- int awaiting_reset;
-};
-
-/* per-URB private data structure for the host controller */
-struct urb_priv {
- struct urb *urb;
- usbif_iso_t *schedule;
- struct usb_device *dev;
-
- int in_progress : 1; /* QH was queued (not linked in) */
- int short_control_packet : 1; /* If we get a short packet during */
- /* a control transfer, retrigger */
- /* the status phase */
-
- int status; /* Final status */
-
- unsigned long inserttime; /* In jiffies */
-
- struct list_head complete_list; /* P: xhci->complete_list_lock */
-};
-
-/*
- * Locking in xhci.c
- *
- * spinlocks are used extensively to protect the many lists and data
- * structures we have. It's not that pretty, but it's necessary. We
- * need to be done with all of the locks (except complete_list_lock) when
- * we call urb->complete. I've tried to make it simple enough so I don't
- * have to spend hours racking my brain trying to figure out if the
- * locking is safe.
- *
- * Here's the safe locking order to prevent deadlocks:
- *
- * #1 xhci->urb_list_lock
- * #2 urb->lock
- * #3 xhci->urb_remove_list_lock
- * #4 xhci->complete_list_lock
- *
- * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
- * at the lowest level FIRST and NEVER grab locks at the same level at the
- * same time.
- *
- * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
- */
-
-/* -------------------------------------------------------------------------
- Virtual Root HUB
- ------------------------------------------------------------------------- */
-/* destination of request */
-#define RH_DEVICE 0x00
-#define RH_INTERFACE 0x01
-#define RH_ENDPOINT 0x02
-#define RH_OTHER 0x03
-
-#define RH_CLASS 0x20
-#define RH_VENDOR 0x40
-
-/* Requests: bRequest << 8 | bmRequestType */
-#define RH_GET_STATUS 0x0080
-#define RH_CLEAR_FEATURE 0x0100
-#define RH_SET_FEATURE 0x0300
-#define RH_SET_ADDRESS 0x0500
-#define RH_GET_DESCRIPTOR 0x0680
-#define RH_SET_DESCRIPTOR 0x0700
-#define RH_GET_CONFIGURATION 0x0880
-#define RH_SET_CONFIGURATION 0x0900
-#define RH_GET_STATE 0x0280
-#define RH_GET_INTERFACE 0x0A80
-#define RH_SET_INTERFACE 0x0B00
-#define RH_SYNC_FRAME 0x0C80
-/* Our Vendor Specific Request */
-#define RH_SET_EP 0x2000
-
-/* Hub port features */
-#define RH_PORT_CONNECTION 0x00
-#define RH_PORT_ENABLE 0x01
-#define RH_PORT_SUSPEND 0x02
-#define RH_PORT_OVER_CURRENT 0x03
-#define RH_PORT_RESET 0x04
-#define RH_PORT_POWER 0x08
-#define RH_PORT_LOW_SPEED 0x09
-#define RH_C_PORT_CONNECTION 0x10
-#define RH_C_PORT_ENABLE 0x11
-#define RH_C_PORT_SUSPEND 0x12
-#define RH_C_PORT_OVER_CURRENT 0x13
-#define RH_C_PORT_RESET 0x14
-
-/* Hub features */
-#define RH_C_HUB_LOCAL_POWER 0x00
-#define RH_C_HUB_OVER_CURRENT 0x01
-#define RH_DEVICE_REMOTE_WAKEUP 0x00
-#define RH_ENDPOINT_STALL 0x01
-
-/* Our Vendor Specific feature */
-#define RH_REMOVE_EP 0x00
-
-#define RH_ACK 0x01
-#define RH_REQ_ERR -1
-#define RH_NACK 0x00
-
-#endif
-
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/controller.py
--- a/tools/python/xen/xend/server/controller.py Thu Sep 22 17:34:14 2005
+++ /dev/null Thu Sep 22 17:42:01 2005
@@ -1,423 +0,0 @@
-#============================================================================
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of version 2.1 of the GNU Lesser General Public
-# License as published by the Free Software Foundation.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#============================================================================
-# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
-#============================================================================
-
-"""General support for controllers, which handle devices
-for a domain.
-"""
-
-from xen.xend.XendError import XendError
-from xen.xend.xenstore import DBVar
-
-DEBUG = 0
-
-class DevControllerTable:
- """Table of device controller classes, indexed by type name.
- """
-
- def __init__(self):
- self.controllerClasses = {}
-
- def getDevControllerClass(self, type):
- return self.controllerClasses.get(type)
-
- def addDevControllerClass(self, cls):
- self.controllerClasses[cls.getType()] = cls
-
- def delDevControllerClass(self, type):
- if type in self.controllerClasses:
- del self.controllerClasses[type]
-
- def createDevController(self, type, vm, recreate=False):
- cls = self.getDevControllerClass(type)
- if not cls:
- raise XendError("unknown device type: " + str(type))
- return cls.createDevController(vm, recreate=recreate)
-
-def getDevControllerTable():
- """Singleton constructor for the controller table.
- """
- global devControllerTable
- try:
- devControllerTable
- except:
- devControllerTable = DevControllerTable()
- return devControllerTable
-
-def addDevControllerClass(name, cls):
- """Add a device controller class to the controller table.
- """
- cls.type = name
- getDevControllerTable().addDevControllerClass(cls)
-
-
-def isDevControllerClass(name):
- """@return True if a device controller class has been registered with
- the controller table under the given name."""
- return name in getDevControllerTable().controllerClasses
-
-
-def createDevController(name, vm, recreate=False):
- return getDevControllerTable().createDevController(name, vm,
recreate=recreate)
-
-class DevController:
- """Abstract class for a device controller attached to a domain.
- A device controller manages all the devices of a given type for a domain.
- There is exactly one device controller for each device type for
- a domain.
-
- """
-
- # State:
- # controller/<type> : for controller
- # device/<type>/<id> : for each device
-
- def createDevController(cls, vm, recreate=False):
- """Class method to create a dev controller.
- """
- ctrl = cls(vm, recreate=recreate)
- ctrl.initController(recreate=recreate)
- ctrl.exportToDB()
- return ctrl
-
- createDevController = classmethod(createDevController)
-
- def getType(cls):
- return cls.type
-
- getType = classmethod(getType)
-
- __exports__ = [
- DBVar('type', 'str'),
- DBVar('destroyed', 'bool'),
- ]
-
- # Set when registered.
- type = None
-
- def __init__(self, vm, recreate=False):
- self.destroyed = False
- self.vm = vm
- self.db = self.getDB()
- self.deviceId = 0
- self.devices = {}
- self.device_order = []
-
- def getDB(self):
- """Get the db node to use for a controller.
- """
- return self.vm.db.addChild("/controller/%s" % self.getType())
-
- def getDevDB(self, id):
- """Get the db node to use for a device.
- """
- return self.vm.db.addChild("/device/%s/%s" % (self.getType(), id))
-
- def exportToDB(self, save=False):
- self.db.exportToDB(self, fields=self.__exports__, save=save)
-
- def importFromDB(self):
- self.db.importFromDB(self, fields=self.__exports__)
-
- def getDevControllerType(self):
- return self.dctype
-
- def getDomain(self):
- return self.vm.getDomain()
-
- def getDomainName(self):
- return self.vm.getName()
-
- def getDomainInfo(self):
- return self.vm
-
-
#----------------------------------------------------------------------------
- # Subclass interface.
- # Subclasses should define the unimplemented methods..
- # Redefinitions must have the same arguments.
-
- def initController(self, recreate=False, reboot=False):
- """Initialise the controller. Called when the controller is
- first created, and again after the domain is rebooted (with reboot
True).
- If called with recreate True (and reboot False) the controller is being
- recreated after a xend restart.
-
- As this can be a re-init (after reboot) any controller state should
- be reset. For example the destroyed flag.
- """
- self.destroyed = False
- if reboot:
- self.rebootDevices()
-
- def newDevice(self, id, config, recreate=False):
- """Create a device with the given config.
- Must be defined in subclass.
- Called with recreate True when the device is being recreated after a
- xend restart.
-
- @return device
- """
- raise NotImplementedError()
-
- def createDevice(self, config, recreate=False, change=False):
- """Create a device and attach to its front- and back-ends.
- If recreate is true the device is being recreated after a xend restart.
- If change is true the device is a change to an existing domain,
- i.e. it is being added at runtime rather than when the domain is
created.
- """
- dev = self.newDevice(self.nextDeviceId(), config, recreate=recreate)
- if self.vm.recreate:
- dev.importFromDB()
- dev.init(recreate=recreate)
- self.addDevice(dev)
- if not recreate:
- dev.exportToDB()
- dev.attach(recreate=recreate, change=change)
- dev.exportToDB()
-
- return dev
-
- def configureDevice(self, id, config, change=False):
- """Reconfigure an existing device.
- May be defined in subclass."""
- dev = self.getDevice(id, error=True)
- dev.configure(config, change=change)
-
- def destroyDevice(self, id, change=False, reboot=False):
- """Destroy a device.
- May be defined in subclass.
-
- If reboot is true the device is being destroyed for a domain reboot.
-
- The device is not deleted, since it may be recreated later.
- """
- dev = self.getDevice(id, error=True)
- dev.destroy(change=change, reboot=reboot)
- return dev
-
- def deleteDevice(self, id, change=True):
- """Destroy a device and delete it.
- Normally called to remove a device from a domain at runtime.
- """
- dev = self.destroyDevice(id, change=change)
- self.removeDevice(dev)
-
- def destroyController(self, reboot=False):
- """Destroy all devices and clean up.
- May be defined in subclass.
- If reboot is true the controller is being destroyed for a domain
reboot.
- Called at domain shutdown.
- """
- self.destroyed = True
- self.destroyDevices(reboot=reboot)
-
-
#----------------------------------------------------------------------------
-
- def isDestroyed(self):
- return self.destroyed
-
- def getDevice(self, id, error=False):
- dev = self.devices.get(int(id))
- if error and not dev:
- raise XendError("invalid device id: " + str(id))
- return dev
-
- def getDeviceIds(self):
- return [ dev.getId() for dev in self.device_order ]
-
- def getDevices(self):
- return self.device_order
-
- def getDeviceConfig(self, id):
- return self.getDevice(id).getConfig()
-
- def getDeviceConfigs(self):
- return [ dev.getConfig() for dev in self.device_order ]
-
- def getDeviceSxprs(self):
- return [ dev.sxpr() for dev in self.device_order ]
-
- def addDevice(self, dev):
- self.devices[dev.getId()] = dev
- self.device_order.append(dev)
- return dev
-
- def removeDevice(self, dev):
- if dev.getId() in self.devices:
- del self.devices[dev.getId()]
- if dev in self.device_order:
- self.device_order.remove(dev)
-
- def rebootDevices(self):
- for dev in self.getDevices():
- dev.reboot()
-
- def destroyDevices(self, reboot=False):
- """Destroy all devices.
- """
- for dev in self.getDevices():
- dev.destroy(reboot=reboot)
-
- def getMaxDeviceId(self):
- maxid = 0
- for id in self.devices:
- if id > maxid:
- maxid = id
- return maxid
-
- def nextDeviceId(self):
- id = self.deviceId
- self.deviceId += 1
- return id
-
- def getDeviceCount(self):
- return len(self.devices)
-
-class Dev:
- """Abstract class for a device attached to a device controller.
-
- @ivar id: identifier
- @type id: int
- @ivar controller: device controller
- @type controller: DevController
- """
-
- # ./status : need 2: actual and requested?
- # down-down: initial.
- # up-up: fully up.
- # down-up: down requested, still up. Watch front and back, when both
- # down go to down-down. But what if one (or both) is not connected?
- # Still have front/back trees with status? Watch front/status, back/status?
- # up-down: up requested, still down.
- # Back-end watches ./status, front/status
- # Front-end watches ./status, back/status
- # i.e. each watches the other 2.
- # Each is status/request status/actual?
- #
- # backend?
- # frontend?
-
- __exports__ = [
- DBVar('id', ty='int'),
- DBVar('type', ty='str'),
- DBVar('config', ty='sxpr'),
- DBVar('destroyed', ty='bool'),
- ]
-
- def __init__(self, controller, id, config, recreate=False):
- self.controller = controller
- self.id = id
- self.config = config
- self.destroyed = False
- self.type = self.getType()
-
- self.db = controller.getDevDB(id)
-
- def exportToDB(self, save=False):
- self.db.exportToDB(self, fields=self.__exports__, save=save)
-
- def importFromDB(self):
- self.db.importFromDB(self, fields=self.__exports__)
-
- def getDomain(self):
- return self.controller.getDomain()
-
- def getDomainName(self):
- return self.controller.getDomainName()
-
- def getDomainInfo(self):
- return self.controller.getDomainInfo()
-
- def getController(self):
- return self.controller
-
- def getType(self):
- return self.controller.getType()
-
- def getId(self):
- return self.id
-
- def getConfig(self):
- return self.config
-
- def isDestroyed(self):
- return self.destroyed
-
-
#----------------------------------------------------------------------------
- # Subclass interface.
- # Define methods in subclass as needed.
- # Redefinitions must have the same arguments.
-
- def init(self, recreate=False, reboot=False):
- """Initialization. Called on initial create (when reboot is False)
- and on reboot (when reboot is True). When xend is restarting is
- called with recreate True. Define in subclass if needed.
-
- Device instance variables must be defined in the class constructor,
- but given null or default values. The real values should be initialised
- in this method. This allows devices to be re-initialised.
-
- Since this can be called to re-initialise a device any state flags
- should be reset.
- """
- self.destroyed = False
-
- def attach(self, recreate=False, change=False):
- """Attach the device to its front and back ends.
- Define in subclass if needed.
- """
- pass
-
- def reboot(self):
- """Reconnect the device when the domain is rebooted.
- """
- self.init(reboot=True)
- self.attach()
-
- def sxpr(self):
- """Get the s-expression for the deivice.
- Implement in a subclass if needed.
-
- @return: sxpr
- """
- return self.getConfig()
-
- def configure(self, config, change=False):
- """Reconfigure the device.
-
- Implement in subclass.
- """
- raise NotImplementedError()
-
- def refresh(self):
- """Refresh the device..
- Default no-op. Define in subclass if needed.
- """
- pass
-
- def destroy(self, change=False, reboot=False):
- """Destroy the device.
- If change is True notify destruction (runtime change).
- If reboot is True the device is being destroyed for a reboot.
- Redefine in subclass if needed.
-
- Called at domain shutdown and when a device is deleted from
- a running domain (with change True).
- """
- self.destroyed = True
- pass
-
-
#----------------------------------------------------------------------------
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|