# HG changeset patch
# User David Scott <dave.scott@xxxxxxxxxxxxx>
# Date 1279905519 -3600
# Node ID 2580868fdc435e5c729f98bd707d018dafd3dc2a
# Parent 08d6e92809f05170f8088d6817de06cf45c5c324
Add a sparse_dd binary which uses the new sparseness scanning code.
The binary supports a "-test" argument which runs a set of unit tests.
Signed-off-by: David Scott <dave.scott@xxxxxxxxxxxxx>
diff -r 08d6e92809f0 -r 2580868fdc43 OMakefile
--- a/OMakefile Wed Jul 14 16:44:47 2010 +0100
+++ b/OMakefile Fri Jul 23 18:18:39 2010 +0100
@@ -113,6 +113,7 @@
ocaml/xiu/xiu \
ocaml/xapi/fakeguestagent \
ocaml/xapi/quicktestbin \
+ ocaml/xapi/sparse_dd \
ocaml/xsrpc/xsrpc \
ocaml/xsrpc/xsrpcd-util \
ocaml/guest/agent \
diff -r 08d6e92809f0 -r 2580868fdc43 ocaml/xapi/OMakefile
--- a/ocaml/xapi/OMakefile Wed Jul 14 16:44:47 2010 +0100
+++ b/ocaml/xapi/OMakefile Fri Jul 23 18:18:39 2010 +0100
@@ -1,4 +1,4 @@
-OCAMLPACKS = xml-light2 cdrom pciutil sexpr log stunnel http-svr rss
xen-utils netdev
+OCAMLPACKS = xml-light2 cdrom pciutil sexpr log stunnel http-svr rss
xen-utils netdev tapctl vhd
OCAML_LIBS = ../util/version ../util/vm_memory_constraints
../util/sanitycheck ../util/stats \
../idl/ocaml_backend/common ../idl/ocaml_backend/client
../idl/ocaml_backend/server ../util/ocamltest
OCAMLINCLUDES = ../idl ../idl/ocaml_backend \
@@ -32,6 +32,7 @@
#OCAML_CLIBS += stubs
OCamlProgram(http_test, http_test)
+OCamlProgram(sparse_dd, sparse_dd)
COMMON = \
xapi_templates \
@@ -258,6 +259,7 @@
$(IPROG) quicktest $(DEBUGDIST)
cp -f quicktestbin $(DEBUGDIST)
$(IPROG) rrddump $(DEBUGDIST)
+ $(IPROG) sparse_dd $(DESTDIR)/opt/xensource/libexec
.PHONY: sdk-install
sdk-install: install
diff -r 08d6e92809f0 -r 2580868fdc43 ocaml/xapi/sparse_dd.ml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ocaml/xapi/sparse_dd.ml Fri Jul 23 18:18:39 2010 +0100
@@ -0,0 +1,364 @@
+(* Utility program which copies between two block devices, using vhd BATs and
efficient zero-scanning
+ for performance. *)
+
+open Pervasiveext
+open Stringext
+open Listext
+
+let ( +* ) = Int64.add
+let ( -* ) = Int64.sub
+let ( ** ) = Int64.mul
+
+let kib = 1024L
+let mib = kib ** kib
+
+let blocksize = 10L ** mib
+
+exception ShortWrite of int (* offset *) * int (* expected *) * int (* actual
*)
+
+(* Consider a tunable quantum of non-zero'ness such that if we encounter
+ a non-zero, we know we're going to incur the penalty of a seek/write
+ and we may as well write a sizeable chunk at a time. *)
+let roundup x =
+ let quantum = 16384 in
+ ((x + quantum + quantum - 1) / quantum) * quantum
+
+(** The copying routine can operate on anything which looks like a
file-descriptor/Stream *)
+module type Stream = sig
+ type stream
+ val read: stream -> int64 -> string -> int -> int -> unit
+ val write: stream -> int64 -> string -> int -> int -> int
+end
+
+(* [fold_over_blocks (s, len) skip f initial] applies contiguous (start,
length) pairs to
+ [f] starting at [s] up to maximum length [len] where each pair is as large
as possible
+ up to [skip]. *)
+let fold_over_blocks (s, len) skip f initial =
+ let rec inner offset acc =
+ if offset = s +* len then acc
+ else
+ let len' = min skip (s +* len -* offset) in
+ inner (offset +* len') (f acc (offset, len')) in
+ inner s initial
+
+(** Represents a "block allocation table" *)
+module Bat = ExtentlistSet.ExtentlistSet(Int64)
+
+(** As we copy we accumulate some simple performance stats *)
+type stats = {
+ writes: int; (* total number of writes *)
+ bytes: int64; (* total bytes written *)
+}
+
+(** Perform the data duplication ("DD") *)
+module DD(S : Stream) = struct
+ (** [copy progress_cb bat sparse src dst size] copies blocks of data
from [src] to [dst]
+ where [bat] represents the allocated / dirty blocks in [src];
+ where if sparse is None it means don't scan for and skip over
blocks of zeroes in [src]
+ where if sparse is (Some c) it means do scan for and skip over
blocks of 'c' in [src]
+ while calling [progress_cb] frequently to report the fraction
complete
+ *)
+ let copy progress_cb bat sparse (src: S.stream) (dst: S.stream) size =
+ let buf = String.create (Int64.to_int blocksize) in
+ let do_block stats (offset, this_chunk) : stats =
+ progress_cb ((Int64.to_float offset) /. (Int64.to_float
size));
+ S.read src offset buf 0 (Int64.to_int this_chunk);
+ let write_extent stats (s, e) =
+ let n = S.write dst (offset +* (Int64.of_int
s)) buf s e in
+ if n < e then raise (ShortWrite(s, e, n));
+ { stats with writes = stats.writes + 1; bytes =
stats.bytes +* (Int64.of_int n) }
+ in
+ begin match sparse with
+ | Some zero -> Zerocheck.fold_over_nonzeros buf
(Int64.to_int this_chunk) roundup write_extent stats
+ | None -> write_extent stats (0, Int64.to_int
this_chunk)
+ end in
+ (* For each entry from the BAT, copy it as a sequence of
sub-blocks *)
+ Bat.fold_left (fun stats b -> fold_over_blocks b blocksize
do_block stats) { writes = 0; bytes = 0L } bat
+end
+
+(* Helper function to always return a block of zeroes, like /dev/null *)
+let read_zeroes _ _ buf offset len =
+ for i = 0 to len - 1 do
+ buf.[offset + i] <- '\000'
+ done
+
+(** A Stream interface implemented over strings, useful for testing *)
+module String_stream = struct
+ type stream = string
+ let blit src srcoff dst dstoff len =
+ (* Printf.printf "[%s](%d) -> [%s](%d) %d\n" "?" srcoff "?"
dstoff len; *)
+ String.blit src srcoff dst dstoff len
+ let read str stream_offset buf offset len =
+ blit str (Int64.to_int stream_offset) buf offset len
+ let write str stream_offset buf offset len =
+ blit buf offset str (Int64.to_int stream_offset) len;
+ len
+end
+
+(** A File interface implemented over open Unix files *)
+module File_stream = struct
+ type stream = Unix.file_descr
+
+ let read stream stream_offset buf offset len =
+ Unix.LargeFile.lseek stream stream_offset Unix.SEEK_SET;
+ Unixext.really_read stream buf offset len
+ let write stream stream_offset buf offset len =
+ let newoff = Unix.LargeFile.lseek stream stream_offset
Unix.SEEK_SET in
+ (* Printf.printf "Unix.write buf len %d; offset %d; len %d\n"
(String.length buf) offset len; *)
+ Unix.write stream buf offset len
+end
+
+(** An implementation of the DD algorithm over strings *)
+module String_copy = DD(String_stream)
+
+(** An implementatino of the DD algorithm which copies zeroes into strings *)
+module String_write_zero = DD(struct
+ include String_stream
+ let read = read_zeroes
+end)
+
+(** An implementatino of the DD algorithm over Unix files *)
+module File_copy = DD(File_stream)
+
+(** An implementatin of the DD algorithm which copies zeroes into files *)
+module File_write_zero = DD(struct
+ include File_stream
+ let read = read_zeroes
+end)
+
+(** [file_dd ?progress_cb ?size ?bat prezeroed src dst]
+ If [size] is not given, will assume a plain file and will use st_size from
Unix.stat.
+ If [prezeroed] is false, will first explicitly write zeroes to all blocks
not in [bat].
+ Will then write blocks from [src] into [dst], using the [bat]. If
[prezeroed] will additionally
+ scan for zeroes within the allocated blocks. *)
+let file_dd ?(progress_cb = (fun _ -> ())) ?size ?bat prezeroed src dst =
+ let size = match size with
+ | None -> (Unix.LargeFile.stat src).Unix.LargeFile.st_size
+ | Some x -> x in
+ let ifd = Unix.openfile src [ Unix.O_RDONLY ] 0o600 in
+ let ofd = Unix.openfile dst [ Unix.O_WRONLY; Unix.O_CREAT ] 0o600 in
+ (* Make sure the output file has the right size *)
+ Unix.LargeFile.lseek ofd (size -* 1L) Unix.SEEK_SET;
+ Unix.write ofd "\000" 0 1;
+ Unix.LargeFile.lseek ofd 0L Unix.SEEK_SET;
+ let full_bat = Bat.of_list [0L, size] in
+ let empty_bat = Bat.of_list [] in
+ let bat = Opt.default full_bat bat in
+ (* If not prezeroed then:
+ 1. explicitly write zeroes into the complement of the BAT;
+ 2. don't scan and skip zeroes in the source disk *)
+ let bat' = if prezeroed
+ then empty_bat
+ else Bat.difference full_bat bat in
+ let progress_cb_zero, progress_cb_copy =
+ (fun fraction -> progress_cb (0.5 *. fraction)),
+ (fun fraction -> progress_cb (0.5 *. fraction +. 0.5)) in
+ Printf.printf "Wiping\n";
+ File_write_zero.copy progress_cb_zero bat' None ifd ofd size;
+ Printf.printf "Copying\n";
+ File_copy.copy progress_cb_copy bat (if prezeroed then Some '\000' else
None) ifd ofd size
+
+(** [make_random size zero nonzero] returns a string (of size [size]) and a
BAT. Blocks not in the BAT
+ are guaranteed to be [zero]. Blocks in the BAT are randomly either [zero]
or [nonzero]. *)
+let make_random size zero nonzero =
+ (* First make a random BAT *)
+ let bs = size / 100 in
+ let bits = Array.make ((size + bs - 1) / bs) false in
+ for i = 0 to Array.length bits - 1 do
+ bits.(i) <- Random.bool ()
+ done;
+ let result = String.create size in
+ for i = 0 to size - 1 do
+ if bits.(i / bs)
+ then result.[i] <- (if Random.float 10. > 1.0 then zero else
nonzero)
+ else result.[i] <- zero
+ done;
+ let bat = snd (Array.fold_left (fun (offset, acc) bit ->
+ let offset' = min size (offset + bs) in
+ offset', if bit then (offset, offset' - offset) :: acc else
acc) (0, []) bits) in
+ let bat = Bat.of_list (List.map (fun (x, y) -> Int64.of_int x,
Int64.of_int y) bat) in
+ result, bat
+
+(** [test_dd (input, bat) ignore_bat prezeroed zero nonzero] uses the DD
algorithm to make a copy of
+ the string [input].
+ If [ignore_bat] is true then the [bat] is ignored (as if none were
available).
+ If [prezeroed] is true then the output is created full of [zero],
otherwise [nonzero].
+ The resulting string is compared to the original and if not idential, an
exception is raised.
+ *)
+let test_dd (input, bat) ignore_bat prezeroed zero nonzero =
+ let size = String.length input in
+ let output = String.make size (if prezeroed then zero else nonzero) in
+ try
+
+ let full_bat = Bat.of_list [0L, Int64.of_int size] in
+ let empty_bat = Bat.of_list [] in
+ let bat = if ignore_bat then full_bat else bat in
+ (* If not prezeroed then:
+ 1. explicitly write zeroes into the complement of the BAT;
+ 2. don't scan and skip zeroes in the source disk *)
+ let bat' = if prezeroed
+ then empty_bat
+ else Bat.difference full_bat bat in
+ String_write_zero.copy (fun _ -> ()) bat' None input output
(Int64.of_int size);
+ let stats = String_copy.copy (fun _ -> ()) bat (if prezeroed
then Some zero else None) input output (Int64.of_int size) in
+ assert (String.compare input output = 0);
+ stats
+ with e ->
+ Printf.printf "Exception: %s" (Printexc.to_string e);
+ let make_visible x =
+ for i = 0 to String.length x - 1 do
+ if x.[i] = '\000'
+ then x.[i] <- 'z'
+ else x.[i] <- 'a';
+ done in
+ make_visible input;
+ make_visible output;
+ failwith (Printf.sprintf "input = [%s]; output = [%s]" input
output)
+
+(** Generates lots of random strings and makes copies with the DD algorithm,
checking that the copies are identical *)
+let test_lots_of_strings () =
+ let n = 1000 and m = 1000 in
+ let writes = ref 0 and bytes = ref 0L in
+ for i = 0 to n do
+ if i mod 100 = 0 then (Printf.printf "i = %d\n" i; flush
stdout);
+ List.iter (fun ignore_bat ->
+ List.iter (fun prezeroed ->
+ let stats = test_dd (make_random m '\000' 'a')
ignore_bat prezeroed '\000' 'a' in
+ writes := !writes + stats.writes;
+ bytes := !bytes +* stats.bytes
+ ) [ true; false ]
+ ) [ true; false ]
+ done;
+ Printf.printf "Tested %d random strings of length %d using all 4
combinations of ignore_bat, prezeroed\n" n m;
+ Printf.printf "Total writes: %d\n" !writes;
+ Printf.printf "Total bytes: %Ld\n" !bytes
+
+(** [vhd_of_device path] returns (Some vhd) where 'vhd' is the vhd leaf
backing a particular device [path] or None.
+ [path] may either be a blktap2 device *or* a blkfront device backed by a
blktap2 device. If the latter then
+ the script must be run in the same domain as blkback. *)
+let vhd_of_device path =
+ let find_underlying_tapdisk path =
+ try
+ (* If we're looking at a xen frontend device, see if the backend
+ is in the same domain. If so check if it looks like a .vhd *)
+ let rdev = (Unix.stat path).Unix.st_rdev in
+ let major = rdev / 256 and minor = rdev mod 256 in
+ let link = Unix.readlink (Printf.sprintf
"/sys/dev/block/%d:%d/device" major minor) in
+ match List.rev (String.split '/' link) with
+ | id :: "xen" :: "devices" :: _ when String.startswith
"vbd-" id ->
+ let id = int_of_string (String.sub id 4
(String.length id - 4)) in
+ let xs = Xs.domain_open () in
+ finally
+ (fun () ->
+ let self = xs.Xs.read "domid" in
+ let backend = xs.Xs.read
(Printf.sprintf "device/vbd/%d/backend" id) in
+ let params = xs.Xs.read (Printf.sprintf
"%s/params" backend) in
+ match String.split '/' backend with
+ | "local" :: "domain" :: bedomid :: _ ->
+ assert (self = bedomid);
+ Some params
+ | _ -> raise Not_found
+ )
+ (fun () -> Xs.close xs)
+ | _ -> raise Not_found
+ with _ -> None in
+ let tapdisk_of_path path =
+ try
+ match Tapctl.of_device (Tapctl.create ()) path with
+ | _, _, (Some (_, vhd)) -> Some vhd
+ | _, _, _ -> raise Not_found
+ with Not_found -> None in
+ match tapdisk_of_path path with
+ | Some vhd -> Some vhd
+ | None ->
+ begin match find_underlying_tapdisk path with
+ | Some path ->
+ begin match tapdisk_of_path path with
+ | Some vhd -> Some vhd
+ | None -> None
+ end
+ | None -> None
+ end
+
+(** Given a vhd filename, return the BAT *)
+let bat vhd =
+ let h = Vhd._open vhd [ Vhd.Open_rdonly ] in
+ finally
+ (fun () ->
+ let b = Vhd.get_bat h in
+ let b' = List.map_tr (fun (s, l) -> 2L ** mib ** (Int64.of_int
s), 2L ** mib ** (Int64.of_int l)) b in
+ Bat.of_list b')
+ (fun () -> Vhd.close h)
+
+(* Record when the binary started for performance measuring *)
+let start = Unix.gettimeofday ()
+
+(* Set to true when we want machine-readable output *)
+let machine_readable = ref false
+
+(* Helper function to print nice progress info *)
+let progress_cb =
+ let last_percent = ref (-1) in
+
+ function fraction ->
+ let new_percent = int_of_float (fraction *. 100.) in
+ if !last_percent <> new_percent then begin
+ if !machine_readable
+ then Printf.printf "Progress: %.0f\n" (fraction *. 100.)
+ else Printf.printf "\b\rProgress: %-60s (%d%%)"
(String.make (int_of_float (fraction *. 60.)) '#') new_percent;
+ flush stdout;
+ end;
+ last_percent := new_percent
+
+let _ =
+ let base = ref "" and src = ref "" and dest = ref "" and size = ref
(-1L) and prezeroed = ref false and test = ref false in
+ Arg.parse [ "-base", Arg.Set_string base, "base disk to search for
differences from (default: None)";
+ "-src", Arg.Set_string src, "source disk";
+ "-dest", Arg.Set_string dest, "destination disk";
+ "-size", Arg.String (fun x -> size := Int64.of_string x),
"number of bytes to copy";
+ "-prezeroed", Arg.Set prezeroed, "assume the destination
disk has been prezeroed";
+ "-machine", Arg.Set machine_readable, "emit
machine-readable output";
+ "-test", Arg.Set test, "perform some unit tests"; ]
+ (fun x -> Printf.fprintf stderr "Warning: ignoring unexpected argument
%s\n" x)
+ (String.concat "\n" [ "Usage:";
+ Printf.sprintf "%s [-base x] [-prezeroed] <-src
y> <-dest z> <-size s>" Sys.argv.(0);
+ " -- copy <s> bytes from <y> to <z>. If <-base
x> is specified then only copy differences";
+ " between <x> and <y>. If [-base x] is
unspecified and [-prezeroed] is unspecified ";
+ " then assume the destination must be fully
wiped.";
+ "";
+ "Examples:";
+ "";
+ Printf.sprintf "%s -prezeroed -src /dev/xvda
-dest /dev/xvdb -size 1024" Sys.argv.(0);
+ " -- copy 1024 bytes from /dev/xvda to /dev/xvdb
assuming that /dev/xvdb is completely";
+ " full of zeroes so there's no need to
explicitly copy runs of zeroes.";
+ "";
+ Printf.sprintf "%s -src /dev/xvda
-dest /dev/xvdb -size 1024" Sys.argv.(0);
+ "";
+ " -- copy 1024 bytes from /dev/xvda to
/dev/xvdb, always explicitly writing zeroes";
+ " into /dev/xvdb under the assumption that it
contains undefined data.";
+ "";
+ Printf.sprintf "%s -base /dev/xvdc -src /dev/xvda
-dest /dev/xvdb -size 1024" Sys.argv.(0);
+ "";
+ " -- copy up to 1024 bytes of *differences*
between /dev/xvdc and /dev/xvda into";
+ " into /dev/xvdb under the assumption that
/dev/xvdb contains identical data";
+ " to /dev/xvdb."; ]);
+ if !test then begin
+ test_lots_of_strings ();
+ exit 0
+ end;
+ if !src = "" || !dest = "" || !size = (-1L) then begin
+ Printf.fprintf stderr "Must have -src -dest and -size
arguments\n";
+ exit 1;
+ end;
+
+
+ let size = Some !size in
+ let src_vhd = vhd_of_device !src and dest_vhd = vhd_of_device !dest in
+ Printf.printf "auto-detect src vhd: %s\n" (Opt.default "None" (Opt.map
(fun x -> "Some " ^ x) src_vhd));
+ let src_bat = Opt.map bat src_vhd in
+ progress_cb 0.;
+ let stats = file_dd ~progress_cb ?size ?bat:src_bat !prezeroed !src
!dest in
+ Printf.printf "Time: %.2f seconds\n" (Unix.gettimeofday () -. start);
+ Printf.printf "\nNumber of writes: %d\n" stats.writes;
+ Printf.printf "Number of bytes: %Ld\n" stats.bytes
+
diff -r 08d6e92809f0 -r 2580868fdc43 xapi.spec
--- a/xapi.spec Wed Jul 14 16:44:47 2010 +0100
+++ b/xapi.spec Fri Jul 23 18:18:39 2010 +0100
@@ -230,6 +230,7 @@
/opt/xensource/libexec/shell.pyo
/opt/xensource/libexec/shell.pyc
/opt/xensource/libexec/shutdown
+/opt/xensource/libexec/sparse_dd
/opt/xensource/libexec/update-mh-info
/opt/xensource/libexec/upload-wrapper
/opt/xensource/libexec/v6d
OMakefile | 1 +
ocaml/xapi/OMakefile | 4 +-
ocaml/xapi/sparse_dd.ml | 364 ++++++++++++++++++++++++++++++++++++++++++++++++
xapi.spec | 1 +
4 files changed, 369 insertions(+), 1 deletions(-)
xen-api.hg-1.patch
Description: Text Data
_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api
|