port latest grub zfs boot code to pygrub Signed-off-by: Mark Johnson diff --git a/tools/libfsimage/Rules.mk b/tools/libfsimage/Rules.mk --- a/tools/libfsimage/Rules.mk +++ b/tools/libfsimage/Rules.mk @@ -1,6 +1,6 @@ include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror +CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ LDFLAGS += -L../common/ PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y)) diff --git a/tools/libfsimage/common/fsimage_grub.h b/tools/libfsimage/common/fsimage_grub.h --- a/tools/libfsimage/common/fsimage_grub.h +++ b/tools/libfsimage/common/fsimage_grub.h @@ -78,6 +78,7 @@ #define ERR_DEV_VALUES 1 #define ERR_WONT_FIT 1 #define ERR_READ 1 +#define ERR_NEWER_VERSION 1 fsi_plugin_ops_t *fsig_init(fsi_plugin_t *, fsig_plugin_ops_t *); diff --git a/tools/libfsimage/zfs/Makefile b/tools/libfsimage/zfs/Makefile --- a/tools/libfsimage/zfs/Makefile +++ b/tools/libfsimage/zfs/Makefile @@ -24,7 +24,8 @@ XEN_ROOT = ../../.. -LIB_SRCS-y = fsys_zfs.c zfs_lzjb.c zfs_sha256.c zfs_fletcher.c +CFLAGS += -DFSYS_ZFS -DFSIMAGE -I$(XEN_ROOT)/tools/libfsimage/zfs +LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c FS = zfs diff --git a/tools/libfsimage/zfs/filesys.h b/tools/libfsimage/zfs/filesys.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/filesys.h @@ -0,0 +1,33 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#ifndef _FILESYS_H +#define _FILESYS_H + +#ifdef FSYS_ZFS + +/* THIS FILE IS INTENTIONALLY BLANK */ + +#endif /* FSYS_ZFS */ + +#endif /* !_FILESYS_H */ + diff --git a/tools/libfsimage/zfs/fsi_zfs.c b/tools/libfsimage/zfs/fsi_zfs.c new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/fsi_zfs.c @@ -0,0 +1,155 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#ifdef FSYS_ZFS + +#include +#include +#include +#include "mb_info.h" + + +#undef filemax +#undef filepos +#undef errnum + + +#define MAXNAMELEN 256 +#define MAXPATHLEN 1024 + +/**** START FROM disk_io.c ****/ +char current_rootpool[MAXNAMELEN]; +char current_bootfs[MAXNAMELEN]; +uint64_t current_bootfs_obj; +char current_bootpath[MAXPATHLEN]; +char current_devid[MAXPATHLEN]; +int is_zfs_mount; +unsigned long best_drive; +unsigned long best_part; +int find_best_root; +unsigned long part_length; +/**** END FROM disk_io.c ****/ + +uint64_t filemax; +uint64_t filepos; + +struct multiboot_info mbi; +fsi_file_t *zfs_ffi; +int errnum; +char *bootstring = NULL; + +extern int zfs_mount(void); +extern int zfs_open(char *filename); +extern int zfs_read(char *buf, int len); + +#define ZFS_SCRATCH_SIZE 0x400000 +#define FSI_MOS_SHIFT 10 +#define FSI_MOS_MASK ((1 << FSI_MOS_SHIFT) - 1) +unsigned char fsi_mos_buf[ZFS_SCRATCH_SIZE + FSI_MOS_MASK + 1]; + +#define FSI_MOS_ALIGN(addr) (((uintptr_t)addr + FSI_MOS_MASK) & \ + ~FSI_MOS_MASK) +#define FSI_MOS(buf) ((FSI_MOS_ALIGN(buf) + \ + ZFS_SCRATCH_SIZE - 0x100000) >> FSI_MOS_SHIFT) + +static int +fsi_zfs_mount(fsi_file_t *ffi, const char *options) +{ + zfs_ffi = ffi; + mbi.mem_upper = FSI_MOS(fsi_mos_buf); + + /* If a boot filesystem is passed in, set it to current_bootfs */ + if (options != NULL) { + if (strlen(options) < MAXNAMELEN) { + strcpy(current_bootfs, options); + } + } + + return (zfs_mount()); +} + +static int +fsi_zfs_open(fsi_file_t *ffi, char *filename) +{ + char *fsi_bootstring; + uint64_t *fmax; + uint64_t *fpos; + int rc; + + zfs_ffi = ffi; + fmax = fsig_filemax(ffi); + fpos = fsig_filepos(ffi); + + rc = zfs_open(filename); + if (rc != 1) { + return (rc); + } + + *fmax = filemax; + *fpos = filepos; + + if (bootstring == NULL) { + rc = asprintf(&bootstring, "zfs-bootfs=%s/%llu,bootpath='%s'", + current_rootpool, current_bootfs_obj, current_bootpath); + if (rc == -1) { + return (rc); + } + fsi_bootstring =
fsi_bootstring_alloc(ffi->ff_fsi, + strlen(bootstring) + 1); + strcpy(fsi_bootstring, bootstring); + } + + return (rc); +} + +static int +fsi_zfs_read(fsi_file_t *ffi, char *buf, int len) +{ + uint64_t *fpos; + int rc; + + zfs_ffi = ffi; + fpos = fsig_filepos(ffi); + filepos = *fpos; + rc = zfs_read(buf, len); + *fpos = filepos; + + return (rc); +} + + +fsi_plugin_ops_t * +fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name) +{ + static fsig_plugin_ops_t ops = { + FSIMAGE_PLUGIN_VERSION, + .fpo_mount = fsi_zfs_mount, + .fpo_dir = fsi_zfs_open, + .fpo_read = fsi_zfs_read, + }; + + *name = "zfs"; + return (fsig_init(fp, &ops)); +} + +#endif /* FSYS_ZFS */ diff --git a/tools/libfsimage/zfs/fsi_zfs.h b/tools/libfsimage/zfs/fsi_zfs.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/fsi_zfs.h @@ -0,0 +1,103 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#ifndef _FSI_ZFS_H +#define _FSI_ZFS_H + +#ifdef FSYS_ZFS + +#include +#include +#include +#include + + +/*** START FROM shared.h ****/ +#include "mb_info.h" + +/* Boot signature related defines for the findroot command */ +#define BOOTSIGN_DIR "/boot/grub/bootsign" +#define BOOTSIGN_BACKUP "/etc/bootsign" + +/* Maybe redirect memory requests through grub_scratch_mem. */ +#define RAW_ADDR(x) (x) +#define RAW_SEG(x) (x) + +/* ZFS will use the top 4 Meg of physical memory (below 4Gig) for scratch */ +#define ZFS_SCRATCH_SIZE 0x400000 + +#define MAXPATHLEN 1024 +#define MAXNAMELEN 256 +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +#define MAXUINT 0xFFFFFFFF + +#undef NULL +#define NULL ((void *) 0) + +#define grub_printf printf +#define grub_strcmp strcmp +#define grub_strncmp strncmp +#define grub_strstr strstr +#define grub_strlen strlen +#define grub_memmove memmove + +extern char current_bootpath[MAXPATHLEN]; +extern char current_rootpool[MAXNAMELEN]; +extern char current_bootfs[MAXNAMELEN]; +extern uint64_t current_bootfs_obj; +extern char current_devid[MAXPATHLEN]; +extern int is_zfs_mount; +extern unsigned long best_drive; +extern unsigned long best_part; +extern int find_best_root; + +extern unsigned long part_length; + +#undef filemax +#undef filepos +extern uint64_t filemax; +extern uint64_t filepos; + +extern struct multiboot_info mbi; + +/*** END FROM shared.h ***/ + +#ifdef __linux__ +typedef unsigned char uchar_t; +#endif + +typedef struct fsi_file *fsi_file_handle_t; +extern fsi_file_handle_t zfs_ffi; +extern int fsig_devread(fsi_file_handle_t, unsigned int, unsigned int, + unsigned int, char *); + +#undef devread +#define devread(a, b, c, d) fsig_devread(zfs_ffi, a, b, c, d) + +#undef errnum +extern int errnum; + +#endif /* FSYS_ZFS */ + +#endif /* !_FSI_ZFS_H */ + diff --git a/tools/libfsimage/zfs/fsys_zfs.c b/tools/libfsimage/zfs/fsys_zfs.c --- a/tools/libfsimage/zfs/fsys_zfs.c +++ b/tools/libfsimage/zfs/fsys_zfs.c @@ -17,17 +17,11 @@ *
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* - * All files in the zfs directory are derived from the OpenSolaris - * zfs grub files. All files in the zfs-include directory were - * included without changes. - */ - -/* * The zfs plug-in routines for GRUB are: * * zfs_mount() - locates a valid uberblock of the root pool and reads @@ -48,59 +42,37 @@ * +--------------------------------------------+ */ -#include -#include +#ifdef FSYS_ZFS -/* From "shared.h" */ -#include "mb_info.h" - -/* Boot signature related defines for the findroot command */ -#define BOOTSIGN_DIR "/boot/grub/bootsign" -#define BOOTSIGN_BACKUP "/etc/bootsign" - -/* Maybe redirect memory requests through grub_scratch_mem. */ -#define RAW_ADDR(x) (x) -#define RAW_SEG(x) (x) - -/* ZFS will use the top 4 Meg of physical memory (below 4Gig) for sratch */ -#define ZFS_SCRATCH_SIZE 0x400000 - -#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) -/* End from shared.h */ - +#include "shared.h" +#include "filesys.h" #include "fsys_zfs.h" /* cache for a file block of the currently zfs_open()-ed file */ -#define file_buf zfs_ba->zfs_file_buf -#define file_start zfs_ba->zfs_file_start -#define file_end zfs_ba->zfs_file_end +static void *file_buf = NULL; +static uint64_t file_start = 0; +static uint64_t file_end = 0; /* cache for a dnode block */ -#define dnode_buf zfs_ba->zfs_dnode_buf -#define dnode_mdn zfs_ba->zfs_dnode_mdn -#define dnode_start zfs_ba->zfs_dnode_start -#define dnode_end zfs_ba->zfs_dnode_end +static dnode_phys_t *dnode_buf = NULL; +static dnode_phys_t *dnode_mdn = NULL; +static uint64_t dnode_start = 0; +static uint64_t dnode_end = 0; -#define stackbase zfs_ba->zfs_stackbase +static uint64_t pool_guid = 0; +static uberblock_t current_uberblock; +static char *stackbase; decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = { - {"noop", 0}, + {"inherit", 0}, /* ZIO_COMPRESS_INHERIT */ {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ - {"off", 0}, - {"lzjb", lzjb_decompress} /* ZIO_COMPRESS_LZJB */ + {"off", 0}, /* ZIO_COMPRESS_OFF */ + {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ + {"empty", 0} /* ZIO_COMPRESS_EMPTY */ }; -/* From disk_io.c */ -/* ZFS root filesystem for booting */ -#define current_bootpath zfs_ba->zfs_current_bootpath -#define current_rootpool zfs_ba->zfs_current_rootpool -#define current_bootfs zfs_ba->zfs_current_bootfs -#define current_bootfs_obj zfs_ba->zfs_current_bootfs_obj -#define is_zfs_mount (*fsig_int1(ffi)) -/* End from disk_io.c */ - -#define is_zfs_open zfs_ba->zfs_open +static int zio_read_data(blkptr_t *bp, void *buf, char *stack); /* * Our own version of bcmp(). 
@@ -108,8 +80,8 @@ static int zfs_bcmp(const void *s1, const void *s2, size_t n) { - const unsigned char *ps1 = s1; - const unsigned char *ps2 = s2; + const uchar_t *ps1 = s1; + const uchar_t *ps2 = s2; if (s1 != s2 && n != 0) { do { @@ -146,15 +118,16 @@ /* Checksum Table and Values */ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { - {{NULL, NULL}, 0, 0, "inherit"}, - {{NULL, NULL}, 0, 0, "on"}, - {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"}, - {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, - {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, - {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, "zilog"}, - {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, - {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, - {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"} + NULL, NULL, 0, 0, "inherit", + NULL, NULL, 0, 0, "on", + zio_checksum_off, zio_checksum_off, 0, 0, "off", + zio_checksum_SHA256, zio_checksum_SHA256, 1, 1, "label", + zio_checksum_SHA256, zio_checksum_SHA256, 1, 1, "gang_header", + NULL, NULL, 0, 0, "zilog", + fletcher_2_native, fletcher_2_byteswap, 0, 0, "fletcher2", + fletcher_4_native, fletcher_4_byteswap, 1, 0, "fletcher4", + zio_checksum_SHA256, zio_checksum_SHA256, 1, 0, "SHA256", + NULL, NULL, 0, 0, "zilog2", }; /* @@ -170,10 +143,9 @@ zio_checksum_verify(blkptr_t *bp, char *data, int size) { zio_cksum_t zc = bp->blk_cksum; - uint32_t checksum = BP_IS_GANG(bp) ? 
ZIO_CHECKSUM_GANG_HEADER : - BP_GET_CHECKSUM(bp); + uint32_t checksum = BP_GET_CHECKSUM(bp); int byteswap = BP_SHOULD_BYTESWAP(bp); - zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1; + zio_eck_t *zec = (zio_eck_t *)(data + size) - 1; zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t actual_cksum, expected_cksum; @@ -184,28 +156,14 @@ if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) return (-1); - if (ci->ci_zbt) { - if (checksum == ZIO_CHECKSUM_GANG_HEADER) { - /* - * 'gang blocks' is not supported. - */ - return (-1); - } - - if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) { - /* byte swapping is not supported */ - return (-1); - } else { - expected_cksum = zbt->zbt_cksum; - zbt->zbt_cksum = zc; - ci->ci_func[0](data, size, &actual_cksum); - zbt->zbt_cksum = expected_cksum; - } + if (ci->ci_eck) { + expected_cksum = zec->zec_cksum; + zec->zec_cksum = zc; + ci->ci_func[0](data, size, &actual_cksum); + zec->zec_cksum = expected_cksum; zc = expected_cksum; } else { - if (BP_IS_GANG(bp)) - return (-1); ci->ci_func[byteswap](data, size, &actual_cksum); } @@ -219,30 +177,14 @@ } /* - * vdev_label_offset takes "offset" (the offset within a vdev_label) and - * returns its physical disk offset (starting from the beginning of the vdev). - * - * Input: - * psize : Physical size of this vdev - * l : Label Number (0-3) - * offset : The offset with a vdev_label in which we want the physical - * address - * Return: - * Success : physical disk offset - * Failure : errnum = ERR_BAD_ARGUMENT, return value is meaningless + * vdev_label_start returns the physical disk offset (in bytes) of + * label "l". */ static uint64_t -vdev_label_offset(fsi_file_t *ffi, uint64_t psize, int l, uint64_t offset) +vdev_label_start(uint64_t psize, int l) { - /* XXX Need to add back label support! 
*/ - if (l >= VDEV_LABELS/2 || offset > sizeof (vdev_label_t)) { - errnum = ERR_BAD_ARGUMENT; - return (0); - } - - return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? + return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); - } /* @@ -283,7 +225,7 @@ * -1 - Failure */ static int -uberblock_verify(uberblock_phys_t *ub, int offset) +uberblock_verify(uberblock_phys_t *ub, uint64_t offset) { uberblock_t *uber = &ub->ubp_uberblock; @@ -297,7 +239,8 @@ if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) return (-1); - if (uber->ub_magic == UBERBLOCK_MAGIC && uber->ub_version > 0) + if (uber->ub_magic == UBERBLOCK_MAGIC && + uber->ub_version > 0 && uber->ub_version <= SPA_VERSION) return (0); return (-1); @@ -310,16 +253,15 @@ * Failure - NULL */ static uberblock_phys_t * -find_bestub(fsi_file_t *ffi, uberblock_phys_t *ub_array, int label) +find_bestub(uberblock_phys_t *ub_array, uint64_t sector) { uberblock_phys_t *ubbest = NULL; - int i, offset; + uint64_t offset; + int i; for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { - offset = vdev_label_offset(ffi, 0, label, - VDEV_UBERBLOCK_OFFSET(i)); - if (errnum == ERR_BAD_ARGUMENT) - return (NULL); + offset = (sector << SPA_MINBLOCKSHIFT) + + VDEV_UBERBLOCK_OFFSET(i); if (uberblock_verify(&ub_array[i], offset) == 0) { if (ubbest == NULL) { ubbest = &ub_array[i]; @@ -335,58 +277,142 @@ } /* - * Read in a block and put its uncompressed data in buf. + * Read a block of data based on the gang block address dva, + * and put its data in buf. 
+ * + * Return: + * 0 - success + * 1 - failure + */ +static int +zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack) +{ + zio_gbh_phys_t *zio_gb; + uint64_t offset, sector; + blkptr_t tmpbp; + int i; + + zio_gb = (zio_gbh_phys_t *)stack; + stack += SPA_GANGBLOCKSIZE; + offset = DVA_GET_OFFSET(dva); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + + /* read in the gang block header */ + if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { + grub_printf("failed to read in a gang block header\n"); + return (1); + } + + /* self checksumming the gang block header */ + BP_ZERO(&tmpbp); + BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER); + BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER); + ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva), + DVA_GET_OFFSET(dva), bp->blk_birth, 0); + if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) { + grub_printf("failed to checksum a gang block header\n"); + return (1); + } + + for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { + if (zio_gb->zg_blkptr[i].blk_birth == 0) + continue; + + if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack)) + return (1); + buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]); + } + + return (0); +} + +/* + * Read in a block of raw data to buf.
+ * + * Return: + * 0 - success + * 1 - failure + */ +static int +zio_read_data(blkptr_t *bp, void *buf, char *stack) +{ + int i, psize; + + psize = BP_GET_PSIZE(bp); + + /* pick a good dva from the block pointer */ + for (i = 0; i < SPA_DVAS_PER_BP; i++) { + uint64_t offset, sector; + + if (bp->blk_dva[i].dva_word[0] == 0 && + bp->blk_dva[i].dva_word[1] == 0) + continue; + + if (DVA_GET_GANG(&bp->blk_dva[i])) { + if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0) + return (0); + } else { + /* read in a data block */ + offset = DVA_GET_OFFSET(&bp->blk_dva[i]); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + if (devread(sector, 0, psize, buf)) + return (0); + } + } + + return (1); +} + +/* + * Read in a block of data, verify its checksum, decompress if needed, + * and put the uncompressed data in buf. * * Return: * 0 - success * errnum - failure */ static int -zio_read(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack) +zio_read(blkptr_t *bp, void *buf, char *stack) { - uint64_t offset, sector; - int psize, lsize; - int i, comp, cksum; + int lsize, psize, comp; + char *retbuf; + comp = BP_GET_COMPRESS(bp); + lsize = BP_GET_LSIZE(bp); psize = BP_GET_PSIZE(bp); - lsize = BP_GET_LSIZE(bp); - comp = BP_GET_COMPRESS(bp); - cksum = BP_GET_CHECKSUM(bp); if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS || (comp != ZIO_COMPRESS_OFF && - decomp_table[comp].decomp_func == NULL)) + decomp_table[comp].decomp_func == NULL)) { + grub_printf("compression algorithm not supported\n"); return (ERR_FSYS_CORRUPT); - - /* pick a good dva from the block pointer */ - for (i = 0; i < SPA_DVAS_PER_BP; i++) { - - if (bp->blk_dva[i].dva_word[0] == 0 && - bp->blk_dva[i].dva_word[1] == 0) - continue; - - /* read in a block */ - offset = DVA_GET_OFFSET(&bp->blk_dva[i]); - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); - - if (comp != ZIO_COMPRESS_OFF) { - - if (devread(ffi, sector, 0, psize, stack) == 0) - continue; - if (zio_checksum_verify(bp, stack, psize) != 0) - continue; - 
decomp_table[comp].decomp_func(stack, buf, psize, - lsize); - } else { - if (devread(ffi, sector, 0, psize, buf) == 0) - continue; - if (zio_checksum_verify(bp, buf, psize) != 0) - continue; - } - return (0); } - return (ERR_FSYS_CORRUPT); + if ((char *)buf < stack && ((char *)buf) + lsize > stack) { + grub_printf("not enough memory allocated\n"); + return (ERR_WONT_FIT); + } + + retbuf = buf; + if (comp != ZIO_COMPRESS_OFF) { + buf = stack; + stack += psize; + } + + if (zio_read_data(bp, buf, stack)) { + grub_printf("zio_read_data failed\n"); + return (ERR_FSYS_CORRUPT); + } + + if (zio_checksum_verify(bp, buf, psize) != 0) { + grub_printf("checksum verification failed\n"); + return (ERR_FSYS_CORRUPT); + } + + if (comp != ZIO_COMPRESS_OFF) + decomp_table[comp].decomp_func(buf, retbuf, psize, lsize); + + return (0); } /* @@ -398,8 +424,7 @@ * errnum - failure */ static int -dmu_read(fsi_file_t *ffi, dnode_phys_t *dn, uint64_t blkid, void *buf, - char *stack) +dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack) { int idx, level; blkptr_t *bp_array = dn->dn_blkptr; @@ -421,9 +446,10 @@ grub_memset(buf, 0, dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); break; - } else if ((errnum = zio_read(ffi, bp, tmpbuf, stack))) { + } else if (errnum = zio_read(bp, tmpbuf, stack)) { return (errnum); } + bp_array = tmpbuf; } @@ -447,7 +473,7 @@ chunks = objsize/MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { - if (strcmp(mzap_ent[i].mze_name, name) == 0) { + if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { *value = mzap_ent[i].mze_value; return (0); } @@ -457,7 +483,7 @@ } static uint64_t -zap_hash(fsi_file_t *ffi, uint64_t salt, const char *name) +zap_hash(uint64_t salt, const char *name) { static uint64_t table[256]; const uint8_t *cp; @@ -488,7 +514,7 @@ * those are the onces that we first pay attention to when * chosing the bucket. 
*/ - crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); + crc &= ~((1ULL << (64 - 28)) - 1); return (crc); } @@ -590,7 +616,7 @@ * errnum - failure */ static int -fzap_lookup(fsi_file_t *ffi, dnode_phys_t *zap_dnode, zap_phys_t *zap, +fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, char *name, uint64_t *value, char *stack) { zap_leaf_phys_t *l; @@ -598,10 +624,11 @@ int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT); /* Verify if this is a fat zap header block */ - if (zap->zap_magic != (uint64_t)ZAP_MAGIC) + if (zap->zap_magic != (uint64_t)ZAP_MAGIC || + zap->zap_flags != 0) return (ERR_FSYS_CORRUPT); - hash = zap_hash(ffi, zap->zap_salt, name); + hash = zap_hash(zap->zap_salt, name); if (errnum) return (errnum); @@ -616,7 +643,9 @@ /* Get the leaf block */ l = (zap_leaf_phys_t *)stack; stack += 1<dn_datablkszsec << SPA_MINBLOCKSHIFT; stack += size; - if ((errnum = dmu_read(ffi, zap_dnode, 0, zapbuf, stack))) + + if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) return (errnum); block_type = *((uint64_t *)zapbuf); @@ -651,7 +680,7 @@ return (mzap_lookup(zapbuf, size, name, val)); } else if (block_type == ZBT_HEADER) { /* this is a fat zap */ - return (fzap_lookup(ffi, zap_dnode, zapbuf, name, + return (fzap_lookup(zap_dnode, zapbuf, name, val, stack)); } @@ -672,14 +701,13 @@ * errnum - failure */ static int -dnode_get(fsi_file_t *ffi, dnode_phys_t *mdn, uint64_t objnum, - uint8_t type, dnode_phys_t *buf, char *stack) +dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf, + char *stack) { uint64_t blkid, blksz; /* the block id this object dnode is in */ int epbs; /* shift of number of dnodes in a block */ int idx; /* index within a block */ dnode_phys_t *dnbuf; - zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT; epbs = zfs_log2(blksz) - DNODE_SHIFT; @@ -703,7 +731,7 @@ stack += blksz; } - if ((errnum = dmu_read(ffi, mdn, blkid, (char *)dnbuf, stack))) + if 
(errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) return (errnum); grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); @@ -723,16 +751,16 @@ { char *tptr; - if (((tptr = strstr(str, "menu.lst"))) && + if ((tptr = grub_strstr(str, "menu.lst")) && (tptr[8] == '\0' || tptr[8] == ' ') && *(tptr-1) == '/') return (1); - if (strncmp(str, BOOTSIGN_DIR"/", - strlen(BOOTSIGN_DIR) + 1) == 0) + if (grub_strncmp(str, BOOTSIGN_DIR"/", + grub_strlen(BOOTSIGN_DIR) + 1) == 0) return (1); - if (strcmp(str, BOOTSIGN_BACKUP) == 0) + if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0) return (1); return (0); @@ -748,44 +776,46 @@ * errnum - failure */ static int -dnode_get_path(fsi_file_t *ffi, dnode_phys_t *mdn, char *path, - dnode_phys_t *dn, char *stack) +dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn, + char *stack) { uint64_t objnum, version; char *cname, ch; - if ((errnum = dnode_get(ffi, mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, - dn, stack))) + if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, + dn, stack)) return (errnum); - if ((errnum = zap_lookup(ffi, dn, ZPL_VERSION_STR, &version, stack))) + if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) + return (errnum); + if (version > ZPL_VERSION) + return (-1); + + if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) return (errnum); - if ((errnum = zap_lookup(ffi, dn, ZFS_ROOT_OBJ, &objnum, stack))) - return (errnum); - - if ((errnum = dnode_get(ffi, mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, - dn, stack))) + if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, + dn, stack)) return (errnum); /* skip leading slashes */ while (*path == '/') path++; - while (*path && !isspace((uint8_t)*path)) { + while (*path && !isspace(*path)) { /* get the next component name */ cname = path; - while (*path && !isspace((uint8_t)*path) && *path != '/') + while (*path && !isspace(*path) && *path != '/') path++; ch = *path; *path = 0; /* ensure null termination */ - if ((errnum = zap_lookup(ffi, dn, cname, 
&objnum, stack))) + if (errnum = zap_lookup(dn, cname, &objnum, stack)) return (errnum); objnum = ZFS_DIRENT_OBJ(objnum); - if ((errnum = dnode_get(ffi, mdn, objnum, 0, dn, stack))) + if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) return (errnum); *path = ch; @@ -807,35 +837,32 @@ * errnum -failure */ static int -get_default_bootfsobj(fsi_file_t *ffi, dnode_phys_t *mosmdn, - uint64_t *obj, char *stack) +get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack) { uint64_t objnum = 0; dnode_phys_t *dn = (dnode_phys_t *)stack; stack += DNODE_SIZE; - if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, dn, stack))) + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack)) return (errnum); /* * find the object number for 'pool_props', and get the dnode * of the 'pool_props'. */ - if (zap_lookup(ffi, dn, DMU_POOL_PROPS, &objnum, stack)) + if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_POOL_PROPS, dn, - stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) return (errnum); - if (zap_lookup(ffi, dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) + if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); if (!objnum) return (ERR_FILESYSTEM_NOT_FOUND); - *obj = objnum; return (0); } @@ -854,29 +881,30 @@ * errnum - failure */ static int -get_objset_mdn(fsi_file_t *ffi, dnode_phys_t *mosmdn, char *fsname, - uint64_t *obj, dnode_phys_t *mdn, char *stack) +get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj, + dnode_phys_t *mdn, char *stack) { uint64_t objnum, headobj; char *cname, ch; blkptr_t *bp; objset_phys_t *osp; + int issnapshot = 0; + char *snapname; if (fsname == NULL && obj) { headobj = *obj; goto skip; } - if ((errnum = dnode_get(ffi, mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, mdn, stack))) + if 
(errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, mdn, stack)) return (errnum); - if ((errnum = zap_lookup(ffi, mdn, DMU_POOL_ROOT_DATASET, &objnum, - stack))) + if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, + stack)) return (errnum); - if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR, mdn, - stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack)) return (errnum); if (fsname == NULL) { @@ -886,51 +914,74 @@ } /* take out the pool name */ - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/') + while (*fsname && !isspace(*fsname) && *fsname != '/') fsname++; - while (*fsname && !isspace((uint8_t)*fsname)) { + while (*fsname && !isspace(*fsname)) { uint64_t childobj; while (*fsname == '/') fsname++; cname = fsname; - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/') + while (*fsname && !isspace(*fsname) && *fsname != '/') fsname++; ch = *fsname; *fsname = 0; + snapname = cname; + while (*snapname && !isspace(*snapname) && *snapname != '@') + snapname++; + if (*snapname == '@') { + issnapshot = 1; + *snapname = 0; + } childobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; - if ((errnum = dnode_get(ffi, mosmdn, childobj, - DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))) + if (errnum = dnode_get(mosmdn, childobj, + DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) return (errnum); - if (zap_lookup(ffi, mdn, cname, &objnum, stack)) + if (zap_lookup(mdn, cname, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(ffi, mosmdn, objnum, DMU_OT_DSL_DIR, - mdn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, + mdn, stack)) return (errnum); *fsname = ch; + if (issnapshot) + *snapname = '@'; } headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj; if (obj) *obj = headobj; skip: - if ((errnum = dnode_get(ffi, mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, - stack))) + if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, 
stack)) return (errnum); + if (issnapshot) { + uint64_t snapobj; - /* TODO: Add snapshot support here - for fsname=snapshot-name */ + snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> + ds_snapnames_zapobj; + + if (errnum = dnode_get(mosmdn, snapobj, + DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) + return (errnum); + if (zap_lookup(mdn, snapname + 1, &headobj, stack)) + return (ERR_FILESYSTEM_NOT_FOUND); + if (errnum = dnode_get(mosmdn, headobj, + DMU_OT_DSL_DATASET, mdn, stack)) + return (errnum); + if (obj) + *obj = headobj; + } bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; osp = (objset_phys_t *)stack; stack += sizeof (objset_phys_t); - if ((errnum = zio_read(ffi, bp, osp, stack))) + if (errnum = zio_read(bp, osp, stack)) return (errnum); grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); @@ -982,7 +1033,7 @@ /* skip the header, nvl_version, and nvl_nvflag */ nvlist = nvlist + 4 * 2; - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) + while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) nvlist += encode_size; /* goto the next nvpair */ nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ @@ -1006,7 +1057,7 @@ * Loop thru the nvpair list * The XDR representation of an integer is in big-endian byte order. 
*/ - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) { + while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) { nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ @@ -1019,11 +1070,11 @@ type = BSWAP_32(*(uint32_t *)nvpair); nvpair += 4; - if (((strncmp(nvp_name, name, name_len) == 0) && - type == valtype)) { + if ((grub_strncmp(nvp_name, name, name_len) == 0) && + type == valtype) { int nelm; - if (((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)) + if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1) return (1); nvpair += 4; @@ -1069,8 +1120,6 @@ DATA_TYPE_UINT64, NULL) == 0 || nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival, DATA_TYPE_UINT64, NULL) == 0 || - nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival, - DATA_TYPE_UINT64, NULL) == 0 || nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival, DATA_TYPE_UINT64, NULL) == 0) return (ERR_DEV_VALUES); @@ -1079,26 +1128,58 @@ } /* - * Get a list of valid vdev pathname from the boot device. - * The caller should already allocate MAXNAMELEN memory for bootpath. + * Get a valid vdev pathname/devid from the boot device. + * The caller should already allocate MAXPATHLEN memory for bootpath and devid. 
*/ static int -vdev_get_bootpath(char *nv, char *bootpath) +vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath, + int is_spare) { char type[16]; - bootpath[0] = '\0'; if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING, NULL)) return (ERR_FSYS_CORRUPT); if (strcmp(type, VDEV_TYPE_DISK) == 0) { - if (vdev_validate(nv) != 0 || - nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath, - DATA_TYPE_STRING, NULL) != 0) + uint64_t guid; + + if (vdev_validate(nv) != 0) return (ERR_NO_BOOTPATH); - } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) { + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID, + &guid, DATA_TYPE_UINT64, NULL) != 0) + return (ERR_NO_BOOTPATH); + + if (guid != inguid) + return (ERR_NO_BOOTPATH); + + /* for a spare vdev, pick the disk labeled with "is_spare" */ + if (is_spare) { + uint64_t spare = 0; + (void) nvlist_lookup_value(nv, ZPOOL_CONFIG_IS_SPARE, + &spare, DATA_TYPE_UINT64, NULL); + if (!spare) + return (ERR_NO_BOOTPATH); + } + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, + bootpath, DATA_TYPE_STRING, NULL) != 0) + bootpath[0] = '\0'; + + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID, + devid, DATA_TYPE_STRING, NULL) != 0) + devid[0] = '\0'; + + if (strlen(bootpath) >= MAXPATHLEN || + strlen(devid) >= MAXPATHLEN) + return (ERR_WONT_FIT); + + return (0); + + } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || + strcmp(type, VDEV_TYPE_REPLACING) == 0 || + (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { int nelm, i; char *child; @@ -1107,28 +1188,16 @@ return (ERR_FSYS_CORRUPT); for (i = 0; i < nelm; i++) { - char tmp_path[MAXNAMELEN]; char *child_i; child_i = nvlist_array(child, i); - if (vdev_validate(child_i) != 0) - continue; - - if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH, - tmp_path, DATA_TYPE_STRING, NULL) != 0) - return (ERR_NO_BOOTPATH); - - if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN) - return (ERR_WONT_FIT); - - if (strlen(bootpath) == 0) - sprintf(bootpath, "%s", 
tmp_path); - else - sprintf(bootpath, "%s %s", bootpath, tmp_path); + if (vdev_get_bootpath(child_i, inguid, devid, + bootpath, is_spare) == 0) + return (0); } } - return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH); + return (ERR_NO_BOOTPATH); } /* @@ -1138,22 +1207,24 @@ * 0 - success * ERR_* - failure */ -static int -check_pool_label(fsi_file_t *ffi, int label, char *stack) +int +check_pool_label(uint64_t sector, char *stack, char *outdevid, + char *outpath, uint64_t *outguid) { vdev_phys_t *vdev; - uint64_t sector, pool_state, txg = 0; + uint64_t pool_state, txg = 0; char *nvlist, *nv; - zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; + uint64_t diskguid; + uint64_t version; - sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE + - VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT; + sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); /* Read in the vdev name-value pair list (112K). */ - if (devread(ffi, sector, 0, VDEV_PHYS_SIZE, stack) == 0) + if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0) return (ERR_READ); vdev = (vdev_phys_t *)stack; + stack += sizeof (vdev_phys_t); if (nvlist_unpack(vdev->vp_nvlist, &nvlist)) return (ERR_FSYS_CORRUPT); @@ -1177,13 +1248,22 @@ if (txg == 0) return (ERR_NO_BOOTPATH); + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); + if (version > SPA_VERSION) + return (ERR_NEWER_VERSION); if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, DATA_TYPE_NVLIST, NULL)) return (ERR_FSYS_CORRUPT); - - if (vdev_get_bootpath(nv, current_bootpath)) + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); + if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) return (ERR_NO_BOOTPATH); - + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, + DATA_TYPE_UINT64, NULL)) + return (ERR_FSYS_CORRUPT); return (0); } @@ -1195,34 +1275,23 @@ * 1 - success * 0 - failure */ -static 
int -zfs_mount(fsi_file_t *ffi, const char *options) +int +zfs_mount(void) { char *stack; int label = 0; - uberblock_phys_t *ub_array, *ubbest = NULL; + uberblock_phys_t *ub_array, *ubbest; objset_phys_t *osp; - zfs_bootarea_t *zfs_ba; + char tmp_bootpath[MAXNAMELEN]; + char tmp_devid[MAXNAMELEN]; + uint64_t tmp_guid; + uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; + int err = errnum; /* preserve previous errnum state */ - /* if zfs is already mounted, don't do it again */ - if (is_zfs_mount == 1) - return (1); - - /* get much bigger data block for zfs */ - if (((zfs_ba = malloc(sizeof (zfs_bootarea_t))) == NULL)) { - return (1); - } - bzero(zfs_ba, sizeof (zfs_bootarea_t)); - - /* replace small data area in fsi with big one */ - free(ffi->ff_fsi->f_data); - ffi->ff_fsi->f_data = (void *)zfs_ba; - - /* If an boot filesystem is passed in, set it to current_bootfs */ - if (options != NULL) { - if (strlen(options) < MAXNAMELEN) { - strcpy(current_bootfs, options); - } + /* if it's our first time here, zero the best uberblock out */ + if (best_drive == 0 && best_part == 0 && find_best_root) { + grub_memset(&current_uberblock, 0, sizeof (uberblock_t)); + pool_guid = 0; } stackbase = ZFS_SCRATCH; @@ -1232,43 +1301,69 @@ osp = (objset_phys_t *)stack; stack += sizeof (objset_phys_t); + adjpl = P2ALIGN(adjpl, (uint64_t)sizeof (vdev_label_t)); - /* XXX add back labels support? */ - for (label = 0; ubbest == NULL && label < (VDEV_LABELS/2); label++) { - uint64_t sector = (label * sizeof (vdev_label_t) + - VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE + - VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT; + for (label = 0; label < VDEV_LABELS; label++) { + /* + * some eltorito stacks don't give us a size and + * we end up setting the size to MAXUINT, further + * some of these devices stop working once a single + * read past the end has been issued.
Checking + * for a maximum part_length and skipping the backup + * labels at the end of the slice/partition/device + * avoids breaking down on such devices. + */ + if (part_length == MAXUINT && label == 2) + break; + + uint64_t sector = vdev_label_start(adjpl, + label) >> SPA_MINBLOCKSHIFT; /* Read in the uberblock ring (128K). */ - if (devread(ffi, sector, 0, VDEV_UBERBLOCK_RING, + if (devread(sector + + ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> + SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING, (char *)ub_array) == 0) continue; - if ((ubbest = find_bestub(ffi, ub_array, label)) != NULL && - zio_read(ffi, &ubbest->ubp_uberblock.ub_rootbp, osp, stack) + if ((ubbest = find_bestub(ub_array, sector)) != NULL && + zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack) == 0) { VERIFY_OS_TYPE(osp, DMU_OST_META); + if (check_pool_label(sector, stack, tmp_devid, + tmp_bootpath, &tmp_guid)) + continue; + if (pool_guid == 0) + pool_guid = tmp_guid; + + if (find_best_root && ((pool_guid != tmp_guid) || + vdev_uberblock_compare(&ubbest->ubp_uberblock, + &(current_uberblock)) <= 0)) + continue; + /* Got the MOS. Save it at the memory addr MOS. */ grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); - - if (check_pool_label(ffi, label, stack)) - return (0); - - /* - * Copy fsi->f_data to ffi->ff_data since - * fsig_mount copies from ff_data to f_data - * overwriting fsi->f_data. - */ - bcopy(zfs_ba, fsig_file_buf(ffi), FSYS_BUFLEN); - + grub_memmove(&current_uberblock, + &ubbest->ubp_uberblock, sizeof (uberblock_t)); + grub_memmove(current_bootpath, tmp_bootpath, + MAXNAMELEN); + grub_memmove(current_devid, tmp_devid, + grub_strlen(tmp_devid)); is_zfs_mount = 1; return (1); } } + /* + * While some fs impls. (tftp) rely on setting and keeping + * global errnums set, others won't reset it and will break + * when issuing rawreads. The goal here is to simply not + * have zfs mount attempts impact the previous state.
+ */ + errnum = err; return (0); } @@ -1280,13 +1375,11 @@ * 1 - success * 0 - failure */ -static int -zfs_open(fsi_file_t *ffi, char *filename) +int +zfs_open(char *filename) { char *stack; dnode_phys_t *mdn; - char *bootstring; - zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; file_buf = NULL; stackbase = ZFS_SCRATCH; @@ -1304,62 +1397,72 @@ * do not goto 'current_bootfs'. */ if (is_top_dataset_file(filename)) { - if ((errnum = get_objset_mdn(ffi, MOS, NULL, NULL, mdn, stack))) + if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) return (0); current_bootfs_obj = 0; } else { if (current_bootfs[0] == '\0') { /* Get the default root filesystem object number */ - if ((errnum = get_default_bootfsobj(ffi, MOS, - &current_bootfs_obj, stack))) + if (errnum = get_default_bootfsobj(MOS, + &current_bootfs_obj, stack)) return (0); - if ((errnum = get_objset_mdn(ffi, MOS, NULL, - &current_bootfs_obj, mdn, stack))) + + if (errnum = get_objset_mdn(MOS, NULL, + &current_bootfs_obj, mdn, stack)) return (0); } else { - if ((errnum = get_objset_mdn(ffi, MOS, - current_bootfs, &current_bootfs_obj, mdn, stack))) + if (errnum = get_objset_mdn(MOS, current_bootfs, + &current_bootfs_obj, mdn, stack)) { + grub_memset(current_bootfs, 0, MAXNAMELEN); return (0); - } - - /* - * Put zfs rootpool and boot obj number into bootstring.
- */ - if (is_zfs_open == 0) { - char temp[25]; /* needs to hold long long */ - int alloc_size; - char zfs_bootstr[] = "zfs-bootfs="; - char zfs_bootpath[] = ",bootpath='"; - - snprintf(temp, sizeof(temp), "%llu", (unsigned long long) - current_bootfs_obj); - alloc_size = strlen(zfs_bootstr) + - strlen(current_rootpool) + - strlen(temp) + strlen(zfs_bootpath) + - strlen(current_bootpath) + 3; - bootstring = fsi_bootstring_alloc(ffi->ff_fsi, - alloc_size); - if (bootstring != NULL) { - strcpy(bootstring, zfs_bootstr); - strcat(bootstring, current_rootpool); - strcat(bootstring, "/"); - strcat(bootstring, temp); - strcat(bootstring, zfs_bootpath); - strcat(bootstring, current_bootpath); - strcat(bootstring, "'"); - is_zfs_open = 1; } } } - if (dnode_get_path(ffi, mdn, filename, DNODE, stack)) { + if (dnode_get_path(mdn, filename, DNODE, stack)) { errnum = ERR_FILE_NOT_FOUND; return (0); } /* get the file size and set the file position to 0 */ - filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size; + + /* + * For DMU_OT_SA we will need to locate the SIZE attribute, + * which could be either in the bonus buffer + * or the "spill" block.
+ */ + if (DNODE->dn_bonustype == DMU_OT_SA) { + sa_hdr_phys_t *sahdrp; + int hdrsize; + + if (DNODE->dn_bonuslen != 0) { + sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE); + } else { + if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + blkptr_t *bp = &DNODE->dn_spill; + void *buf; + + buf = (void *)stack; + stack += BP_GET_LSIZE(bp); + + /* reset errnum to rawread() failure */ + errnum = 0; + if (zio_read(bp, buf, stack) != 0) { + return (0); + } + sahdrp = buf; + } else { + errnum = ERR_FSYS_CORRUPT; + return (0); + } + } + hdrsize = SA_HDR_SIZE(sahdrp); + filemax = *(uint64_t *)((char *)sahdrp + hdrsize + + SA_SIZE_OFFSET); + } else { + filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size; + } filepos = 0; dnode_buf = NULL; @@ -1373,12 +1476,12 @@ * len - the length successfully read in to the buffer * 0 - failure */ -static int -zfs_read(fsi_file_t *ffi, char *buf, int len) +int +zfs_read(char *buf, int len) { char *stack; + char *tmpbuf; int blksz, length, movesize; - zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data; if (file_buf == NULL) { file_buf = stackbase; @@ -1411,7 +1514,7 @@ */ uint64_t blkid = filepos / blksz; - if ((errnum = dmu_read(ffi, DNODE, blkid, file_buf, stack))) + if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) return (0); file_start = blkid * blksz; @@ -1438,16 +1541,4 @@ return (1); } -fsi_plugin_ops_t * -fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name) -{ - static fsig_plugin_ops_t ops = { - FSIMAGE_PLUGIN_VERSION, - .fpo_mount = zfs_mount, - .fpo_dir = zfs_open, - .fpo_read = zfs_read - }; - - *name = "zfs"; - return (fsig_init(fp, &ops)); -} +#endif /* FSYS_ZFS */ diff --git a/tools/libfsimage/zfs/fsys_zfs.h b/tools/libfsimage/zfs/fsys_zfs.h --- a/tools/libfsimage/zfs/fsys_zfs.h +++ b/tools/libfsimage/zfs/fsys_zfs.h @@ -17,64 +17,56 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ #ifndef _FSYS_ZFS_H #define _FSYS_ZFS_H -#include -#include +#ifdef FSYS_ZFS -#include "zfs-include/zfs.h" -#include "zfs-include/dmu.h" -#include "zfs-include/spa.h" -#include "zfs-include/zio.h" -#include "zfs-include/zio_checksum.h" -#include "zfs-include/vdev_impl.h" -#include "zfs-include/zap_impl.h" -#include "zfs-include/zap_leaf.h" -#include "zfs-include/uberblock_impl.h" -#include "zfs-include/dnode.h" -#include "zfs-include/dsl_dir.h" -#include "zfs-include/zfs_acl.h" -#include "zfs-include/zfs_znode.h" -#include "zfs-include/dsl_dataset.h" -#include "zfs-include/zil.h" -#include "zfs-include/dmu_objset.h" +#ifndef FSIMAGE +typedef unsigned long long uint64_t; +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned char uint8_t; +typedef unsigned char uchar_t; + +#if defined(_LP64) || defined(_I32LPx) +typedef unsigned long size_t; +#else +typedef unsigned int size_t; +#endif +#else +#include "fsi_zfs.h" +#endif /* !FSIMAGE */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* * Global Memory addresses to store MOS and DNODE data */ -#define MOS ((dnode_phys_t *)(((zfs_bootarea_t *) \ - (ffi->ff_fsi->f_data))->zfs_data)) +#define MOS ((dnode_phys_t *)\ + (RAW_ADDR((mbi.mem_upper << 10) + 0x100000) - ZFS_SCRATCH_SIZE)) #define DNODE (MOS+1) /* move sizeof(dnode_phys_t) bytes */ #define ZFS_SCRATCH ((char *)(DNODE+1)) -#define MAXNAMELEN 256 - -typedef struct zfs_bootarea { - char zfs_current_bootpath[MAXNAMELEN]; - char zfs_current_rootpool[MAXNAMELEN]; - char zfs_current_bootfs[MAXNAMELEN]; - uint64_t zfs_current_bootfs_obj; - int zfs_open; - - /* cache for a file block of the currently zfs_open()-ed file */ - void *zfs_file_buf; - uint64_t zfs_file_start; - uint64_t zfs_file_end; - - /* cache for a dnode block */ - dnode_phys_t *zfs_dnode_buf; - 
dnode_phys_t *zfs_dnode_mdn; - uint64_t zfs_dnode_start; - uint64_t zfs_dnode_end; - - char *zfs_stackbase; - char zfs_data[0x400000]; -} zfs_bootarea_t; - /* * Verify dnode type. * Can only be used in functions returning non-0 for failure. @@ -109,7 +101,7 @@ */ #define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) #undef offsetof -#define offsetof(t, m) (size_t)(&(((t *)0)->m)) +#define offsetof(t, m) ((int)&(((t *)0)->m)) #define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT #define VDEV_UBERBLOCK_OFFSET(n) \ offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT]) @@ -120,8 +112,8 @@ typedef struct uberblock_phys { uberblock_t ubp_uberblock; char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - - sizeof (zio_block_tail_t)]; - zio_block_tail_t ubp_zbt; + sizeof (zio_eck_t)]; + zio_eck_t ubp_zec; } uberblock_phys_t; /* @@ -132,6 +124,15 @@ ((offset + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT) /* + * return x rounded down to an align boundary + * eg, P2ALIGN(1200, 1024) == 1024 (1*align) + * eg, P2ALIGN(1024, 1024) == 1024 (1*align) + * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align) + * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align) + */ +#define P2ALIGN(x, align) ((x) & -(align)) + +/* * For nvlist manipulation. (from nvpair.h) */ #define NV_ENCODE_NATIVE 0 @@ -200,4 +201,6 @@ extern void zio_checksum_SHA256(const void *, uint64_t, zio_cksum_t *); extern int lzjb_decompress(void *, void *, size_t, size_t); +#endif /* FSYS_ZFS */ + #endif /* !_FSYS_ZFS_H */ diff --git a/tools/libfsimage/zfs/shared.h b/tools/libfsimage/zfs/shared.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/shared.h @@ -0,0 +1,33 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#ifndef _SHARED_H +#define _SHARED_H + +#ifdef FSYS_ZFS + +/* THIS FILE IS INTENTIONALLY BLANK */ + +#endif /* FSYS_ZFS */ + +#endif /* !_SHARED_H */ + diff --git a/tools/libfsimage/zfs/zfs-include/dmu.h b/tools/libfsimage/zfs/zfs-include/dmu.h --- a/tools/libfsimage/zfs/zfs-include/dmu.h +++ b/tools/libfsimage/zfs/zfs-include/dmu.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms.
*/ @@ -56,7 +56,7 @@ DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ - DMU_OT_ACL, /* ACL */ + DMU_OT_OLDACL, /* OLD ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ @@ -73,7 +73,22 @@ DMU_OT_SPA_HISTORY, /* UINT8 */ DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ DMU_OT_POOL_PROPS, /* ZAP */ - + DMU_OT_DSL_PERMS, /* ZAP */ + DMU_OT_ACL, /* ACL */ + DMU_OT_SYSACL, /* SYSACL */ + DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ + DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ + DMU_OT_NEXT_CLONES, /* ZAP */ + DMU_OT_SCRUB_QUEUE, /* ZAP */ + DMU_OT_USERGROUP_USED, /* ZAP */ + DMU_OT_USERGROUP_QUOTA, /* ZAP */ + DMU_OT_USERREFS, /* ZAP */ + DMU_OT_DDT_ZAP, /* ZAP */ + DMU_OT_DDT_STATS, /* ZAP */ + DMU_OT_SA, /* System attr */ + DMU_OT_SA_MASTER_NODE, /* ZAP */ + DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ + DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ DMU_OT_NUMTYPES } dmu_object_type_t; diff --git a/tools/libfsimage/zfs/zfs-include/dnode.h b/tools/libfsimage/zfs/zfs-include/dnode.h --- a/tools/libfsimage/zfs/zfs-include/dnode.h +++ b/tools/libfsimage/zfs/zfs-include/dnode.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -47,6 +47,8 @@ #define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) #define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) +#define DNODE_FLAG_SPILL_BLKPTR (1<<2) + #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) @@ -70,7 +72,8 @@ uint64_t dn_pad3[4]; blkptr_t dn_blkptr[1]; - uint8_t dn_bonus[DN_MAX_BONUSLEN]; + uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)]; + blkptr_t dn_spill; } dnode_phys_t; #endif /* _SYS_DNODE_H */ diff --git a/tools/libfsimage/zfs/zfs-include/dsl_dataset.h b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h --- a/tools/libfsimage/zfs/zfs-include/dsl_dataset.h +++ b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h @@ -24,6 +24,8 @@ #ifndef _SYS_DSL_DATASET_H #define _SYS_DSL_DATASET_H +#pragma ident "%Z%%M% %I% %E% SMI" + typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; diff --git a/tools/libfsimage/zfs/zfs-include/dsl_dir.h b/tools/libfsimage/zfs/zfs-include/dsl_dir.h --- a/tools/libfsimage/zfs/zfs-include/dsl_dir.h +++ b/tools/libfsimage/zfs/zfs-include/dsl_dir.h @@ -24,6 +24,8 @@ #ifndef _SYS_DSL_DIR_H #define _SYS_DSL_DIR_H +#pragma ident "%Z%%M% %I% %E% SMI" + typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; diff --git a/tools/libfsimage/zfs/zfs-include/sa_impl.h b/tools/libfsimage/zfs/zfs-include/sa_impl.h new file mode 100644 --- /dev/null +++ b/tools/libfsimage/zfs/zfs-include/sa_impl.h @@ -0,0 +1,35 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#ifndef _SYS_SA_IMPL_H +#define _SYS_SA_IMPL_H + +typedef struct sa_hdr_phys { + uint32_t sa_magic; + uint16_t sa_layout_info; + uint16_t sa_lengths[1]; +} sa_hdr_phys_t; + +#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) +#define SA_SIZE_OFFSET 0x8 + +#endif /* _SYS_SA_IMPL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/spa.h b/tools/libfsimage/zfs/zfs-include/spa.h --- a/tools/libfsimage/zfs/zfs-include/spa.h +++ b/tools/libfsimage/zfs/zfs-include/spa.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -63,6 +63,11 @@ #define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) /* + * Size of block to hold the configuration data (a packed nvlist) + */ +#define SPA_CONFIG_BLOCKSIZE (1 << 14) + +/* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. 
* The ASIZE encoding should be at least 64 times larger (6 more bits) * to support up to 4-way RAID-Z mirror mode with worst-case gang block @@ -106,15 +111,15 @@ * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE | + * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 9 | padding | + * 9 | physical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * a | birth txg | + * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | fill count | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -138,25 +143,29 @@ * cksum checksum function * comp compression function * G gang block indicator - * E endianness + * B byteorder (endianness) + * D dedup + * X unused + * lvl level of indirection * type DMU object type - * lvl level of indirection - * birth txg transaction group in which the block was born + * phys birth txg of block allocation; zero if same as logical birth txg + * log. 
birth transaction group in which the block was logically born * fill count number of non-zero blocks under this bp * checksum[4] 256-bit checksum of the data this bp describes */ -typedef struct blkptr { - dva_t blk_dva[3]; /* 128-bit Data Virtual Address */ - uint64_t blk_prop; /* size, compression, type, etc */ - uint64_t blk_pad[3]; /* Extra space for the future */ - uint64_t blk_birth; /* transaction group at birth */ - uint64_t blk_fill; /* fill count */ - zio_cksum_t blk_cksum; /* 256-bit checksum */ -} blkptr_t; - #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ +typedef struct blkptr { + dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ + uint64_t blk_prop; /* size, compression, type, etc */ + uint64_t blk_pad[2]; /* Extra space for the future */ + uint64_t blk_phys_birth; /* txg when block was allocated */ + uint64_t blk_birth; /* transaction group at birth */ + uint64_t blk_fill; /* fill count */ + zio_cksum_t blk_cksum; /* 256-bit checksum */ +} blkptr_t; + /* * Macros to get and set fields in a bp or DVA. */ @@ -180,8 +189,7 @@ #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) #define BP_GET_LSIZE(bp) \ - (BP_IS_HOLE(bp) ? 
0 : \ - BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)) + BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) #define BP_SET_LSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) @@ -190,20 +198,35 @@ #define BP_SET_PSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) -#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) -#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) -#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) +#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) +#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) -#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) -#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) +#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) +#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) -#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) -#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) +#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) -#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) -#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) +#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1) +#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) + +#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) +#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) + +#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) +#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) + +#define BP_PHYSICAL_BIRTH(bp) \ + ((bp)->blk_phys_birth ? 
(bp)->blk_phys_birth : (bp)->blk_birth) + +#define BP_SET_BIRTH(bp, logical, physical) \ +{ \ + (bp)->blk_birth = (logical); \ + (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ +} #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ @@ -227,13 +250,18 @@ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ (dva1)->dva_word[0] == (dva2)->dva_word[0]) +#define BP_EQUAL(bp1, bp2) \ + (BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \ + DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \ + DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \ + DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2])) + #define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) - #define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) #define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ @@ -247,7 +275,10 @@ #define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) #define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) #define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) -#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) + +/* BP_IS_RAIDZ(bp) assumes no block compression */ +#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \ + BP_GET_PSIZE(bp)) #define BP_ZERO(bp) \ { \ @@ -260,7 +291,7 @@ (bp)->blk_prop = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ - (bp)->blk_pad[2] = 0; \ + (bp)->blk_phys_birth = 0; \ (bp)->blk_birth = 0; \ (bp)->blk_fill = 0; \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ diff --git a/tools/libfsimage/zfs/zfs-include/uberblock_impl.h b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h --- a/tools/libfsimage/zfs/zfs-include/uberblock_impl.h +++ b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h @@ -24,6 +24,8 @@ #ifndef _SYS_UBERBLOCK_IMPL_H #define _SYS_UBERBLOCK_IMPL_H +#pragma ident "%Z%%M% %I% %E% SMI" + /* * The uberblock version 
is incremented whenever an incompatible on-disk * format change is made to the SPA, DMU, or ZAP. diff --git a/tools/libfsimage/zfs/zfs-include/vdev_impl.h b/tools/libfsimage/zfs/zfs-include/vdev_impl.h --- a/tools/libfsimage/zfs/zfs-include/vdev_impl.h +++ b/tools/libfsimage/zfs/zfs-include/vdev_impl.h @@ -17,38 +17,27 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_VDEV_IMPL_H #define _SYS_VDEV_IMPL_H -#define VDEV_SKIP_SIZE (8 << 10) -#define VDEV_BOOT_HEADER_SIZE (8 << 10) +#define VDEV_PAD_SIZE (8 << 10) +/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ +#define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) -/* ZFS boot block */ -#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL -#define VDEV_BOOT_VERSION 1 /* version number */ - -typedef struct vdev_boot_header { - uint64_t vb_magic; /* VDEV_BOOT_MAGIC */ - uint64_t vb_version; /* VDEV_BOOT_VERSION */ - uint64_t vb_offset; /* start offset (bytes) */ - uint64_t vb_size; /* size (bytes) */ - char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)]; -} vdev_boot_header_t; - typedef struct vdev_phys { - char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)]; - zio_block_tail_t vp_zbt; + char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; + zio_eck_t vp_zbt; } vdev_phys_t; typedef struct vdev_label { - char vl_pad[VDEV_SKIP_SIZE]; /* 8K */ - vdev_boot_header_t vl_boot_header; /* 8K */ + char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ + char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ diff --git a/tools/libfsimage/zfs/zfs-include/zap_impl.h b/tools/libfsimage/zfs/zfs-include/zap_impl.h --- a/tools/libfsimage/zfs/zfs-include/zap_impl.h +++ 
b/tools/libfsimage/zfs/zfs-include/zap_impl.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -26,7 +26,6 @@ #define ZAP_MAGIC 0x2F52AB2ABULL -#define ZAP_HASHBITS 28 #define MZAP_ENT_LEN 64 #define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) #define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT @@ -99,6 +98,8 @@ uint64_t zap_num_leafs; /* number of leafs */ uint64_t zap_num_entries; /* number of entries */ uint64_t zap_salt; /* salt to stir into hash function */ + uint64_t zap_normflags; /* flags for u8_textprep_str() */ + uint64_t zap_flags; /* zap_flag_t */ /* * This structure is followed by padding, and then the embedded * pointer table. The embedded pointer table takes up second diff --git a/tools/libfsimage/zfs/zfs-include/zap_leaf.h b/tools/libfsimage/zfs/zfs-include/zap_leaf.h --- a/tools/libfsimage/zfs/zfs-include/zap_leaf.h +++ b/tools/libfsimage/zfs/zfs-include/zap_leaf.h @@ -24,6 +24,8 @@ #ifndef _SYS_ZAP_LEAF_H #define _SYS_ZAP_LEAF_H +#pragma ident "%Z%%M% %I% %E% SMI" + #define ZAP_LEAF_MAGIC 0x2AB1EAF /* chunk size = 24 bytes */ diff --git a/tools/libfsimage/zfs/zfs-include/zfs.h b/tools/libfsimage/zfs/zfs-include/zfs.h --- a/tools/libfsimage/zfs/zfs-include/zfs.h +++ b/tools/libfsimage/zfs/zfs-include/zfs.h @@ -17,18 +17,17 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_FS_ZFS_H #define _SYS_FS_ZFS_H - /* * On-disk version number. 
*/ -#define SPA_VERSION 16ULL +#define SPA_VERSION 24ULL /* * The following are configuration names used in the nvlist describing a pool's @@ -62,6 +61,12 @@ #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_L2CACHE "l2cache" +#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" +#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" +#define ZPOOL_CONFIG_IS_HOLE "is_hole" +#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" +#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" +#define ZPOOL_CONFIG_DDT_STATS "ddt_stats" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such @@ -79,6 +84,7 @@ #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" +#define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_L2CACHE "l2cache" diff --git a/tools/libfsimage/zfs/zfs-include/zfs_acl.h b/tools/libfsimage/zfs/zfs-include/zfs_acl.h --- a/tools/libfsimage/zfs/zfs-include/zfs_acl.h +++ b/tools/libfsimage/zfs/zfs-include/zfs_acl.h @@ -24,6 +24,13 @@ #ifndef _SYS_FS_ZFS_ACL_H #define _SYS_FS_ZFS_ACL_H +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef _UID_T +#define _UID_T +typedef unsigned int uid_t; /* UID type */ +#endif /* _UID_T */ + typedef struct zfs_oldace { uint32_t z_fuid; /* "who" */ uint32_t z_access_mask; /* access mask */ diff --git a/tools/libfsimage/zfs/zfs-include/zfs_znode.h b/tools/libfsimage/zfs/zfs-include/zfs_znode.h --- a/tools/libfsimage/zfs/zfs-include/zfs_znode.h +++ b/tools/libfsimage/zfs/zfs-include/zfs_znode.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -27,8 +27,9 @@ #define MASTER_NODE_OBJ 1 #define ZFS_ROOT_OBJ "ROOT" #define ZPL_VERSION_STR "VERSION" +#define ZFS_SA_ATTRS "SA_ATTRS" -#define ZPL_VERSION 4ULL +#define ZPL_VERSION 5ULL #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) diff --git a/tools/libfsimage/zfs/zfs-include/zil.h b/tools/libfsimage/zfs/zfs-include/zil.h --- a/tools/libfsimage/zfs/zfs-include/zil.h +++ b/tools/libfsimage/zfs/zfs-include/zil.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,7 +45,13 @@ uint64_t zh_replay_seq; /* highest replayed sequence number */ blkptr_t zh_log; /* log chain */ uint64_t zh_claim_seq; /* highest claimed sequence number */ - uint64_t zh_pad[5]; + uint64_t zh_flags; /* header flags */ + uint64_t zh_pad[4]; } zil_header_t; +/* + * zh_flags bit settings + */ +#define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */ + #endif /* _SYS_ZIL_H */ diff --git a/tools/libfsimage/zfs/zfs-include/zio.h b/tools/libfsimage/zfs/zfs-include/zio.h --- a/tools/libfsimage/zfs/zfs-include/zio.h +++ b/tools/libfsimage/zfs/zfs-include/zio.h @@ -17,19 +17,19 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _ZIO_H #define _ZIO_H -#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */ +#define ZEC_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */ -typedef struct zio_block_tail { - uint64_t zbt_magic; /* for validation, endianness */ - zio_cksum_t zbt_cksum; /* 256-bit checksum */ -} zio_block_tail_t; +typedef struct zio_eck { + uint64_t zec_magic; /* for validation, endianness */ + zio_cksum_t zec_cksum; /* 256-bit checksum */ +} zio_eck_t; /* * Gang block headers are self-checksumming and contain an array @@ -37,9 +37,9 @@ */ #define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE #define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_block_tail_t)) / sizeof (blkptr_t)) + sizeof (zio_eck_t)) / sizeof (blkptr_t)) #define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_block_tail_t) - \ + sizeof (zio_eck_t) - \ (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ sizeof (uint64_t)) @@ -50,7 +50,7 @@ typedef struct zio_gbh { blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; uint64_t zg_filler[SPA_GBH_FILLER]; - zio_block_tail_t zg_tail; + zio_eck_t zg_tail; } zio_gbh_phys_t; enum zio_checksum { @@ -63,12 +63,10 @@ ZIO_CHECKSUM_FLETCHER_2, ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, + ZIO_CHECKSUM_ZILOG2, ZIO_CHECKSUM_FUNCTIONS }; -#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2 -#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON - enum zio_compress { ZIO_COMPRESS_INHERIT = 0, ZIO_COMPRESS_ON, diff --git a/tools/libfsimage/zfs/zfs-include/zio_checksum.h b/tools/libfsimage/zfs/zfs-include/zio_checksum.h --- a/tools/libfsimage/zfs/zfs-include/zio_checksum.h +++ b/tools/libfsimage/zfs/zfs-include/zio_checksum.h @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,7 +35,7 @@ typedef struct zio_checksum_info { zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */ int ci_correctable; /* number of correctable bits */ - int ci_zbt; /* uses zio block tail? */ + int ci_eck; /* uses zio embedded checksum? */ char *ci_name; /* descriptive name */ } zio_checksum_info_t; diff --git a/tools/libfsimage/zfs/zfs_fletcher.c b/tools/libfsimage/zfs/zfs_fletcher.c --- a/tools/libfsimage/zfs/zfs_fletcher.c +++ b/tools/libfsimage/zfs/zfs_fletcher.c @@ -21,6 +21,8 @@ * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #include "fsys_zfs.h" diff --git a/tools/libfsimage/zfs/zfs_lzjb.c b/tools/libfsimage/zfs/zfs_lzjb.c --- a/tools/libfsimage/zfs/zfs_lzjb.c +++ b/tools/libfsimage/zfs/zfs_lzjb.c @@ -21,6 +21,8 @@ * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #include "fsys_zfs.h" #define MATCH_BITS 6 @@ -32,11 +34,10 @@ int lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) { - unsigned char *src = s_start; - unsigned char *dst = d_start; - unsigned char *d_end = (unsigned char *)d_start + d_len; - unsigned char *cpy; - unsigned char copymap = '\0'; + uchar_t *src = s_start; + uchar_t *dst = d_start; + uchar_t *d_end = (uchar_t *)d_start + d_len; + uchar_t *cpy, copymap; int copymask = 1 << (NBBY - 1); while (dst < d_end) { @@ -44,11 +45,11 @@ copymask = 1; copymap = *src++; } - if (copymap & (unsigned char)copymask) { + if (copymap & copymask) { int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; src += 2; - if ((cpy = dst - offset) < (unsigned char *)d_start) + if ((cpy = dst - offset) < (uchar_t *)d_start) return (-1); while (--mlen >= 0 && dst < d_end) *dst++ = *cpy++; diff --git a/tools/libfsimage/zfs/zfs_sha256.c b/tools/libfsimage/zfs/zfs_sha256.c --- a/tools/libfsimage/zfs/zfs_sha256.c +++ b/tools/libfsimage/zfs/zfs_sha256.c @@ -21,6 +21,8 @@ * Use is subject to license terms. 
*/ +#pragma ident "%Z%%M% %I% %E% SMI" + #include "fsys_zfs.h" /*