# HG changeset patch # User Joseph Cihula # Date 1231983939 28800 # Node ID 2442c148d5773a0cb91759014470dcd66b7be00a # Parent 4ea7c584151a7128f27f6de008695e84777833c1 Change e820 memory type for TXT-related ranges (e.g. SINIT, TXT heap, TXT private config space) from E820_UNUSABLE to E820_RESERVED to allow for more flexibility of kernels/VMMs to control access to these regions This requires a patch to Xen to continue to protect these regions from dom0. Signed-off-by: Shane Wang Signed-off-by: Joseph Cihula diff -r 4ea7c584151a -r 2442c148d577 tboot/txt/txt.c --- a/tboot/txt/txt.c Tue Jan 13 11:29:14 2009 -0800 +++ b/tboot/txt/txt.c Wed Jan 14 17:45:39 2009 -0800 @@ -703,7 +703,7 @@ tb_error_t txt_protect_mem_regions(void) size = read_pub_config_reg(TXTCR_HEAP_SIZE); printk("protecting TXT heap (%Lx - %Lx) in e820 table\n", base, (base + size - 1)); - if ( !e820_protect_region(base, size, E820_UNUSABLE) ) + if ( !e820_protect_region(base, size, E820_RESERVED) ) return TB_ERR_FATAL; /* SINIT */ @@ -711,7 +711,15 @@ tb_error_t txt_protect_mem_regions(void) size = read_pub_config_reg(TXTCR_SINIT_SIZE); printk("protecting SINIT (%Lx - %Lx) in e820 table\n", base, (base + size - 1)); - if ( !e820_protect_region(base, size, E820_UNUSABLE) ) + if ( !e820_protect_region(base, size, E820_RESERVED) ) + return TB_ERR_FATAL; + + /* TXT private space */ + base = TXT_PRIV_CONFIG_REGS_BASE; + size = NR_TXT_CONFIG_PAGES * PAGE_SIZE; + printk("protecting TXT Private Space (%Lx - %Lx) in e820 table\n", + base, (base + size - 1)); + if ( !e820_protect_region(base, size, E820_RESERVED) ) return TB_ERR_FATAL; /* ensure that memory not marked as good RAM by the MDRs is RESERVED in @@ -727,14 +735,6 @@ tb_error_t txt_protect_mem_regions(void) return TB_ERR_POST_LAUNCH_VERIFICATION; } printk("verification succeeded.\n"); - - /* TXT private space */ - base = TXT_PRIV_CONFIG_REGS_BASE; - size = NR_TXT_CONFIG_PAGES * PAGE_SIZE; - printk("protecting TXT Private Space (%Lx - %Lx) in e820 table\n", - base, (base + size - 1)); - if ( !e820_protect_region(base, size, E820_UNUSABLE) ) - return TB_ERR_FATAL; /* if vtd_pmr_lo/hi sizes rounded to 2MB granularity are less than the max_lo/hi_ram values determined from the e820 table, then we must # HG changeset patch # User Joseph Cihula # Date 1231985262 28800 # Node ID d54d9925440bc5e01143de794da6ea2eaaeb5ec0 # Parent 2442c148d5773a0cb91759014470dcd66b7be00a Support ACPI GAS (Generic Address Structure) for handling sleep states. This requires a change to the tboot_shared_t data structure that is not backwards compatible, so the version has been incremented to 3. This version of tboot will require a new version of Xen that supports GAS for tboot shutdown. Signed-off-by: Shane Wang Signed-off-by: Joseph Cihula diff -r 2442c148d577 -r d54d9925440b include/tboot.h --- a/include/tboot.h Wed Jan 14 17:45:39 2009 -0800 +++ b/include/tboot.h Wed Jan 14 18:07:42 2009 -0800 @@ -2,7 +2,7 @@ * tboot.h: shared data structure with MLE and kernel and functions * used by kernel for runtime support * - * Copyright (c) 2006-2008, Intel Corporation + * Copyright (c) 2006-2009, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,18 +57,27 @@ typedef struct __packed { * used to communicate between tboot and the launched kernel (i.e. Xen) */ +/* GAS - Generic Address Structure (ACPI 2.0+) */ typedef struct __packed { - uint16_t pm1a_cnt; - uint16_t pm1b_cnt; - uint16_t pm1a_evt; - uint16_t pm1b_evt; + uint8_t space_id; /* Address space where struct or register exists */ + uint8_t bit_width; /* Size in bits of given register */ + uint8_t bit_offset; /* Bit offset within the register */ + uint8_t access_width; /* Minimum Access size (ACPI 3.0) */ + uint64_t address; /* 64-bit address of struct or register */ +} tboot_acpi_generic_address_t; + +typedef struct __packed { + tboot_acpi_generic_address_t pm1a_cnt_blk; + tboot_acpi_generic_address_t pm1b_cnt_blk; + tboot_acpi_generic_address_t pm1a_evt_blk; + tboot_acpi_generic_address_t pm1b_evt_blk; uint16_t pm1a_cnt_val; uint16_t pm1b_cnt_val; } tboot_acpi_sleep_info; typedef struct __packed { uuid_t uuid; /* {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} */ - uint32_t version; /* currently 0.2 */ + uint32_t version; /* currently 0.3 */ uint32_t log_addr; /* physical addr of log or NULL if none */ uint32_t shutdown_entry32; /* entry point for tboot shutdown from 32b */ uint32_t shutdown_entry64; /* entry point for tboot shutdown from 64b */ @@ -77,7 +86,6 @@ typedef struct __packed { uint32_t s3_k_wakeup_entry; /* entry point for xen s3 wake up */ tboot_acpi_sleep_info acpi_sinfo; /* where kernel put acpi sleep info in Sx */ - uint8_t reserved[52]; /* this pad is for compat with old field */ uint32_t tboot_base; /* starting addr for tboot */ uint32_t tboot_size; /* size of tboot */ } tboot_shared_t; diff -r 2442c148d577 -r d54d9925440b tboot/common/acpi.c --- a/tboot/common/acpi.c Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/common/acpi.c Wed Jan 14 18:07:42 2009 -0800 @@ -1,6 +1,8 @@ /* * from xen/arch/x86/acpi/boot.c * drivers/acpi/tables.c + * drivers/acpi/hwregs.c + * drivers/acpi/osl.c * * Copyright(c) 2008 Intel Corporation. All rights reserved. * Copyright (C) 2001, 2002 Paul Diefenbaugh @@ -42,6 +44,7 @@ #define ACPI_RSDT_TABLE_SIG "RSDT" #define ACPI_MADT_TABLE_SIG "APIC" +static const tboot_acpi_sleep_info* g_acpi_info; static unsigned long acpi_scan_rsdp(unsigned long start, unsigned long length) { @@ -327,25 +330,303 @@ uint32_t get_acpi_ioapic_table(void) #define ACPI_BITMASK_WAKE_STATUS 0x8000 #define ACPI_BITPOSITION_WAKE_STATUS 0x0F -static int acpi_get_wake_status(const tboot_acpi_sleep_info* acpi_info) +static acpi_status acpi_read_port(acpi_io_address port, u32* value, u32 width) { - uint16_t val; + if ( !value ) + return AE_ERROR; + + *value = 0; + if ( width <= 8 ) + *(u8 *)value = inb(port); + else if ( width <= 16 ) + *(u16 *)value = inw(port); + else if ( width <= 32 ) + *(u32 *)value = in(port); + else + return AE_ERROR; + + return AE_OK; +} + +static acpi_status acpi_write_port(acpi_io_address port, u32 value, u32 width) +{ + switch ( width ) { + case 8: + outb(value, port); + break; + case 16: + outw(value, port); + break; + case 32: + out(value, port); + break; + default: + return AE_ERROR; + } + + return AE_OK; +} + +static acpi_status +acpi_read_memory(acpi_physical_address addr, u32* value, u32 width) +{ + if ( !value ) + return AE_ERROR; + + *value = 0; + if ( width <= 8 ) + *(u8 *)value = readb(addr); + else if ( width <= 16 ) + *(u16 *)value = readw(addr); + else if ( width <= 32 ) + *(u32 *)value = readl(addr); + else + return AE_ERROR; + + return AE_OK; +} + +static acpi_status +acpi_write_memory(acpi_physical_address addr, u32 value, u32 width) +{ + switch ( width ) { + case 8: + writeb(value, addr); + break; + case 16: + writew(value, addr); + break; + case 32: + writel(value, addr); + break; + default: + return AE_ERROR; + } + + return AE_OK; +} + +static acpi_status +acpi_hw_low_level_read(u32 width, u32* value, + const tboot_acpi_generic_address_t* reg) +{ + u64 address; + acpi_status status; + + /* + * Must have a valid pointer to a GAS structure, and + * a non-zero address within. However, don't return an error + * because the PM1A/B code must not fail if B isn't present. + */ + if ( !reg ) + return AE_OK; + + /* Get a local copy of the address. Handles possible alignment issues */ + ACPI_MOVE_64_TO_64(&address, ®->address); + if ( !address ) + return AE_OK; + + *value = 0; + switch ( reg->space_id ) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + status = acpi_read_memory( + (acpi_physical_address)(unsigned long)address, + value, width); + break; + case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_read_port((acpi_io_address)address, + value, width); + break; + default: + return AE_BAD_PARAMETER; + } + + return status; +} + +static acpi_status +acpi_hw_low_level_write(u32 width, u32 value, + const tboot_acpi_generic_address_t* reg) +{ + u64 address; + acpi_status status; + + /* + * Must have a valid pointer to a GAS structure, and + * a non-zero address within. However, don't return an error + * because the PM1A/B code must not fail if B isn't present. + */ + if ( !reg ) + return AE_OK; + + /* Get a local copy of the address. Handles possible alignment issues */ + ACPI_MOVE_64_TO_64(&address, ®->address); + if ( !address ) + return AE_OK; + + switch ( reg->space_id ) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + status = acpi_write_memory( + (acpi_physical_address)(unsigned long)address, + value, width); + break; + case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_write_port((acpi_io_address)address, + value, width); + break; + default: + return AE_BAD_PARAMETER; + } + + return status; +} + +static acpi_status +acpi_hw_register_read(u32 register_id, u32* value) +{ + u32 value1 = 0; + u32 value2 = 0; + acpi_status status; + + if ( !value ) + return AE_ERROR; + + switch ( register_id ) { + case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */ + status = acpi_hw_low_level_read(16, &value1, + &g_acpi_info->pm1a_evt_blk); + if ( ACPI_FAILURE(status) ) + goto exit; + /* PM1B is optional */ + status = acpi_hw_low_level_read(16, &value2, + &g_acpi_info->pm1b_evt_blk); + value1 |= value2; + break; + + case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */ + status = acpi_hw_low_level_read(16, &value1, + &g_acpi_info->pm1a_cnt_blk); + if ( ACPI_FAILURE(status) ) + goto exit; + status = acpi_hw_low_level_read(16, &value2, + &g_acpi_info->pm1b_cnt_blk); + value1 |= value2; + break; + default: + status = AE_BAD_PARAMETER; + break; + } + +exit: + if ( ACPI_SUCCESS(status) ) + *value = value1; + + return status; +} + +static acpi_status +acpi_hw_register_write(u32 register_id, u32 value) +{ + u32 value1 = 0; + acpi_status status; + + switch ( register_id ) { + case ACPI_REGISTER_PM1_STATUS: /* 16-bit access */ + /* Perform a read first to preserve certain bits (per ACPI spec) */ + status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &value1); + if ( ACPI_FAILURE(status) ) + goto exit; + /* Insert the bits to be preserved */ + ACPI_INSERT_BITS(value, ACPI_PM1_STATUS_PRESERVED_BITS, value1); + /* Now we can write the data */ + status = acpi_hw_low_level_write(16, value, + &g_acpi_info->pm1a_evt_blk); + if ( ACPI_FAILURE(status) ) + goto exit; + /* PM1B is optional */ + status = acpi_hw_low_level_write(16, value, + &g_acpi_info->pm1b_evt_blk); + break; + case ACPI_REGISTER_PM1_CONTROL: /* 16-bit access */ + /* Perform a read first to preserve certain bits (per ACPI spec) */ + status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL, &value1); + if ( ACPI_FAILURE(status) ) + goto exit; + /* Insert the bits to be preserved */ + ACPI_INSERT_BITS(value, ACPI_PM1_CONTROL_PRESERVED_BITS, value1); + /* Now we can write the data */ + status = acpi_hw_low_level_write(16, value, + &g_acpi_info->pm1a_cnt_blk); + if ( ACPI_FAILURE(status) ) + goto exit; + status = acpi_hw_low_level_write(16, value, + &g_acpi_info->pm1b_cnt_blk); + break; + case ACPI_REGISTER_PM1A_CONTROL: /* 16-bit access */ + status = acpi_hw_low_level_write(16, value, + &g_acpi_info->pm1a_cnt_blk); + break; + case ACPI_REGISTER_PM1B_CONTROL: /* 16-bit access */ + status = acpi_hw_low_level_write(16, value, + &g_acpi_info->pm1b_cnt_blk); + break; + default: + status = AE_BAD_PARAMETER; + break; + } + +exit: + return status; +} + +static int acpi_get_wake_status(void) +{ + uint32_t val; + acpi_status status; /* Wake status is the 15th bit of PM1 status register. (ACPI spec 3.0) */ - val = inw(acpi_info->pm1a_evt) | inw(acpi_info->pm1b_evt); + status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &val); + if ( ACPI_FAILURE(status) ) + return 0; + val &= ACPI_BITMASK_WAKE_STATUS; val >>= ACPI_BITPOSITION_WAKE_STATUS; return val; } -void machine_sleep(const tboot_acpi_sleep_info* acpi_info) +acpi_status machine_sleep(const tboot_acpi_sleep_info* acpi_info) { + acpi_status status; + wbinvd(); - outw((u16)acpi_info->pm1a_cnt_val, acpi_info->pm1a_cnt); - if ( acpi_info->pm1b_cnt ) - outw((u16)acpi_info->pm1b_cnt_val, acpi_info->pm1b_cnt); - - /* Wait until we enter sleep state, and spin until we wake */ - while ( !acpi_get_wake_status(acpi_info) ); + g_acpi_info = acpi_info; + + status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL, + g_acpi_info->pm1a_cnt_val); + if ( ACPI_FAILURE(status) ) + return AE_ERROR; + + if ( g_acpi_info->pm1b_cnt_blk.address ) { + status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL, + g_acpi_info->pm1b_cnt_val); + if ( ACPI_FAILURE(status) ) + return AE_ERROR; + } + + /* Wait until we enter sleep state; we shoudl never return to here */ + while ( !acpi_get_wake_status() ) + continue; + + return AE_OK; } + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 2442c148d577 -r d54d9925440b tboot/common/early_printk.c --- a/tboot/common/early_printk.c Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/common/early_printk.c Wed Jan 14 18:07:42 2009 -0800 @@ -323,9 +323,6 @@ config_parsed: #define VGABASE 0xb8000 -#define readw(x) (*(volatile unsigned short *)(x)) -#define writew(d,x) (*(volatile unsigned short *)(x) = (d)) - static const int max_ypos = 25; static const int max_xpos = 80; diff -r 2442c148d577 -r d54d9925440b tboot/common/tboot.c --- a/tboot/common/tboot.c Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/common/tboot.c Wed Jan 14 18:07:42 2009 -0800 @@ -58,6 +58,8 @@ #include #include #include + +extern bool txt_verify_acpi_ptrs(const tboot_acpi_sleep_info* acpi_info); extern void _prot_to_real(uint32_t dist_addr); extern bool set_policy(void); @@ -214,7 +216,7 @@ static void post_launch(void) */ memset(&_tboot_shared, 0, PAGE_SIZE); _tboot_shared.uuid = (uuid_t)TBOOT_SHARED_UUID; - _tboot_shared.version = 0x02; + _tboot_shared.version = 3; _tboot_shared.log_addr = (uint32_t)g_log; _tboot_shared.shutdown_entry32 = (uint32_t)shutdown_entry; _tboot_shared.shutdown_entry64 = (uint32_t)shutdown_entry; @@ -368,7 +370,10 @@ static void shutdown_system(uint32_t shu copy_s3_wakeup_entry(); case TB_SHUTDOWN_S4: case TB_SHUTDOWN_S5: + if ( !txt_verify_acpi_ptrs(&_tboot_shared.acpi_sinfo) ) + apply_policy(TB_ERR_FATAL); machine_sleep(&_tboot_shared.acpi_sinfo); + /* if machine_sleep() fails, fall through to reset */ case TB_SHUTDOWN_REBOOT: if ( txt_is_powercycle_required() ) { diff -r 2442c148d577 -r d54d9925440b tboot/common/tpm.c --- a/tboot/common/tpm.c Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/common/tpm.c Wed Jan 14 18:07:42 2009 -0800 @@ -64,9 +64,6 @@ #define TPM_TAG_PCR_INFO_LONG 0x0006 #define TPM_TAG_STORED_DATA12 0x0016 - -#define readb(x) (*(volatile char *)(x)) -#define writeb(d,x) (*(volatile char *)(x) = (d)) /* * TPM registers and data structures diff -r 2442c148d577 -r d54d9925440b tboot/include/acpi.h --- a/tboot/include/acpi.h Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/include/acpi.h Wed Jan 14 18:07:42 2009 -0800 @@ -127,6 +127,116 @@ extern bool restore_vtd_dmar_table(void) extern bool restore_vtd_dmar_table(void); extern bool remove_vtd_dmar_table(void); -extern void machine_sleep(const tboot_acpi_sleep_info *acpi_info); + +/* + * Macros for moving data around to/from buffers that are possibly unaligned. + * If the hardware supports the transfer of unaligned data, just do the store. + * Otherwise, we have to move one byte at a time. + */ +#ifdef ACPI_BIG_ENDIAN +/* + * Macros for big-endian machines + */ + +/* These macros reverse the bytes during the move, converting little-endian to big endian */ + + /* Big Endian <== Little Endian */ + /* Hi...Lo Lo...Hi */ +#define ACPI_MOVE_64_TO_64(d,s) \ + {(( u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[7];\ + (( u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[6];\ + (( u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[5];\ + (( u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[4];\ + (( u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[3];\ + (( u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[2];\ + (( u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[1];\ + (( u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[0];} +#else +/* + * Macros for little-endian machines + */ + +#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED + +/* The hardware supports unaligned transfers, just do the little-endian move */ + +/* 64-bit source, 64 destination */ +#define ACPI_MOVE_64_TO_64(d,s) \ + *(u64 *)(void *)(d) = *(u64 *)(void *)(s) + +#else +/* + * The hardware does not support unaligned transfers. We must move the + * data one byte at a time. These macros work whether the source or + * the destination (or both) is/are unaligned. (Little-endian move) + */ + +/* 64-bit source, 64 destination */ +#define ACPI_MOVE_64_TO_64(d,s) \ + {(( u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[0];\ + (( u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[1];\ + (( u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[2];\ + (( u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[3];\ + (( u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[4];\ + (( u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[5];\ + (( u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[6];\ + (( u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[7];} +#endif +#endif + + + +#define ACPI_INSERT_BITS(target,mask,source) \ + target = ((target & (~(mask))) | (source & mask)) + + + +typedef u32 acpi_status; /* All ACPI Exceptions */ +#define AE_CODE_ENVIRONMENTAL 0x0000 +#define AE_CODE_PROGRAMMER 0x1000 +#define AE_OK (acpi_status)0x0000 +/* Environmental exceptions */ +#define AE_ERROR (acpi_status) (0x0001 | AE_CODE_ENVIRONMENTAL) +/* Programmer exceptions */ +#define AE_BAD_PARAMETER (acpi_status) (0x0001 | AE_CODE_PROGRAMMER) + +#define ACPI_SUCCESS(a) (!(a)) +#define ACPI_FAILURE(a) (a) + + + +typedef u8 acpi_adr_space_type; +#define ACPI_ADR_SPACE_SYSTEM_MEMORY (acpi_adr_space_type)0 +#define ACPI_ADR_SPACE_SYSTEM_IO (acpi_adr_space_type)1 + + + +/* + * Some ACPI registers have bits that must be ignored -- meaning that they + * must be preserved. + */ +#define ACPI_PM1_STATUS_PRESERVED_BITS 0x0800 /* Bit 11 */ +#define ACPI_PM1_CONTROL_PRESERVED_BITS 0x0200 /* Bit 9 (whatever) */ + +/* + * Register IDs + * These are the full ACPI registers + */ +#define ACPI_REGISTER_PM1_STATUS 0x01 +#define ACPI_REGISTER_PM1_ENABLE 0x02 +#define ACPI_REGISTER_PM1_CONTROL 0x03 +#define ACPI_REGISTER_PM1A_CONTROL 0x04 +#define ACPI_REGISTER_PM1B_CONTROL 0x05 +#define ACPI_REGISTER_PM2_CONTROL 0x06 +#define ACPI_REGISTER_PM_TIMER 0x07 +#define ACPI_REGISTER_PROCESSOR_BLOCK 0x08 +#define ACPI_REGISTER_SMI_COMMAND_BLOCK 0x09 + + +typedef unsigned short acpi_io_address; +typedef void * acpi_physical_address; + + +extern acpi_status machine_sleep(const tboot_acpi_sleep_info* acpi_info); #endif /* __ACPI_H__ */ diff -r 2442c148d577 -r d54d9925440b tboot/include/misc.h --- a/tboot/include/misc.h Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/include/misc.h Wed Jan 14 18:07:42 2009 -0800 @@ -80,6 +80,16 @@ static inline void out(unsigned int valu __asm__ __volatile__ ("out %0, %w1" : : "a" (value), "Nd" (port)); } +/* + * from io.h + */ + +#define readb(x) (*(volatile char *)(x)) +#define readw(x) (*(volatile short *)(x)) +#define readl(x) (*(volatile int *)(x)) +#define writeb(d,x) (*(volatile char *)(x) = (d)) +#define writew(d,x) (*(volatile short *)(x) = (d)) +#define writel(d,x) (*(volatile int *)(x) = (d)) /* * from lib.h diff -r 2442c148d577 -r d54d9925440b tboot/include/txt/verify.h --- a/tboot/include/txt/verify.h Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/include/txt/verify.h Wed Jan 14 18:07:42 2009 -0800 @@ -40,6 +40,7 @@ extern void set_vtd_pmrs(os_sinit_data_t uint64_t min_lo_ram, uint64_t max_lo_ram, uint64_t min_hi_ram, uint64_t max_hi_ram); extern bool verify_e820_map(sinit_mdr_t* mdrs_base, uint32_t num_mdrs); +extern bool txt_verify_acpi_ptrs(const tboot_acpi_sleep_info* acpi_info); #endif /* __TXT_VERIFY_H__ */ diff -r 2442c148d577 -r d54d9925440b tboot/txt/verify.c --- a/tboot/txt/verify.c Wed Jan 14 17:45:39 2009 -0800 +++ b/tboot/txt/verify.c Wed Jan 14 18:07:42 2009 -0800 @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -63,6 +65,11 @@ extern char _end[]; /* end o extern char _end[]; /* end of module */ extern long s3_flag; + +extern char s3_wakeup_16[]; /* real mode S3 wakeup code */ +extern char s3_wakeup_end[]; + +extern tboot_shared_t _tboot_shared; /* MLE/kernel shared data page */ /* * CPUID extended feature info @@ -390,6 +397,90 @@ bool verify_e820_map(sinit_mdr_t* mdrs_b return e820_reserve_ram(base, length); } +static bool txt_verify_acpi_ptr(tboot_acpi_generic_address_t blk) +{ + uint64_t base, size; + uint64_t addr; + + if ( blk.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY ) + return true; + + ACPI_MOVE_64_TO_64(&addr, &blk.address); + + /* TXT heap */ + base = read_pub_config_reg(TXTCR_HEAP_BASE); + size = read_pub_config_reg(TXTCR_HEAP_SIZE); + printk("verifying ACPI address %Lx against TXT heap (%Lx - %Lx)\n", + addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + /* SINIT */ + base = read_pub_config_reg(TXTCR_SINIT_BASE); + size = read_pub_config_reg(TXTCR_SINIT_SIZE); + printk("verifying ACPI address %Lx against SINIT (%Lx - %Lx)\n", + addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + /* TXT private space */ + base = TXT_PRIV_CONFIG_REGS_BASE; + size = NR_TXT_CONFIG_PAGES * PAGE_SIZE; + printk("verifying ACPI address %Lx against " + "TXT Private Space (%Lx - %Lx)\n", addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + /* TPM (localities) */ + base = TPM_LOCALITY_BASE; + size = TPM_NR_LOCALITIES * NR_TPM_LOCALITY_PAGES; + printk("verifying ACPI address %Lx against TPM localities (%Lx - %Lx)\n", + addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + /* Tboot */ + base = (uint64_t)((unsigned long)&_start - 3*PAGE_SIZE); + size = (uint64_t)(unsigned long)&_end - base; + printk("verifying ACPI address %Lx against Tboot (%Lx - %Lx)\n", + addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + /* MLE/Kernel shared page */ + base = (uint32_t)&_tboot_shared; + size = PAGE_SIZE; + printk("verifying ACPI address %Lx against " + "MLE/kernel shared (%Lx - %Lx)\n", + addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + /* S3 resume range in low memory */ + base = TBOOT_S3_WAKEUP_ADDR; + size = s3_wakeup_end - s3_wakeup_16; + printk("verifying ACPI address %Lx against Tboot S3 resume range " + "(%Lx - %Lx)\n", addr, base, (base + size - 1)); + if ( (addr >= base) && (addr <= (base + size - 1)) ) + return false; + + return true; +} + +bool txt_verify_acpi_ptrs(const tboot_acpi_sleep_info* acpi_info) +{ + if ( !txt_verify_acpi_ptr(acpi_info->pm1a_cnt_blk) ) + return false; + if ( !txt_verify_acpi_ptr(acpi_info->pm1b_cnt_blk) ) + return false; + if ( !txt_verify_acpi_ptr(acpi_info->pm1a_evt_blk) ) + return false; + if ( !txt_verify_acpi_ptr(acpi_info->pm1b_evt_blk) ) + return false; + + return true; +} + /* * Local variables: * mode: C # HG changeset patch # User Joseph Cihula # Date 1231985482 28800 # Node ID 719eecd798d6efe8a72b425e524ca76c5c4de4e5 # Parent d54d9925440bc5e01143de794da6ea2eaaeb5ec0 Remove the shutdown_entry32 and shutdown_entry64 from tboot_shared_t and have just a single shutdown_entry field. Since backwards compatbility was broken with the previous patch, this is intended to be used with a set of patches to Xen that support the new tboot_shared_t. Signed-off-by: Joseph Cihula diff -r d54d9925440b -r 719eecd798d6 include/tboot.h --- a/include/tboot.h Wed Jan 14 18:07:42 2009 -0800 +++ b/include/tboot.h Wed Jan 14 18:11:22 2009 -0800 @@ -79,8 +79,7 @@ typedef struct __packed { uuid_t uuid; /* {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} */ uint32_t version; /* currently 0.3 */ uint32_t log_addr; /* physical addr of log or NULL if none */ - uint32_t shutdown_entry32; /* entry point for tboot shutdown from 32b */ - uint32_t shutdown_entry64; /* entry point for tboot shutdown from 64b */ + uint32_t shutdown_entry; /* entry point for tboot shutdown */ uint32_t shutdown_type; /* type of shutdown (TB_SHUTDOWN_*) */ uint32_t s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */ uint32_t s3_k_wakeup_entry; /* entry point for xen s3 wake up */ @@ -128,8 +127,7 @@ static inline void print_tboot_shared(tb printk("tboot_shared data:\n"); printk("\t version: %d\n", tboot_shared->version); printk("\t log_addr: 0x%08x\n", tboot_shared->log_addr); - printk("\t shutdown_entry32: 0x%08x\n", tboot_shared->shutdown_entry32); - printk("\t shutdown_entry64: 0x%08x\n", tboot_shared->shutdown_entry64); + printk("\t shutdown_entry: 0x%08x\n", tboot_shared->shutdown_entry); printk("\t shutdown_type: %d\n", tboot_shared->shutdown_type); printk("\t s3_tb_wakeup_entry: 0x%08x\n", tboot_shared->s3_tb_wakeup_entry); diff -r d54d9925440b -r 719eecd798d6 tboot/common/tboot.c --- a/tboot/common/tboot.c Wed Jan 14 18:07:42 2009 -0800 +++ b/tboot/common/tboot.c Wed Jan 14 18:11:22 2009 -0800 @@ -218,8 +218,7 @@ static void post_launch(void) _tboot_shared.uuid = (uuid_t)TBOOT_SHARED_UUID; _tboot_shared.version = 3; _tboot_shared.log_addr = (uint32_t)g_log; - _tboot_shared.shutdown_entry32 = (uint32_t)shutdown_entry; - _tboot_shared.shutdown_entry64 = (uint32_t)shutdown_entry; + _tboot_shared.shutdown_entry = (uint32_t)shutdown_entry; _tboot_shared.s3_tb_wakeup_entry = (uint32_t)TBOOT_S3_WAKEUP_ADDR; _tboot_shared.tboot_base = (uint32_t)&_start; _tboot_shared.tboot_size = (uint32_t)&_end - (uint32_t)&_start; # HG changeset patch # User Joseph Cihula # Date 1231988588 28800 # Node ID 89573e53cc29f55ac2c4567965c434d56cfeb686 # Parent 719eecd798d6efe8a72b425e524ca76c5c4de4e5 Provide integrity protection to launched kernel/VMM during S3 Before entering S3, tboot will calculate a UMAC over regions of memory provided by the kernel and seal the UMAC. Upon resume, after re-establishing the measured environment, tboot will verify that the memory image of those regions match the sealed UMAC. Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang diff -r 719eecd798d6 -r 89573e53cc29 include/tboot.h --- a/include/tboot.h Wed Jan 14 18:11:22 2009 -0800 +++ b/include/tboot.h Wed Jan 14 19:03:08 2009 -0800 @@ -57,6 +57,12 @@ typedef struct __packed { * used to communicate between tboot and the launched kernel (i.e. Xen) */ +#define MAX_TB_MAC_REGIONS 32 +typedef struct __packed { + uint64_t start; + uint64_t end; +} tboot_mac_region_t; + /* GAS - Generic Address Structure (ACPI 2.0+) */ typedef struct __packed { uint8_t space_id; /* Address space where struct or register exists */ @@ -87,6 +93,9 @@ typedef struct __packed { acpi_sinfo; /* where kernel put acpi sleep info in Sx */ uint32_t tboot_base; /* starting addr for tboot */ uint32_t tboot_size; /* size of tboot */ + uint8_t num_mac_regions; /* number mem regions to MAC on S3 */ + /* contig regions memory to MAC on S3 */ + tboot_mac_region_t mac_regions[MAX_TB_MAC_REGIONS]; } tboot_shared_t; #define TB_SHUTDOWN_REBOOT 0 diff -r 719eecd798d6 -r 89573e53cc29 tboot/common/integrity.c --- a/tboot/common/integrity.c Wed Jan 14 18:11:22 2009 -0800 +++ b/tboot/common/integrity.c Wed Jan 14 19:03:08 2009 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include /* put in .data section to that they aren't cleared on S3 resume */ @@ -60,6 +61,14 @@ static __data uint32_t sealed_vl_size; launch hashes and memory integrity UMAC) */ static __data tpm_pcr_value_t post_launch_pcr17, post_launch_pcr18; +/* MAC result for kernel used to verify memory integrity on S3 resume */ +static __data umac_t g_mac; + +static __data uint8_t sealed_mac[512]; +static __data uint32_t sealed_mac_size; + + +extern tboot_shared_t _tboot_shared; extern bool hash_policy(tb_hash_t *hash, uint8_t hash_alg); @@ -140,8 +149,34 @@ bool seal_initial_measurements(void) return true; } +static bool measure_memory_integrity(umac_t result) +{ + umac_ctx_t ctx; + + memset(result, 0, UMAC_OUTPUT_LEN); + + ctx = umac_new("abcdefghijklmnop"); + for ( unsigned int i = 0; i < _tboot_shared.num_mac_regions; i++ ) { + uint64_t start = _tboot_shared.mac_regions[i].start; + uint64_t end = _tboot_shared.mac_regions[i].end; + + printk("MACing region %u: 0x%Lx - 0x%Lx\n", i, start, end); + /* TBD: don't handle addrs > 4GB yet, so error */ + if ( (start & 0xffffffff00000000L) || (end & 0xffffffff00000000L) ) { + printk("range is in memory > 4GB\n"); + return false; + } + umac_update(ctx, (char *)(uintptr_t)start, end - start); + } + umac_final(ctx, result, nonce); + + printk("MAC of regions is: "); + print_hex(NULL, (uint8_t *)result, UMAC_OUTPUT_LEN); + return true; +} + /* - * verify sealed VL msmnts and policy, then re-extend hashes + * verify memory integrity (UMAC) and sealed VL hashes, then re-extend hashes * * this must be called post-launch but before extending any modules or other * measurements into PCRs @@ -149,6 +184,10 @@ bool verify_integrity(void) bool verify_integrity(void) { uint32_t data_size; + unsigned int i; + uint8_t pcr_indcs_create[] = {17, 18}; + tpm_pcr_value_t pcr17, pcr18; + const tpm_pcr_value_t *pcr_values_create[] = {&pcr17, &pcr18}; /* * unseal and verify VL measurements @@ -178,6 +217,45 @@ bool verify_integrity(void) return false; } + /* to prevent rollback attack using old sealed measurements, + verify that (creation) PCRs at mem integrity seal time are same as + if we extend current PCRs with unsealed VL measurements */ + /* TBD: we should check all DRTM PCRs */ + tpm_pcr_read(2, 17, &pcr17); + tpm_pcr_read(2, 18, &pcr18); + for ( i = 0; i < g_s3_state.num_vl_entries; i++ ) { + if ( g_s3_state.vl_entries[i].pcr == 17 ) + extend_hash((tb_hash_t *)&pcr17, &g_s3_state.vl_entries[i].hash, + TB_HALG_SHA1); + else if ( g_s3_state.vl_entries[i].pcr == 18 ) + extend_hash((tb_hash_t *)&pcr18, &g_s3_state.vl_entries[i].hash, + TB_HALG_SHA1); + } + if ( !tpm_cmp_creation_pcrs(ARRAY_SIZE(pcr_indcs_create), + pcr_indcs_create, pcr_values_create, + sealed_mac_size, sealed_mac) ) { + printk("extended PCR values don't match creation values in sealed blob.\n"); + return false; + } + + /* + * unseal and verify memory integrity value + */ + + data_size = sizeof(g_mac); + if ( tpm_unseal(2, sealed_mac_size, sealed_mac, + &data_size, (uint8_t *)&g_mac) != TPM_SUCCESS ) + return false; + + /* Verify memory integrity against sealed value */ + umac_t mac; + if ( !measure_memory_integrity(mac) ) + return false; + if ( memcmp(mac, g_mac, sizeof(g_mac)) ) { + printk("memory integrity lost on S3 resume\n"); + return false; + } + /* re-extend PCRs with VL measurements */ if ( !extend_pcrs() ) return false; @@ -194,6 +272,43 @@ void display_vl_msmnts(void) } } +/* + * memory integrity value (UMAC) is sealed to PCRs 17+18 with post-launch + * values (i.e. before extending with VL hashes) + */ +bool generate_mem_integrity(void) +{ + uint8_t pcr_indcs_create[] = {17, 18}; + uint8_t pcr_indcs_release[] = {17, 18}; + const tpm_pcr_value_t *pcr_values_release[] = {&post_launch_pcr17, + &post_launch_pcr18}; + + /* since tboot relies on the module it launches for resource protection, + that module should have at least one region for itself, otherwise + it will not be protected against S3 resume attacks */ + if ( _tboot_shared.num_mac_regions == 0 ) { + printk("no memory regions to MAC\n"); + return false; + } + + sealed_mac_size = sizeof(sealed_mac); + + /* calculate the memory integrity hash */ + if ( !measure_memory_integrity(g_mac) ) + return false; + + /* seal to locality 2, pcr 17/18 */ + if ( tpm_seal(2, TPM_LOC_TWO, + ARRAY_SIZE(pcr_indcs_create), pcr_indcs_create, + ARRAY_SIZE(pcr_indcs_release), pcr_indcs_release, + pcr_values_release, + sizeof(g_mac), (const uint8_t *)g_mac, + &sealed_mac_size, sealed_mac) != TPM_SUCCESS ) + return false; + + return true; +} + /* * Local variables: diff -r 719eecd798d6 -r 89573e53cc29 tboot/common/tboot.c --- a/tboot/common/tboot.c Wed Jan 14 18:11:22 2009 -0800 +++ b/tboot/common/tboot.c Wed Jan 14 19:03:08 2009 -0800 @@ -432,6 +432,10 @@ void shutdown(void) /* restore DMAR table if needed */ restore_vtd_dmar_table(); + + /* create and seal memory integrity measurement */ + if ( !generate_mem_integrity() ) + apply_policy(TB_ERR_S3_INTEGRITY); } /* cap dynamic PCRs extended as part of launch (17, 18, ...) */ diff -r 719eecd798d6 -r 89573e53cc29 tboot/common/umaclib.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tboot/common/umaclib.c Wed Jan 14 19:03:08 2009 -0800 @@ -0,0 +1,371 @@ +/* + * A couple of 64 bit operations ported from FreeBSD. + * The code within the '#if BITS_PER_LONG == 32' block below, and no other + * code in this file, is distributed under the following licensing terms + * This is the modified '3-clause' BSD license with the obnoxious + * advertising clause removed, as permitted by University of California. + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include + +#if BITS_PER_LONG == 32 + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + s64 q; /* as a (signed) quad */ + s64 uq; /* as an unsigned quad */ + long sl[2]; /* as two signed longs */ + unsigned long ul[2]; /* as two unsigned longs */ +}; + +#ifdef __BIG_ENDIAN +#define _QUAD_HIGHWORD 0 +#define _QUAD_LOWWORD 1 +#else /* __LITTLE_ENDIAN */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 +#endif + +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#define CHAR_BIT 8 /* number of bits in a char */ +#define QUAD_BITS (sizeof(s64) * CHAR_BIT) +#define LONG_BITS (sizeof(long) * CHAR_BIT) +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1 << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B */ +typedef u_long digit; + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_long. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u64 __qdivrem(u64 uq, u64 vq, u64 *arq) +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + u_long qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + u_long rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + u_long nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + +/* + * Divide two signed quads. + * Truncates towards zero, as required by C99. + */ +s64 __divdi3(s64 a, s64 b) +{ + u64 ua, ub, uq; + int neg = (a < 0) ^ (b < 0); + ua = (a < 0) ? -(u64)a : a; + ub = (b < 0) ? -(u64)b : b; + uq = __qdivrem(ua, ub, (u64 *)0); + return (neg ? -uq : uq); +} + + +/* + * Divide two unsigned quads. + */ +u64 __udivdi3(u64 a, u64 b) +{ + return __qdivrem(a, b, (u64 *)0); +} + +/* + * Remainder of unsigned quad division + */ +u64 __umoddi3(u64 a, u64 b) +{ + u64 rem; + __qdivrem(a, b, &rem); + return rem; +} + +/* + * Remainder of signed quad division. + * Truncates towards zero, as required by C99: + * 11 % 5 = 1 + * -11 % 5 = -1 + * 11 % -5 = 1 + * -11 % -5 = 1 + */ +s64 __moddi3(s64 a, s64 b) +{ + u64 ua, ub, urem; + int neg = (a < 0); + ua = neg ? -(u64)a : a; + ub = (b < 0) ? -(u64)b : b; + __qdivrem(ua, ub, &urem); + return (neg ? -urem : urem); +} + +#endif /* BITS_PER_LONG == 32 */ diff -r 719eecd798d6 -r 89573e53cc29 tboot/include/integrity.h --- a/tboot/include/integrity.h Wed Jan 14 18:11:22 2009 -0800 +++ b/tboot/include/integrity.h Wed Jan 14 19:03:08 2009 -0800 @@ -66,6 +66,7 @@ extern bool seal_initial_measurements(vo extern bool seal_initial_measurements(void); extern bool verify_integrity(void); extern void display_vl_msmnts(void); +extern bool generate_mem_integrity(void); #endif /* _TBOOT_INTEGRITY_H_ */ diff -r 719eecd798d6 -r 89573e53cc29 tboot/include/rijndael-alg-fst.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tboot/include/rijndael-alg-fst.h Wed Jan 14 19:03:08 2009 -0800 @@ -0,0 +1,1410 @@ +/** + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +//#include +//#include +#ifndef __RIJNDAEL_ALG_FST_H +#define __RIJNDAEL_ALG_FST_H + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +#include "types.h" + +/* +Te0[x] = S [x].[02, 01, 01, 03]; +Te1[x] = S [x].[03, 02, 01, 01]; +Te2[x] = S [x].[01, 03, 02, 01]; +Te3[x] = S [x].[01, 01, 03, 02]; +Te4[x] = S [x].[01, 01, 01, 01]; + +Td0[x] = Si[x].[0e, 09, 0d, 0b]; +Td1[x] = Si[x].[0b, 0e, 09, 0d]; +Td2[x] = Si[x].[0d, 0b, 0e, 09]; +Td3[x] = Si[x].[09, 0d, 0b, 0e]; +Td4[x] = Si[x].[01, 01, 01, 01]; +*/ + +static const u32 Te0[256] = { + 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, + 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, + 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, + 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, + 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, + 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, + 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, + 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, + 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, + 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, + 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, + 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, + 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, + 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, + 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, + 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, + 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, + 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, + 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, + 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, + 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, + 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, + 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, + 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, + 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, + 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, + 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, + 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, + 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, + 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, + 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, + 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, + 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, + 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, + 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, + 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, + 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, + 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, + 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, + 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, + 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, + 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, + 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, + 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, + 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, + 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, + 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, + 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, + 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, + 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, + 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, + 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, + 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, + 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, + 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, + 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, + 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, + 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, + 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, + 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, + 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, + 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, + 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, + 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, +}; +static const u32 Te1[256] = { + 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, + 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, + 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, + 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, + 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, + 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, + 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, + 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, + 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, + 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, + 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, + 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, + 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, + 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, + 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, + 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, + 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, + 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, + 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, + 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, + 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, + 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, + 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, + 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, + 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, + 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, + 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, + 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, + 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, + 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, + 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, + 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, + 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, + 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, + 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, + 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, + 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, + 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, + 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, + 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, + 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, + 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, + 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, + 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, + 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, + 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, + 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, + 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, + 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, + 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, + 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, + 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, + 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, + 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, + 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, + 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, + 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, + 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, + 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, + 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, + 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, + 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, + 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, + 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, +}; +static const u32 Te2[256] = { + 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, + 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, + 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, + 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, + 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, + 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, + 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, + 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, + 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, + 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, + 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, + 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, + 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, + 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, + 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, + 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, + 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, + 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, + 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, + 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, + 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, + 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, + 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, + 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, + 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, + 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, + 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, + 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, + 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, + 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, + 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, + 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, + 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, + 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, + 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, + 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, + 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, + 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, + 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, + 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, + 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, + 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, + 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, + 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, + 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, + 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, + 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, + 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, + 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, + 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, + 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, + 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, + 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, + 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, + 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, + 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, + 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, + 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, + 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, + 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, + 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, + 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, + 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, + 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, +}; +static const u32 Te3[256] = { + + 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, + 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, + 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, + 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, + 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, + 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, + 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, + 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, + 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, + 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, + 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, + 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, + 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, + 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, + 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, + 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, + 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, + 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, + 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, + 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, + 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, + 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, + 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, + 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, + 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, + 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, + 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, + 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, + 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, + 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, + 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, + 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, + 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, + 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, + 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, + 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, + 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, + 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, + 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, + 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, + 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, + 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, + 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, + 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, + 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, + 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, + 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, + 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, + 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, + 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, + 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, + 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, + 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, + 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, + 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, + 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, + 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, + 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, + 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, + 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, + 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, + 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, + 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, + 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, +}; +static const u32 Te4[256] = { + 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, + 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, + 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, + 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, + 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, + 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, + 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, + 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, + 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, + 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, + 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, + 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, + 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, + 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, + 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, + 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, + 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, + 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, + 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, + 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, + 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, + 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, + 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, + 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, + 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, + 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, + 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, + 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, + 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, + 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, + 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, + 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, + 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, + 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, + 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, + 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, + 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, + 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, + 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, + 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, + 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, + 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, + 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, + 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, + 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, + 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, + 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, + 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, + 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, + 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, + 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, + 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, + 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, + 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, + 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, + 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, + 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, + 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, + 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, + 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, + 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, + 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, + 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, + 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, +}; +static const u32 Td0[256] = { + 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, + 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, + 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, + 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, + 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, + 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, + 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, + 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, + 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, + 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, + 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, + 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, + 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, + 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, + 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, + 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, + 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, + 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, + 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, + 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, + 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, + 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, + 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, + 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, + 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, + 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, + 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, + 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, + 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, + 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, + 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, + 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, + 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, + 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, + 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, + 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, + 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, + 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, + 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, + 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, + 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, + 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, + 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, + 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, + 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, + 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, + 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, + 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, + 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, + 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, + 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, + 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, + 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, + 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, + 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, + 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, + 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, + 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, + 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, + 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, + 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, + 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, + 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, + 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, +}; +static const u32 Td1[256] = { + 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, + 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, + 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, + 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, + 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, + 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, + 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, + 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, + 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, + 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, + 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, + 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, + 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, + 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, + 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, + 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, + 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, + 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, + 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, + 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, + 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, + 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, + 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, + 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, + 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, + 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, + 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, + 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, + 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, + 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, + 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, + 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, + 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, + 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, + 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, + 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, + 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, + 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, + 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, + 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, + 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, + 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, + 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, + 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, + 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, + 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, + 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, + 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, + 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, + 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, + 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, + 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, + 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, + 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, + 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, + 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, + 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, + 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, + 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, + 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, + 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, + 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, + 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, + 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, +}; +static const u32 Td2[256] = { + 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, + 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, + 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, + 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, + 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, + 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, + 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, + 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, + 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, + 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, + 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, + 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, + 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, + 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, + 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, + 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, + 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, + 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, + 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, + 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, + + 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, + 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, + 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, + 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, + 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, + 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, + 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, + 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, + 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, + 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, + 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, + 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, + 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, + 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, + 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, + 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, + 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, + 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, + 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, + 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, + 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, + 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, + 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, + 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, + 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, + 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, + 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, + 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, + 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, + 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, + 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, + 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, + 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, + 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, + 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, + 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, + 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, + 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, + 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, + 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, + 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, + 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, + 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, + 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, +}; +static const u32 Td3[256] = { + 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, + 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, + 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, + 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, + 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, + 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, + 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, + 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, + 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, + 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, + 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, + 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, + 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, + 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, + 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, + 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, + 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, + 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, + 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, + 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, + 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, + 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, + 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, + 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, + 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, + 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, + 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, + 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, + 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, + 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, + 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, + 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, + 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, + 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, + 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, + 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, + 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, + 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, + 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, + 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, + 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, + 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, + 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, + 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, + 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, + 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, + 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, + 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, + 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, + 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, + 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, + 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, + 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, + 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, + 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, + 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, + 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, + 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, + 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, + 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, + 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, + 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, + 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, + 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, +}; +static const u32 Td4[256] = { + 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, + 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, + 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, + 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, + 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, + 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, + 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, + 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, + 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, + 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, + 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, + 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, + 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, + 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, + 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, + 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, + 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, + 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, + 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, + 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, + 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, + 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, + 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, + 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, + 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, + 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, + 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, + 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, + 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, + 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, + 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, + 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, + 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, + 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, + 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, + 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, + 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, + 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, + 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, + 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, + 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, + 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, + 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, + 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, + 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, + 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, + 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, + 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, + 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, + 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, + 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, + 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, + 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, + 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, + 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, + 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, + 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, + 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, + 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, + 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, + 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, + 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, + 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, + 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +#ifndef SWAP +#define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) +#endif + +#ifdef _MSC_VER +#define GETU32(p) SWAP(*((u32 *)(p))) +#define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); } +#else +#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } +#endif + +/** + * Expand the cipher key into the encryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +static inline int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int i = 0; + u32 temp; + + rk[0] = GETU32(cipherKey ); + rk[1] = GETU32(cipherKey + 4); + rk[2] = GETU32(cipherKey + 8); + rk[3] = GETU32(cipherKey + 12); + if (keyBits == 128) { + for (;;) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 10; + } + rk += 4; + } + } + rk[4] = GETU32(cipherKey + 16); + rk[5] = GETU32(cipherKey + 20); + if (keyBits == 192) { + for (;;) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 12; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(cipherKey + 24); + rk[7] = GETU32(cipherKey + 28); + if (keyBits == 256) { + for (;;) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 14; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +static inline int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int Nr, i, j; + u32 temp; + + /* expand the cipher key: */ + Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits); + /* invert the order of the round keys: */ + for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < Nr; i++) { + rk += 4; + rk[0] = + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + } + return Nr; +} + +static inline void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(pt ) ^ rk[0]; + s1 = GETU32(pt + 4) ^ rk[1]; + s2 = GETU32(pt + 8) ^ rk[2]; + s3 = GETU32(pt + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24) ] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24) ] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24) ] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24) ] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(ct , s0); + s1 = + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(ct + 4, s1); + s2 = + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(ct + 8, s2); + s3 = + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(ct + 12, s3); +} + +static inline void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(ct ) ^ rk[0]; + s1 = GETU32(ct + 4) ^ rk[1]; + s2 = GETU32(ct + 8) ^ rk[2]; + s3 = GETU32(ct + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[4]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24) ] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24) ] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24) ] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24) ] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(pt , s0); + s1 = + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(pt + 4, s1); + s2 = + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(pt + 8, s2); + s3 = + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(pt + 12, s3); +} + +#ifdef INTERMEDIATE_VALUE_KAT + +static inline void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[0]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[1]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[2]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * apply last round and + * map cipher state to byte array block: + */ + if (rounds == Nr) { + t0 = + (Te4[(s0 >> 24) ] & 0xff000000) ^ + (Te4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + t1 = + (Te4[(s1 >> 24) ] & 0xff000000) ^ + (Te4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + t2 = + (Te4[(s2 >> 24) ] & 0xff000000) ^ + (Te4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + t3 = + (Te4[(s3 >> 24) ] & 0xff000000) ^ + (Te4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + } + + PUTU32(block , s0); + PUTU32(block + 4, s1); + PUTU32(block + 8, s2); + PUTU32(block + 12, s3); +} + +static inline void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[0]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[1]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[2]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * complete the last round and + * map cipher state to byte array block: + */ + t0 = + (Td4[(s0 >> 24) ] & 0xff000000) ^ + (Td4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s1 ) & 0xff] & 0x000000ff); + t1 = + (Td4[(s1 >> 24) ] & 0xff000000) ^ + (Td4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s2 ) & 0xff] & 0x000000ff); + t2 = + (Td4[(s2 >> 24) ] & 0xff000000) ^ + (Td4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s3 ) & 0xff] & 0x000000ff); + t3 = + (Td4[(s3 >> 24) ] & 0xff000000) ^ + (Td4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s0 ) & 0xff] & 0x000000ff); + + if (rounds == Nr) { + t0 ^= rk[0]; + t1 ^= rk[1]; + t2 ^= rk[2]; + t3 ^= rk[3]; + } + + PUTU32(block , t0); + PUTU32(block + 4, t1); + PUTU32(block + 8, t2); + PUTU32(block + 12, t3); +} + +#endif /* INTERMEDIATE_VALUE_KAT */ + +#endif /* __RIJNDAEL_ALG_FST_H */ diff -r 719eecd798d6 -r 89573e53cc29 tboot/include/umac.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tboot/include/umac.h Wed Jan 14 19:03:08 2009 -0800 @@ -0,0 +1,2212 @@ +/* ----------------------------------------------------------------------- + * + * umac.c -- C Implementation UMAC Message Authentication + * + * Version 0.92 of draft-krovetz-umac-07.txt -- 2006 February 21 + * + * For a full description of UMAC message authentication see the UMAC + * world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac + * Please report bugs and suggestions to the UMAC webpage. + * + * Copyright (c) 1999-2006 Ted Krovetz + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and with or without fee, is hereby + * granted provided that the above copyright notice appears in all copies + * and in supporting documentation, and that the name of the copyright + * holder not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior permission. + * + * Comments should be directed to Ted Krovetz (tdk@acm.org) + * + * ---------------------------------------------------------------------- */ + + /* ////////////////////// IMPORTANT NOTES ///////////////////////////////// + * + * 1) This version does not work properly on messages larger than 16MB + * + * 2) If you set the switch to use SSE2, then all data must be 16-byte + * aligned + * + * 3) When calling the function umac(), it is assumed that msg is in + * a writable buffer of length divisible by 32 bytes. The message itself + * does not have to fill the entire buffer, but bytes beyond msg may be + * zeroed. + * + * 4) Two free AES implementations are supported by this implementation of + * UMAC. Paulo Barreto's version is in the public domain and can be found + * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for + * "Barreto"). The only two files needed are rijndael-alg-fst.c and + * rijndael-alg-fst.h. Brian Gladman's version is distributed with the GNU + * Public lisence at http://fp.gladman.plus.com/AES/index.htm. It + * includes a fast IA-32 assembly version. + * + * 5) With FORCE_C_ONLY flags set to 0, incorrect results are sometimes + * produced under gcc with optimizations set -O3 or higher. Dunno why. + * + /////////////////////////////////////////////////////////////////////// */ + +/* ---------------------------------------------------------------------- */ +/* --- User Switches ---------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +#ifdef __cplusplus + extern "C" { +#endif + +#define UMAC_OUTPUT_LEN 4 /* Alowable: 4, 8, 12, 16 */ +#define FORCE_C_ONLY 1 /* ANSI C and 64-bit integers req'd */ +#define GLADMAN_AES 0 /* Change to 1 to use Gladman's AES */ +#define SSE2 0 /* Is SSE2 is available? */ +#define RUN_TESTS 0 /* Run basic correctness/speed tests */ + +/* ---------------------------------------------------------------------- */ +/* -- Global Includes --------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +typedef void *umac_ctx_t; +typedef void *uhash_ctx_t; + +typedef char umac_t[UMAC_OUTPUT_LEN]; + +static const char nonce[] = "abcdefgh"; + +//#include +//#include +#if GLADMAN_AES +#include "aes.h" +#else +#include "rijndael-alg-fst.h" +#endif + + +/* ---------------------------------------------------------------------- */ +/* --- Primitive Data Types --- */ +/* ---------------------------------------------------------------------- */ + +/* The following assumptions may need change on your system */ +#ifndef __XEN__ +typedef unsigned char UINT8; /* 1 byte */ +typedef unsigned short UINT16; /* 2 byte */ +typedef unsigned int UINT32; /* 4 byte */ +typedef unsigned long long UINT64; /* 8 bytes */ +#endif +typedef unsigned long UWORD; /* Register */ +typedef long ptrdiff_t; + +/* ---------------------------------------------------------------------- */ +/* --- Constants ------------------------------------------------ */ +/* ---------------------------------------------------------------------- */ + +#define UMAC_KEY_LEN 16 /* UMAC takes 16 bytes of external key */ + +/* GNU gcc and Microsoft Visual C++ (and copycats) on IA-32 are supported + * with some assembly + */ +#define GCC_X86 (__GNUC__ && __i386__) /* GCC on IA-32 */ +#define MSC_X86 (_M_IX86) /* Microsoft on IA-32 */ + +/* Message "words" are read from memory in an endian-specific manner. */ +/* For this implementation to behave correctly, __LITTLE_ENDIAN__ must */ +/* be set true if the host computer is little-endian. */ + +#ifndef __LITTLE_ENDIAN__ +#if __i386__ || __alpha__ || _M_IX86 || __LITTLE_ENDIAN +#define __LITTLE_ENDIAN__ 1 +#else +#define __LITTLE_ENDIAN__ 0 +#endif +#endif + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- Architecture Specific ------------------------------------------ */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +#if (MSC_X86) +#pragma warning(disable: 4731) /* Turn off "ebp manipulation" warning */ +#pragma warning(disable: 4311) /* Turn off "pointer trunc" warning */ +#if (__MWERKS__) +#define mmword xmmword /* Metrowerks C 3.03 doesn't recognize mmword */ +#endif +#endif + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- Primitive Routines --------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- */ +/* --- 32-bit by 32-bit to 64-bit Multiplication ------------------------ */ +/* ---------------------------------------------------------------------- */ + +#define MUL64(a,b) ((UINT64)((UINT64)(UINT32)(a) * (UINT64)(UINT32)(b))) + +/* ---------------------------------------------------------------------- */ +/* --- Endian Conversion --- Forcing assembly on some platforms */ +/* ---------------------------------------------------------------------- */ + +/* Lots of endian reversals happen in UMAC. PowerPC and Intel Architechture + * both support efficient endian conversion, but compilers seem unable to + * automatically utilize the efficient assembly opcodes. The architechture- + * specific versions utilize them. + */ + +#if (MSC_X86 && ! FORCE_C_ONLY) + +static inline UINT32 LOAD_UINT32_REVERSED(void *p) +{ + __asm { + mov eax, p + mov eax, [eax] + bswap eax + } +} + +static inline void STORE_UINT32_REVERSED(void *p, UINT32 x) +{ + __asm { + mov eax,x + bswap eax + mov ecx, p + mov [ecx], eax + } +} + +#elif (GCC_X86 && ! FORCE_C_ONLY) + +static inline UINT32 LOAD_UINT32_REVERSED(void *ptr) +{ + UINT32 temp; + asm volatile("bswap %0" : "=r" (temp) : "0" (*(UINT32 *)ptr)); + return temp; +} + +static inline void STORE_UINT32_REVERSED(void *ptr, UINT32 x) +{ + asm volatile("bswap %0" : "=r" (*(UINT32 *)ptr) : "0" (x)); +} + +#else + +static inline UINT32 LOAD_UINT32_REVERSED(void *ptr) +{ + UINT32 temp = *(UINT32 *)ptr; + temp = (temp >> 24) | ((temp & 0x00FF0000) >> 8 ) + | ((temp & 0x0000FF00) << 8 ) | (temp << 24); + return (UINT32)temp; +} + +static inline void STORE_UINT32_REVERSED(void *ptr, UINT32 x) +{ + UINT32 i = (UINT32)x; + *(UINT32 *)ptr = (i >> 24) | ((i & 0x00FF0000) >> 8 ) + | ((i & 0x0000FF00) << 8 ) | (i << 24); +} + +#endif + +/* The following definitions use the above reversal-primitives to do the right + * thing on endian specific load and stores. + */ + +#if (__LITTLE_ENDIAN__) +#define LOAD_UINT32_LITTLE(ptr) (*(UINT32 *)(ptr)) +#define STORE_UINT32_BIG(ptr,x) STORE_UINT32_REVERSED(ptr,x) +#else +#define LOAD_UINT32_LITTLE(ptr) LOAD_UINT32_REVERSED(ptr) +#define STORE_UINT32_BIG(ptr,x) (*(UINT32 *)(ptr) = (UINT32)(x)) +#endif + + + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- Begin KDF & PDF Section ---------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +/* UMAC uses AES with 16 byte block and key lengths */ +#define AES_BLOCK_LEN 16 + +#if GLADMAN_AES +typedef aes_encrypt_ctx aes_int_key[1]; /* AES internal */ + +#define aes_encryption(in,out,int_key) \ + aes_encrypt((in),(out),(int_key)) +#define aes_key_setup(key,int_key) \ + aes_encrypt_key128((key),(int_key)) +#else +#define AES_ROUNDS ((UMAC_KEY_LEN / 4) + 6) +typedef UINT8 aes_int_key[AES_ROUNDS+1][4][4]; /* AES internal */ +#define aes_encryption(in,out,int_key) \ + rijndaelEncrypt((u32 *)(int_key), AES_ROUNDS, (u8 *)(in), (u8 *)(out)) +#define aes_key_setup(key,int_key) \ + rijndaelKeySetupEnc((u32 *)(int_key), (const unsigned char *)(key), \ + UMAC_KEY_LEN*8) +#endif + +/* The user-supplied UMAC key is stretched using AES in a counter + * mode to supply all random bits needed by UMAC. The kdf function takes + * an AES internal key representation 'key' and writes a stream of + * 'nbytes' bytes to the memory pointed at by 'buffer_ptr'. Each distinct + * 'index' causes a distinct byte stream. + */ +static inline void kdf(void *buffer_ptr, aes_int_key key, UINT8 index, int nbytes) +{ + UINT8 in_buf[AES_BLOCK_LEN] = {0}; + UINT8 out_buf[AES_BLOCK_LEN]; + UINT8 *dst_buf = (UINT8 *)buffer_ptr; + int i; + + /* Setup the initial value */ + in_buf[AES_BLOCK_LEN-9] = index; + in_buf[AES_BLOCK_LEN-1] = i = 1; + + while (nbytes >= AES_BLOCK_LEN) { + aes_encryption(in_buf, out_buf, key); + memcpy(dst_buf,out_buf,AES_BLOCK_LEN); + in_buf[AES_BLOCK_LEN-1] = ++i; + nbytes -= AES_BLOCK_LEN; + dst_buf += AES_BLOCK_LEN; + } + if (nbytes) { + aes_encryption(in_buf, out_buf, key); + memcpy(dst_buf,out_buf,nbytes); + } +} + +/* The final UHASH result is XOR'd with the output of a pseudorandom + * function. Here, we use AES to generate random output and + * xor the appropriate bytes depending on the last bits of nonce. + * This scheme is optimized for sequential, increasing big-endian nonces. + */ + +typedef struct { + UINT8 cache[AES_BLOCK_LEN]; /* Previous AES output is saved */ + UINT8 nonce[AES_BLOCK_LEN]; /* The AES input making above cache */ + aes_int_key prf_key; /* Expanded AES key for PDF */ +} pdf_ctx; + +static inline void pdf_init(pdf_ctx *pc, aes_int_key prf_key) +{ + UINT8 buf[UMAC_KEY_LEN]; + + kdf(buf, prf_key, 0, UMAC_KEY_LEN); + aes_key_setup(buf, pc->prf_key); + + /* Initialize pdf and cache */ + memset(pc->nonce, 0, sizeof(pc->nonce)); + aes_encryption(pc->nonce, pc->cache, pc->prf_key); +} + +static inline void pdf_gen_xor(pdf_ctx *pc, const UINT8 nonce[8], UINT8 buf[8]) +{ + /* 'index' indicates that we'll be using the 0th or 1st eight bytes + * of the AES output. If last time around we returned the index-1st + * element, then we may have the result in the cache already. + */ + +#if (UMAC_OUTPUT_LEN == 4) +#define LOW_BIT_MASK 3 +#elif (UMAC_OUTPUT_LEN == 8) +#define LOW_BIT_MASK 1 +#elif (UMAC_OUTPUT_LEN > 8) +#define LOW_BIT_MASK 0 +#endif + + UINT8 tmp_nonce_lo[4]; + int index = nonce[7] & LOW_BIT_MASK; + *(UINT32 *)tmp_nonce_lo = ((UINT32 *)nonce)[1]; + tmp_nonce_lo[3] &= ~LOW_BIT_MASK; /* zero last bit */ + + if ( (((UINT32 *)tmp_nonce_lo)[0] != ((UINT32 *)pc->nonce)[1]) || + (((UINT32 *)nonce)[0] != ((UINT32 *)pc->nonce)[0]) ) + { + ((UINT32 *)pc->nonce)[0] = ((UINT32 *)nonce)[0]; + ((UINT32 *)pc->nonce)[1] = ((UINT32 *)tmp_nonce_lo)[0]; + aes_encryption(pc->nonce, pc->cache, pc->prf_key); + } + +#if (UMAC_OUTPUT_LEN == 4) + *((UINT32 *)buf) ^= ((UINT32 *)pc->cache)[index]; +#elif (UMAC_OUTPUT_LEN == 8) + *((UINT64 *)buf) ^= ((UINT64 *)pc->cache)[index]; +#elif (UMAC_OUTPUT_LEN == 12) + ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0]; + ((UINT32 *)buf)[2] ^= ((UINT32 *)pc->cache)[2]; +#elif (UMAC_OUTPUT_LEN == 16) + ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0]; + ((UINT64 *)buf)[1] ^= ((UINT64 *)pc->cache)[1]; +#endif +} + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- Begin NH Hash Section ------------------------------------------ */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +/* The NH-based hash functions used in UMAC are described in the UMAC paper + * and specification, both of which can be found at the UMAC website. + * The interface to this implementation has two + * versions, one expects the entire message being hashed to be passed + * in a single buffer and returns the hash result immediately. The second + * allows the message to be passed in a sequence of buffers. In the + * muliple-buffer interface, the client calls the routine nh_update() as + * many times as necessary. When there is no more data to be fed to the + * hash, the client calls nh_final() which calculates the hash output. + * Before beginning another hash calculation the nh_reset() routine + * must be called. The single-buffer routine, nh(), is equivalent to + * the sequence of calls nh_update() and nh_final(); however it is + * optimized and should be prefered whenever the multiple-buffer interface + * is not necessary. When using either interface, it is the client's + * responsability to pass no more than L1_KEY_LEN bytes per hash result. + * + * The routine nh_init() initializes the nh_ctx data structure and + * must be called once, before any other PDF routine. + */ + + /* The "nh_aux" routines do the actual NH hashing work. They + * expect buffers to be multiples of L1_PAD_BOUNDARY. These routines + * produce output for all STREAMS NH iterations in one call, + * allowing the parallel implementation of the streams. + */ + +#define STREAMS (UMAC_OUTPUT_LEN / 4) /* Number of times hash is applied */ +#define L1_KEY_LEN 1024 /* Internal key bytes */ +#define L1_KEY_SHIFT 16 /* Toeplitz key shift between streams */ +#define L1_PAD_BOUNDARY 32 /* pad message to boundary multiple */ +#define ALLOC_BOUNDARY 16 /* Keep buffers aligned to this */ +#define HASH_BUF_BYTES 64 /* nh_aux_hb buffer multiple */ + +typedef struct { + UINT8 nh_key [L1_KEY_LEN + L1_KEY_SHIFT * (STREAMS - 1)]; /* NH Key */ + UINT8 data [HASH_BUF_BYTES]; /* Incomming data buffer */ + int next_data_empty; /* Bookeeping variable for data buffer. */ + int bytes_hashed; /* Bytes (out of L1_KEY_LEN) incorperated. */ + UINT64 state[STREAMS]; /* on-line state */ +} nh_ctx; + + + +/* ---------------------------------------------------------------------- */ +#if ( ! FORCE_C_ONLY && ( GCC_X86 || MSC_X86 ) ) +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +#if ( SSE2 ) +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +#if ( MSC_X86 ) +/* ---------------------------------------------------------------------- */ + +/* This macro uses movdqa which requires 16-byte aligned data and key. */ +#define NH_STEP_1(n) \ + movdqa xmm2, n[ecx] \ + __asm movdqa xmm0, n[eax] \ + __asm movdqa xmm3, n+16[ecx] \ + __asm movdqa xmm1, n+16[eax] \ + __asm paddd xmm2, xmm0 \ + __asm paddd xmm3, xmm1 \ + __asm movdqa xmm5, xmm2 \ + __asm pmuludq xmm2, xmm3 \ + __asm psrldq xmm3, 4 \ + __asm paddq xmm6, xmm2 \ + __asm psrldq xmm5, 4 \ + __asm pmuludq xmm3, xmm5 \ + __asm paddq xmm6, xmm3 + +static inline void nh_aux_1(void *kp, void *dp, void *hp, UINT32 dlen) +{ + __asm{ + mov edx, dlen + mov ebx, hp + mov ecx, kp + mov eax, dp + sub edx, 128 + movq xmm6, mmword ptr [ebx] + jb label2 +label1: + NH_STEP_1(0) + NH_STEP_1(32) + NH_STEP_1(64) + NH_STEP_1(96) + add eax, 128 + add ecx, 128 + sub edx, 128 + jnb label1 +label2: + add edx,128 + je label4 +label3: + NH_STEP_1(0) + add eax, 32 + add ecx, 32 + sub edx, 32 + jne label3 +label4: + movdqa xmm0,xmm6 + psrldq xmm0, 8 + paddq xmm6, xmm0 + movq mmword ptr [ebx], xmm6 + } +} + + +/* This macro uses movdqa which requires 16-byte aligned data and key. */ +#define NH_STEP_2(n) \ + movdqa xmm0, n[eax] \ + __asm movdqa xmm3, n+16[ecx] \ + __asm movdqa xmm1, n+16[eax] \ + __asm paddd xmm2, xmm0 \ + __asm movdqa xmm4, xmm3 \ + __asm paddd xmm3, xmm1 \ + __asm movdqa xmm5, xmm2 \ + __asm pmuludq xmm2, xmm3 \ + __asm psrldq xmm3, 4 \ + __asm paddq xmm6, xmm2 \ + __asm movdqa xmm2, n+32[ecx] \ + __asm psrldq xmm5, 4 \ + __asm pmuludq xmm3, xmm5 \ + __asm paddd xmm1, xmm2 \ + __asm paddd xmm4, xmm0 \ + __asm paddq xmm6, xmm3 \ + __asm movdqa xmm3, xmm4 \ + __asm pmuludq xmm4, xmm1 \ + __asm psrldq xmm1, 4 \ + __asm psrldq xmm3, 4 \ + __asm pmuludq xmm3, xmm1 \ + __asm paddq xmm7, xmm4 \ + __asm paddq xmm7, xmm3 + + +static inline void nh_aux_2(void *kp, void *dp, void *hp, UINT32 dlen) +/* Perform 2 streams simultaneously */ +{ + __asm{ + mov edx, dlen + mov ebx, hp + mov ecx, kp + mov eax, dp + sub edx, 128 + movq xmm6, mmword ptr [ebx] + movq xmm7, mmword ptr 8[ebx] + movdqa xmm2, [ecx] + jb label2 +label1: + NH_STEP_2(0) + NH_STEP_2(32) + NH_STEP_2(64) + NH_STEP_2(96) + add eax, 128 + add ecx, 128 + sub edx, 128 + jnb label1 +label2: + add edx,128 + je label4 +label3: + NH_STEP_2(0) + add eax, 32 + add ecx, 32 + sub edx, 32 + jne label3 +label4: + movdqa xmm0,xmm6 + movdqa xmm1,xmm7 + psrldq xmm0, 8 + psrldq xmm1, 8 + paddq xmm6, xmm0 + paddq xmm7, xmm1 + movq mmword ptr [ebx], xmm6 + movq mmword ptr 8[ebx], xmm7 + } +} + +/* ---------------------------------------------------------------------- */ +#elif (GCC_X86) +/* ---------------------------------------------------------------------- */ + +#define NH_STEP_1(n) \ + "movdqa "#n"(%0), %%xmm2\n\t" \ + "movdqa "#n"(%1), %%xmm0\n\t" \ + "movdqa "#n"+16(%0), %%xmm3\n\t" \ + "movdqa "#n"+16(%1), %%xmm1\n\t" \ + "paddd %%xmm0, %%xmm2\n\t" \ + "paddd %%xmm1, %%xmm3\n\t" \ + "movdqa %%xmm2, %%xmm5\n\t" \ + "pmuludq %%xmm3, %%xmm2\n\t" \ + "psrldq $4, %%xmm3\n\t" \ + "paddq %%xmm2, %%xmm6\n\t" \ + "psrldq $4, %%xmm5\n\t" \ + "pmuludq %%xmm5, %%xmm3\n\t" \ + "paddq %%xmm3, %%xmm6\n\t" + +static inline void nh_aux_1(void *kp, void *dp, void *hp, UINT32 dlen) +{ + UINT32 d1,d2,d3; + asm volatile ( + "sub $128, %2\n\t" + "movq (%3), %%xmm6\n\t" + "jb 2f\n\t" + ".align 4,0x90\n" + "1:\n\t" + NH_STEP_1(0) + NH_STEP_1(32) + NH_STEP_1(64) + NH_STEP_1(96) + "add $128, %1\n\t" + "add $128, %0\n\t" + "sub $128, %2\n\t" + "jnb 1b\n\t" + ".align 4,0x90\n" + "2:\n\t" + "add $128, %2\n\t" + "je 4f\n\t" + ".align 4,0x90\n" + "3:\n\t" + NH_STEP_1(0) + "add $32, %1\n\t" + "add $32, %0\n\t" + "sub $32, %2\n\t" + "jne 3b\n\t" + ".align 4,0x90\n" + "4:\n\t" + "movdqa %%xmm6, %%xmm0\n\t" + "psrldq $8, %%xmm0\n\t" + "paddq %%xmm0, %%xmm6\n\t" + "movq %%xmm6, (%3)" + : "+r" (kp), "+r" (dp), "+r" (dlen) + : "r" (hp) + : "memory"); +} + +#define NH_STEP_2(n) \ + "movdqa "#n"(%1), %%xmm0\n\t" \ + "movdqa "#n"+16(%0), %%xmm3\n\t" \ + "movdqa "#n"+16(%1), %%xmm1\n\t" \ + "paddd %%xmm0, %%xmm2\n\t" \ + "movdqa %%xmm3, %%xmm4\n\t" \ + "paddd %%xmm1, %%xmm3\n\t" \ + "movdqa %%xmm2, %%xmm5\n\t" \ + "pmuludq %%xmm3, %%xmm2\n\t" \ + "psrldq $4, %%xmm3\n\t" \ + "paddq %%xmm2, %%xmm6\n\t" \ + "movdqa "#n"+32(%0), %%xmm2\n\t" \ + "psrldq $4, %%xmm5\n\t" \ + "pmuludq %%xmm5, %%xmm3\n\t" \ + "paddd %%xmm2, %%xmm1\n\t" \ + "paddd %%xmm0, %%xmm4\n\t" \ + "paddq %%xmm3, %%xmm6\n\t" \ + "movdqa %%xmm4, %%xmm3\n\t" \ + "pmuludq %%xmm1, %%xmm4\n\t" \ + "psrldq $4, %%xmm1\n\t" \ + "psrldq $4, %%xmm3\n\t" \ + "pmuludq %%xmm1, %%xmm3\n\t" \ + "paddq %%xmm4, %%xmm7\n\t" \ + "paddq %%xmm3, %%xmm7\n\t" + + +static inline void nh_aux_2(void *kp, void *dp, void *hp, UINT32 dlen) +{ + UINT32 d1,d2,d3; + asm volatile ( + "sub $128, %2\n\t" + "movq (%3), %%xmm6\n\t" + "movq 8(%3), %%xmm7\n\t" + "movdqa (%0), %%xmm2\n\t" + "jb 2f\n\t" + ".align 4,0x90\n" + "1:\n\t" + NH_STEP_2(0) + NH_STEP_2(32) + NH_STEP_2(64) + NH_STEP_2(96) + "add $128, %1\n\t" + "add $128, %0\n\t" + "sub $128, %2\n\t" + "jnb 1b\n\t" + ".align 4,0x90\n" + "2:\n\t" + "add $128, %2\n\t" + "je 4f\n\t" + ".align 4,0x90\n" + "3:\n\t" + NH_STEP_2(0) + "add $32, %1\n\t" + "add $32, %0\n\t" + "sub $32, %2\n\t" + "jne 3b\n\t" + ".align 4,0x90\n" + "4:\n\t" + "movdqa %%xmm6, %%xmm0\n\t" + "movdqa %%xmm7, %%xmm1\n\t" + "psrldq $8, %%xmm0\n\t" + "psrldq $8, %%xmm1\n\t" + "paddq %%xmm0, %%xmm6\n\t" + "paddq %%xmm1, %%xmm7\n\t" + "movq %%xmm6, (%3)\n\t" + "movq %%xmm7, 8(%3)" + : "+r" (kp), "+r" (dp), "+r" (dlen) + : "r" (hp) + : "memory"); +} + +/* ---------------------------------------------------------------------- */ +#endif /* MSC GCC Sections for SSE2, not C */ +/* ---------------------------------------------------------------------- */ + +static inline void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen) +/* NH hashing primitive. 128 bits are written at hp by performing two */ +/* passes over the data with the second key being the toeplitz shift of */ +/* the first. */ +{ +#if (UMAC_OUTPUT_LEN == 4) + nh_aux_1(kp,dp,hp,dlen); +#elif (UMAC_OUTPUT_LEN == 8) + nh_aux_2(kp,dp,hp,dlen); +#elif (UMAC_OUTPUT_LEN == 12) + nh_aux_2(kp,dp,hp,dlen); + nh_aux_1((UINT8 *)kp+32,dp,(UINT8 *)hp+16,dlen); +#elif (UMAC_OUTPUT_LEN == 16) + nh_aux_2(kp,dp,hp,dlen); + nh_aux_2((UINT8 *)kp+32,dp,(UINT8 *)hp+16,dlen); +#endif +} + + + +/* ---------------------------------------------------------------------- */ +#else /* not SSE2 */ +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +#if ( MSC_X86 ) +/* ---------------------------------------------------------------------- */ + +#define NH_STEP(n) \ + mov eax,n[ebx] \ + __asm mov edx,n+16[ebx] \ + __asm add eax,n[ecx] \ + __asm add edx,n+16[ecx] \ + __asm mul edx \ + __asm add esi,eax \ + __asm adc edi,edx + + +static inline void nh_aux_1(void *kp, void *dp, void *hp, UINT32 dlen) +{ + __asm{ + push ebp + mov ecx,kp + mov ebx,dp + mov eax,hp + mov ebp,dlen + sub ebp,128 + mov esi,[eax] + mov edi,4[eax] + jb label2 /* if 0 */ +label1: + NH_STEP(0) + NH_STEP(4) + NH_STEP(8) + NH_STEP(12) + NH_STEP(32) + NH_STEP(36) + NH_STEP(40) + NH_STEP(44) + NH_STEP(64) + NH_STEP(68) + NH_STEP(72) + NH_STEP(76) + NH_STEP(96) + NH_STEP(100) + NH_STEP(104) + NH_STEP(108) + add ecx,128 + add ebx,128 + sub ebp,128 + jnb label1 +label2: + add ebp,128 + je label4 +label3: + NH_STEP(0) + NH_STEP(4) + NH_STEP(8) + NH_STEP(12) + add ecx,32 + add ebx,32 + sub ebp,32 + jne label3 +label4: + pop ebp + mov eax,hp + mov [eax],esi + mov 4[eax],edi + } +} + +/* ---------------------------------------------------------------------- */ +#elif ( GCC_X86 ) +/* ---------------------------------------------------------------------- */ + +#define NH_STEP(n) \ + "movl "#n"(%%ebx),%%eax\n\t" \ + "movl "#n"+16(%%ebx),%%edx\n\t" \ + "addl "#n"(%%ecx),%%eax\n\t" \ + "addl "#n"+16(%%ecx),%%edx\n\t" \ + "mull %%edx\n\t" \ + "addl %%eax,%%esi\n\t" \ + "adcl %%edx,%%edi\n\t" + +static inline void nh_aux_1(void *kp, void *dp, void *hp, UINT32 dlen) +/* NH hashing primitive. Previous (partial) hash result is loaded and */ +/* then stored via hp pointer. The length of the data pointed at by dp is */ +/* guaranteed to be divisible by HASH_BUF_BYTES (64), which means we can */ +/* optimize by unrolling the loop. 64 bits are written at hp. */ +{ + UINT32 *p = (UINT32 *)hp; + + asm volatile ( + "\n\t" + "pushl %%eax\n\t" + "pushl %%ebp\n\t" + "subl $128,%%eax\n\t" + "movl %%eax,%%ebp\n\t" + "jb 2f\n\t" + ".align 4,0x90\n" + "1:\n\t" + + NH_STEP(0) + NH_STEP(4) + NH_STEP(8) + NH_STEP(12) + NH_STEP(32) + NH_STEP(36) + NH_STEP(40) + NH_STEP(44) + NH_STEP(64) + NH_STEP(68) + NH_STEP(72) + NH_STEP(76) + NH_STEP(96) + NH_STEP(100) + NH_STEP(104) + NH_STEP(108) + + "addl $128,%%ecx\n\t" + "addl $128,%%ebx\n\t" + "subl $128,%%ebp\n\t" + "jnb 1b\n\t" + ".align 4\n" + "2:\n\t" + "addl $128,%%ebp\n\t" + "je 4f\n\t" + ".align 4,0x90\n" + "3:\n\t" + + NH_STEP(0) + NH_STEP(4) + NH_STEP(8) + NH_STEP(12) + + "addl $32,%%ecx\n\t" + "addl $32,%%ebx\n\t" + "subl $32,%%ebp\n\t" + "jne 3b\n\t" + ".align 4\n" + "4:\n\t" + "popl %%ebp\n\t" + "popl %%eax" + : "+S" (p[0]), "+D" (p[1]), "+c" (kp), "+b" (dp) + : "a" (dlen) + : "edx", "memory"); +} + +/* ---------------------------------------------------------------------- */ +#endif /* GCC or MSC, not SSE2, not C */ +/* ---------------------------------------------------------------------- */ + +static inline void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen) +/* NH hashing primitive. 128 bits are written at hp by performing two */ +/* passes over the data with the second key being the toeplitz shift of */ +/* the first. */ +{ + nh_aux_1(kp,dp,hp,dlen); +#if (UMAC_OUTPUT_LEN >= 8) + nh_aux_1((UINT8 *)kp+16,dp,(UINT8 *)hp+8,dlen); +#endif +#if (UMAC_OUTPUT_LEN >= 12) + nh_aux_1((UINT8 *)kp+32,dp,(UINT8 *)hp+16,dlen); +#endif +#if (UMAC_OUTPUT_LEN == 16) + nh_aux_1((UINT8 *)kp+48,dp,(UINT8 *)hp+24,dlen); +#endif +} + +/* ---------------------------------------------------------------------- */ +#endif /* SSE2 */ +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +#else /* FORCE_C_ONLY */ +/* ---------------------------------------------------------------------- */ + +#if (UMAC_OUTPUT_LEN == 4) + +static inline void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen) +/* NH hashing primitive. Previous (partial) hash result is loaded and +* then stored via hp pointer. The length of the data pointed at by "dp", +* "dlen", is guaranteed to be divisible by L1_PAD_BOUNDARY (32). Key +* is expected to be endian compensated in memory at key setup. +*/ +{ + UINT64 h; + UWORD c = dlen / 32; + UINT32 *k = (UINT32 *)kp; + UINT32 *d = (UINT32 *)dp; + UINT32 d0,d1,d2,d3,d4,d5,d6,d7; + UINT32 k0,k1,k2,k3,k4,k5,k6,k7; + + h = *((UINT64 *)hp); + do { + d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); + d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); + d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); + d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); + k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); + k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); + h += MUL64((k0 + d0), (k4 + d4)); + h += MUL64((k1 + d1), (k5 + d5)); + h += MUL64((k2 + d2), (k6 + d6)); + h += MUL64((k3 + d3), (k7 + d7)); + + d += 8; + k += 8; + } while (--c); + *((UINT64 *)hp) = h; +} + +#elif (UMAC_OUTPUT_LEN == 8) + +static inline void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen) +/* Same as previous nh_aux, but two streams are handled in one pass, + * reading and writing 16 bytes of hash-state per call. + */ +{ + UINT64 h1,h2; + UWORD c = dlen / 32; + UINT32 *k = (UINT32 *)kp; + UINT32 *d = (UINT32 *)dp; + UINT32 d0,d1,d2,d3,d4,d5,d6,d7; + UINT32 k0,k1,k2,k3,k4,k5,k6,k7, + k8,k9,k10,k11; + + h1 = *((UINT64 *)hp); + h2 = *((UINT64 *)hp + 1); + k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); + do { + d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); + d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); + d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); + d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); + k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); + k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11); + + h1 += MUL64((k0 + d0), (k4 + d4)); + h2 += MUL64((k4 + d0), (k8 + d4)); + + h1 += MUL64((k1 + d1), (k5 + d5)); + h2 += MUL64((k5 + d1), (k9 + d5)); + + h1 += MUL64((k2 + d2), (k6 + d6)); + h2 += MUL64((k6 + d2), (k10 + d6)); + + h1 += MUL64((k3 + d3), (k7 + d7)); + h2 += MUL64((k7 + d3), (k11 + d7)); + + k0 = k8; k1 = k9; k2 = k10; k3 = k11; + + d += 8; + k += 8; + } while (--c); + ((UINT64 *)hp)[0] = h1; + ((UINT64 *)hp)[1] = h2; +} + +#elif (UMAC_OUTPUT_LEN == 12) + +static inline void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen) +/* Same as previous nh_aux, but two streams are handled in one pass, + * reading and writing 24 bytes of hash-state per call. +*/ +{ + UINT64 h1,h2,h3; + UWORD c = dlen / 32; + UINT32 *k = (UINT32 *)kp; + UINT32 *d = (UINT32 *)dp; + UINT32 d0,d1,d2,d3,d4,d5,d6,d7; + UINT32 k0,k1,k2,k3,k4,k5,k6,k7, + k8,k9,k10,k11,k12,k13,k14,k15; + + h1 = *((UINT64 *)hp); + h2 = *((UINT64 *)hp + 1); + h3 = *((UINT64 *)hp + 2); + k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); + k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); + do { + d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); + d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); + d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); + d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); + k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11); + k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15); + + h1 += MUL64((k0 + d0), (k4 + d4)); + h2 += MUL64((k4 + d0), (k8 + d4)); + h3 += MUL64((k8 + d0), (k12 + d4)); + + h1 += MUL64((k1 + d1), (k5 + d5)); + h2 += MUL64((k5 + d1), (k9 + d5)); + h3 += MUL64((k9 + d1), (k13 + d5)); + + h1 += MUL64((k2 + d2), (k6 + d6)); + h2 += MUL64((k6 + d2), (k10 + d6)); + h3 += MUL64((k10 + d2), (k14 + d6)); + + h1 += MUL64((k3 + d3), (k7 + d7)); + h2 += MUL64((k7 + d3), (k11 + d7)); + h3 += MUL64((k11 + d3), (k15 + d7)); + + k0 = k8; k1 = k9; k2 = k10; k3 = k11; + k4 = k12; k5 = k13; k6 = k14; k7 = k15; + + d += 8; + k += 8; + } while (--c); + ((UINT64 *)hp)[0] = h1; + ((UINT64 *)hp)[1] = h2; + ((UINT64 *)hp)[2] = h3; +} + +#elif (UMAC_OUTPUT_LEN == 16) + +static inline void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen) +/* Same as previous nh_aux, but two streams are handled in one pass, + * reading and writing 24 bytes of hash-state per call. +*/ +{ + UINT64 h1,h2,h3,h4; + UWORD c = dlen / 32; + UINT32 *k = (UINT32 *)kp; + UINT32 *d = (UINT32 *)dp; + UINT32 d0,d1,d2,d3,d4,d5,d6,d7; + UINT32 k0,k1,k2,k3,k4,k5,k6,k7, + k8,k9,k10,k11,k12,k13,k14,k15, + k16,k17,k18,k19; + + h1 = *((UINT64 *)hp); + h2 = *((UINT64 *)hp + 1); + h3 = *((UINT64 *)hp + 2); + h4 = *((UINT64 *)hp + 3); + k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3); + k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7); + do { + d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1); + d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3); + d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5); + d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7); + k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11); + k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15); + k16 = *(k+16); k17 = *(k+17); k18 = *(k+18); k19 = *(k+19); + + h1 += MUL64((k0 + d0), (k4 + d4)); + h2 += MUL64((k4 + d0), (k8 + d4)); + h3 += MUL64((k8 + d0), (k12 + d4)); + h4 += MUL64((k12 + d0), (k16 + d4)); + + h1 += MUL64((k1 + d1), (k5 + d5)); + h2 += MUL64((k5 + d1), (k9 + d5)); + h3 += MUL64((k9 + d1), (k13 + d5)); + h4 += MUL64((k13 + d1), (k17 + d5)); + + h1 += MUL64((k2 + d2), (k6 + d6)); + h2 += MUL64((k6 + d2), (k10 + d6)); + h3 += MUL64((k10 + d2), (k14 + d6)); + h4 += MUL64((k14 + d2), (k18 + d6)); + + h1 += MUL64((k3 + d3), (k7 + d7)); + h2 += MUL64((k7 + d3), (k11 + d7)); + h3 += MUL64((k11 + d3), (k15 + d7)); + h4 += MUL64((k15 + d3), (k19 + d7)); + + k0 = k8; k1 = k9; k2 = k10; k3 = k11; + k4 = k12; k5 = k13; k6 = k14; k7 = k15; + k8 = k16; k9 = k17; k10 = k18; k11 = k19; + + d += 8; + k += 8; + } while (--c); + ((UINT64 *)hp)[0] = h1; + ((UINT64 *)hp)[1] = h2; + ((UINT64 *)hp)[2] = h3; + ((UINT64 *)hp)[3] = h4; +} + +/* ---------------------------------------------------------------------- */ +#endif /* UMAC_OUTPUT_LENGTH */ +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +#endif /* FORCE_C_ONLY */ +/* ---------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- */ + +static inline void nh_transform(nh_ctx *hc, UINT8 *buf, UINT32 nbytes) +/* This function is a wrapper for the primitive NH hash functions. It takes + * as argument "hc" the current hash context and a buffer which must be a + * multiple of L1_PAD_BOUNDARY. The key passed to nh_aux is offset + * appropriately according to how much message has been hashed already. + */ +{ + UINT8 *key; + + key = hc->nh_key + hc->bytes_hashed; + nh_aux(key, buf, hc->state, nbytes); +} + +/* ---------------------------------------------------------------------- */ + +static inline void endian_convert(void *buf, UWORD bpw, UINT32 num_bytes) +/* We endian convert the keys on little-endian computers to */ +/* compensate for the lack of big-endian memory reads during hashing. */ +{ + UWORD iters = num_bytes / bpw; + if (bpw == 4) { + UINT32 *p = (UINT32 *)buf; + do { + *p = LOAD_UINT32_REVERSED(p); + p++; + } while (--iters); + } else if (bpw == 8) { + UINT32 *p = (UINT32 *)buf; + UINT32 t; + do { + t = LOAD_UINT32_REVERSED(p+1); + p[1] = LOAD_UINT32_REVERSED(p); + p[0] = t; + p += 2; + } while (--iters); + } +} +#if (__LITTLE_ENDIAN__) +#define endian_convert_if_le(x,y,z) endian_convert((x),(y),(z)) +#else +#define endian_convert_if_le(x,y,z) do{}while(0) /* Do nothing */ +#endif + +/* ---------------------------------------------------------------------- */ + +static inline void nh_reset(nh_ctx *hc) +/* Reset nh_ctx to ready for hashing of new data */ +{ + hc->bytes_hashed = 0; + hc->next_data_empty = 0; + hc->state[0] = 0; +#if (UMAC_OUTPUT_LEN >= 8) + hc->state[1] = 0; +#endif +#if (UMAC_OUTPUT_LEN >= 12) + hc->state[2] = 0; +#endif +#if (UMAC_OUTPUT_LEN == 16) + hc->state[3] = 0; +#endif + +} + +/* ---------------------------------------------------------------------- */ + +static inline void nh_init(nh_ctx *hc, aes_int_key prf_key) +/* Generate nh_key, endian convert and reset to be ready for hashing. */ +{ + kdf(hc->nh_key, prf_key, 1, sizeof(hc->nh_key)); + endian_convert_if_le(hc->nh_key, 4, sizeof(hc->nh_key)); + nh_reset(hc); +} + +/* ---------------------------------------------------------------------- */ + +static inline void nh_update(nh_ctx *hc, UINT8 *buf, UINT32 nbytes) +/* Incorporate nbytes of data into a nh_ctx, buffer whatever is not an */ +/* even multiple of HASH_BUF_BYTES. */ +{ + UINT32 i,j; + + j = hc->next_data_empty; + if ((j + nbytes) >= HASH_BUF_BYTES) { + if (j) { + i = HASH_BUF_BYTES - j; + memcpy(hc->data+j, buf, i); + nh_transform(hc,hc->data,HASH_BUF_BYTES); + nbytes -= i; + buf += i; + hc->bytes_hashed += HASH_BUF_BYTES; + } + if (nbytes >= HASH_BUF_BYTES) { + i = nbytes & ~(HASH_BUF_BYTES - 1); + nh_transform(hc, buf, i); + nbytes -= i; + buf += i; + hc->bytes_hashed += i; + } + j = 0; + } + memcpy(hc->data + j, buf, nbytes); + hc->next_data_empty = j + nbytes; +} + +/* ---------------------------------------------------------------------- */ + +static inline void zero_pad(UINT8 *p, int nbytes) +{ +/* Write "nbytes" of zeroes, beginning at "p" */ + if (nbytes >= (int)sizeof(UWORD)) { + while ((ptrdiff_t)p % sizeof(UWORD)) { + *p = 0; + nbytes--; + p++; + } + while (nbytes >= (int)sizeof(UWORD)) { + *(UWORD *)p = 0; + nbytes -= sizeof(UWORD); + p += sizeof(UWORD); + } + } + while (nbytes) { + *p = 0; + nbytes--; + p++; + } +} + +/* ---------------------------------------------------------------------- */ + +static inline void nh_final(nh_ctx *hc, UINT8 *result) +/* After passing some number of data buffers to nh_update() for integration + * into an NH context, nh_final is called to produce a hash result. If any + * bytes are in the buffer hc->data, incorporate them into the + * NH context. Finally, add into the NH accumulation "state" the total number + * of bits hashed. The resulting numbers are written to the buffer "result". + * If nh_update was never called, L1_PAD_BOUNDARY zeroes are incorporated. + */ +{ + int nh_len, nbits; + + if (hc->next_data_empty != 0) { + nh_len = ((hc->next_data_empty + (L1_PAD_BOUNDARY - 1)) & + ~(L1_PAD_BOUNDARY - 1)); + zero_pad(hc->data + hc->next_data_empty, + nh_len - hc->next_data_empty); + nh_transform(hc, hc->data, nh_len); + hc->bytes_hashed += hc->next_data_empty; + } else if (hc->bytes_hashed == 0) { + nh_len = L1_PAD_BOUNDARY; + zero_pad(hc->data, L1_PAD_BOUNDARY); + nh_transform(hc, hc->data, nh_len); + } + + nbits = (hc->bytes_hashed << 3); + ((UINT64 *)result)[0] = ((UINT64 *)hc->state)[0] + nbits; +#if (UMAC_OUTPUT_LEN >= 8) + ((UINT64 *)result)[1] = ((UINT64 *)hc->state)[1] + nbits; +#endif +#if (UMAC_OUTPUT_LEN >= 12) + ((UINT64 *)result)[2] = ((UINT64 *)hc->state)[2] + nbits; +#endif +#if (UMAC_OUTPUT_LEN == 16) + ((UINT64 *)result)[3] = ((UINT64 *)hc->state)[3] + nbits; +#endif + nh_reset(hc); +} + +/* ---------------------------------------------------------------------- */ + +static inline void nh(nh_ctx *hc, UINT8 *buf, UINT32 padded_len, + UINT32 unpadded_len, UINT8 *result) +/* All-in-one nh_update() and nh_final() equivalent. + * Assumes that padded_len is divisible by L1_PAD_BOUNDARY and result is + * well aligned + */ +{ + UINT32 nbits; + + /* Initialize the hash state */ + nbits = (unpadded_len << 3); + + ((UINT64 *)result)[0] = nbits; +#if (UMAC_OUTPUT_LEN >= 8) + ((UINT64 *)result)[1] = nbits; +#endif +#if (UMAC_OUTPUT_LEN >= 12) + ((UINT64 *)result)[2] = nbits; +#endif +#if (UMAC_OUTPUT_LEN == 16) + ((UINT64 *)result)[3] = nbits; +#endif + + nh_aux(hc->nh_key, buf, result, padded_len); +} + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- Begin UHASH Section -------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +/* UHASH is a multi-layered algorithm. Data presented to UHASH is first + * hashed by NH. The NH output is then hashed by a polynomial-hash layer + * unless the initial data to be hashed is short. After the polynomial- + * layer, an inner-product hash is used to produce the final UHASH output. + * + * UHASH provides two interfaces, one all-at-once and another where data + * buffers are presented sequentially. In the sequential interface, the + * UHASH client calls the routine uhash_update() as many times as necessary. + * When there is no more data to be fed to UHASH, the client calls + * uhash_final() which + * calculates the UHASH output. Before beginning another UHASH calculation + * the uhash_reset() routine must be called. The all-at-once UHASH routine, + * uhash(), is equivalent to the sequence of calls uhash_update() and + * uhash_final(); however it is optimized and should be + * used whenever the sequential interface is not necessary. + * + * The routine uhash_init() initializes the uhash_ctx data structure and + * must be called once, before any other UHASH routine. + */ + +/* ---------------------------------------------------------------------- */ +/* ----- Constants and uhash_ctx ---------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +/* ----- Poly hash and Inner-Product hash Constants --------------------- */ +/* ---------------------------------------------------------------------- */ + +/* Primes and masks */ +#define p36 ((UINT64)0x0000000FFFFFFFFBull) /* 2^36 - 5 */ +#define p64 ((UINT64)0xFFFFFFFFFFFFFFC5ull) /* 2^64 - 59 */ +#define m36 ((UINT64)0x0000000FFFFFFFFFull) /* The low 36 of 64 bits */ + + +/* ---------------------------------------------------------------------- */ + +typedef struct uhash_ctx { + nh_ctx hash; /* Hash context for L1 NH hash */ + UINT64 poly_key_8[STREAMS]; /* p64 poly keys */ + UINT64 poly_accum[STREAMS]; /* poly hash result */ + UINT64 ip_keys[STREAMS*4]; /* Inner-product keys */ + UINT32 ip_trans[STREAMS]; /* Inner-product translation */ + UINT32 msg_len; /* Total length of data passed */ + /* to uhash */ +} uhash_ctx; + +/* ---------------------------------------------------------------------- */ + + +/* The polynomial hashes use Horner's rule to evaluate a polynomial one + * word at a time. As described in the specification, poly32 and poly64 + * require keys from special domains. The following impelementations exploit + * the special domains to avoid overflow. The results are not guaranteed to + * be within Z_p32 and Z_p64, but the Inner-Product hash implementation + * patches any errant values. + */ + +static inline UINT64 poly64(UINT64 cur, UINT64 key, UINT64 data) +{ + UINT32 key_hi = (UINT32)(key >> 32), + key_lo = (UINT32)key, + cur_hi = (UINT32)(cur >> 32), + cur_lo = (UINT32)cur, + x_lo, + x_hi; + UINT64 X,T,res; + + X = MUL64(key_hi, cur_lo) + MUL64(cur_hi, key_lo); + x_lo = (UINT32)X; + x_hi = (UINT32)(X >> 32); + + res = (MUL64(key_hi, cur_hi) + x_hi) * 59 + MUL64(key_lo, cur_lo); + + T = ((UINT64)x_lo << 32); + res += T; + if (res < T) + res += 59; + + res += data; + if (res < data) + res += 59; + + return res; +} + + +/* Although UMAC is specified to use a ramped polynomial hash scheme, this + * impelemtation does not handle all ramp levels. Because we don't handle + * the ramp up to p128 modulus in this implementation, we are limited to + * 2^14 poly_hash() invocations per stream (for a total capacity of 2^24 + * bytes input to UMAC per tag, ie. 16MB). + */ +static inline void poly_hash(uhash_ctx_t hhc, UINT32 data_in[]) +{ + int i; + UINT64 *data=(UINT64*)data_in; + uhash_ctx *hc = (uhash_ctx *)hhc; + + for (i = 0; i < STREAMS; i++) { + if ((UINT32)(data[i] >> 32) == 0xfffffffful) { + hc->poly_accum[i] = poly64(hc->poly_accum[i], + hc->poly_key_8[i], p64 - 1); + hc->poly_accum[i] = poly64(hc->poly_accum[i], + hc->poly_key_8[i], (data[i] - 59)); + } else { + hc->poly_accum[i] = poly64(hc->poly_accum[i], + hc->poly_key_8[i], data[i]); + } + } +} + + +/* ---------------------------------------------------------------------- */ + + +/* The final step in UHASH is an inner-product hash. The poly hash + * produces a result not neccesarily WORD_LEN bytes long. The inner- + * product hash breaks the polyhash output into 16-bit chunks and + * multiplies each with a 36 bit key. + */ + +#if (MSC_X86 && ! FORCE_C_ONLY) + +static inline UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data) +{ + UINT32 data_hi = (UINT32)(data >> 32), + data_lo = (UINT32)(data), + t_hi = (UINT32)(t >> 32), + t_lo = (UINT32)(t); + __asm{ + mov edi, ipkp + mov ebx,data_hi + mov ecx,data_lo + mov esi, t_lo + mov edx, t_hi + push ebp + mov ebp,edx + mov eax,ebx + shr eax,16 + mul DWORD PTR 0[edi] + add esi,eax + adc ebp,edx + mov eax,ebx + shr eax,16 + mul DWORD PTR 4[edi] + add ebp,eax + + movzx eax,bx + mul DWORD PTR 8[edi] + add esi,eax + adc ebp,edx + movzx eax,bx + mul DWORD PTR 12[edi] + add ebp,eax + + mov eax,ecx + shr eax,16 + mul DWORD PTR 16[edi] + add esi,eax + adc ebp,edx + mov eax,ecx + shr eax,16 + mul DWORD PTR 20[edi] + add ebp,eax + + movzx eax,cx + mul DWORD PTR 24[edi] + add esi,eax + adc ebp,edx + movzx eax,cx + mul DWORD PTR 28[edi] + lea edx,[eax+ebp] + mov eax,esi + pop ebp + /* MSVC returns UINT64 in edx:eax */ + } +} + +static inline UINT32 ip_reduce_p36(UINT64 t) +{ + UINT32 t_hi = (UINT32)(t >> 32), + t_lo = (UINT32)(t); + __asm{ + mov edx,t_hi + mov eax,t_lo + mov edi,edx + and edx,15 + shr edi,4 + lea edi,[edi+edi*4] + add eax,edi + adc edx,0 + cmp edx,0xf + jb skip_sub + ja do_sub + cmp eax,0xfffffffb + jb skip_sub +do_sub: + sub eax, 0xfffffffb + /* sbb edx, 0xf We don't return the high word */ +skip_sub: + } +} + +#elif (GCC_X86 && ! FORCE_C_ONLY) + +static inline UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data) +{ + UINT32 dummy1, dummy2; + asm volatile( + "pushl %%ebp\n\t" + "movl %%eax,%%esi\n\t" + "movl %%edx,%%ebp\n\t" + "movl %%ebx,%%eax\n\t" + "shrl $16,%%eax\n\t" + "mull 0(%%edi)\n\t" + "addl %%eax,%%esi\n\t" + "adcl %%edx,%%ebp\n\t" + "movl %%ebx,%%eax\n\t" + "shrl $16,%%eax\n\t" + "mull 4(%%edi)\n\t" + "addl %%eax,%%ebp\n\t" + + "movzwl %%bx,%%eax\n\t" + "mull 8(%%edi)\n\t" + "addl %%eax,%%esi\n\t" + "adcl %%edx,%%ebp\n\t" + "movzwl %%bx,%%eax\n\t" + "mull 12(%%edi)\n\t" + "addl %%eax,%%ebp\n\t" + + "movl %%ecx,%%eax\n\t" + "shrl $16,%%eax\n\t" + "mull 16(%%edi)\n\t" + "addl %%eax,%%esi\n\t" + "adcl %%edx,%%ebp\n\t" + "movl %%ecx,%%eax\n\t" + "shrl $16,%%eax\n\t" + "mull 20(%%edi)\n\t" + "addl %%eax,%%ebp\n\t" + + "movzwl %%cx,%%eax\n\t" + "mull 24(%%edi)\n\t" + "addl %%eax,%%esi\n\t" + "adcl %%edx,%%ebp\n\t" + "movzwl %%cx,%%eax\n\t" + "mull 28(%%edi)\n\t" + "leal (%%eax,%%ebp),%%edx\n\t" + "movl %%esi,%%eax\n\t" + "popl %%ebp" + : "+A"(t), "=b"(dummy1), "=c"(dummy2) + : "D"(ipkp), "1"((UINT32)(data>>32)), "2"((UINT32)data) + : "esi"); + + return t; +} + +static inline UINT32 ip_reduce_p36(UINT64 t) +{ + asm volatile( + "movl %%edx,%%edi\n\t" + "andl $15,%%edx\n\t" + "shrl $4,%%edi\n\t" + "leal (%%edi,%%edi,4),%%edi\n\t" + "addl %%edi,%%eax\n\t" + "adcl $0,%%edx\n\t" + : "+A"(t) + : + : "edi"); + + if (t >= p36) + t -= p36; + + return (UINT32)(t); +} + +#else + +static inline UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data) +{ + t = t + ipkp[0] * (UINT64)(UINT16)(data >> 48); + t = t + ipkp[1] * (UINT64)(UINT16)(data >> 32); + t = t + ipkp[2] * (UINT64)(UINT16)(data >> 16); + t = t + ipkp[3] * (UINT64)(UINT16)(data); + + return t; +} + +static inline UINT32 ip_reduce_p36(UINT64 t) +{ +/* Divisionless modular reduction */ + UINT64 ret; + + ret = (t & m36) + 5 * (t >> 36); + if (ret >= p36) + ret -= p36; + + /* return least significant 32 bits */ + return (UINT32)(ret); +} + + +#endif + +/* If the data being hashed by UHASH is no longer than L1_KEY_LEN, then + * the polyhash stage is skipped and ip_short is applied directly to the + * NH output. + */ +static inline void ip_short(uhash_ctx_t hhc, UINT8 *nh_res, char *res) +{ + UINT64 t; + UINT64 *nhp = (UINT64 *)nh_res; + uhash_ctx *ahc = (uhash_ctx *)hhc; + + t = ip_aux(0,ahc->ip_keys, nhp[0]); + STORE_UINT32_BIG((UINT32 *)res+0, ip_reduce_p36(t) ^ ahc->ip_trans[0]); +#if (UMAC_OUTPUT_LEN >= 8) + t = ip_aux(0,ahc->ip_keys+4, nhp[1]); + STORE_UINT32_BIG((UINT32 *)res+1, ip_reduce_p36(t) ^ ahc->ip_trans[1]); +#endif +#if (UMAC_OUTPUT_LEN >= 12) + t = ip_aux(0,ahc->ip_keys+8, nhp[2]); + STORE_UINT32_BIG((UINT32 *)res+2, ip_reduce_p36(t) ^ ahc->ip_trans[2]); +#endif +#if (UMAC_OUTPUT_LEN == 16) + t = ip_aux(0,ahc->ip_keys+12, nhp[3]); + STORE_UINT32_BIG((UINT32 *)res+3, ip_reduce_p36(t) ^ ahc->ip_trans[3]); +#endif +} + +/* If the data being hashed by UHASH is longer than L1_KEY_LEN, then + * the polyhash stage is not skipped and ip_long is applied to the + * polyhash output. + */ +static inline void ip_long(uhash_ctx_t hhc, char *res) +{ + int i; + UINT64 t; + uhash_ctx *ahc = (uhash_ctx *)hhc; + + for (i = 0; i < STREAMS; i++) { + /* fix polyhash output not in Z_p64 */ + if (ahc->poly_accum[i] >= p64) + ahc->poly_accum[i] -= p64; + t = ip_aux(0,ahc->ip_keys+(i*4), ahc->poly_accum[i]); + STORE_UINT32_BIG((UINT32 *)res+i, + ip_reduce_p36(t) ^ ahc->ip_trans[i]); + } +} + + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ + +/* Reset uhash context for next hash session */ +static inline int uhash_reset(uhash_ctx_t hhc) +{ + uhash_ctx *pc = (uhash_ctx *)hhc; + nh_reset(&pc->hash); + pc->msg_len = 0; + pc->poly_accum[0] = 1; +#if (UMAC_OUTPUT_LEN >= 8) + pc->poly_accum[1] = 1; +#endif +#if (UMAC_OUTPUT_LEN >= 12) + pc->poly_accum[2] = 1; +#endif +#if (UMAC_OUTPUT_LEN == 16) + pc->poly_accum[3] = 1; +#endif + return 1; +} + +/* ---------------------------------------------------------------------- */ + +/* Given a pointer to the internal key needed by kdf() and a uhash context, + * initialize the NH context and generate keys needed for poly and inner- + * product hashing. All keys are endian adjusted in memory so that native + * loads cause correct keys to be in registers during calculation. + */ +static inline void uhash_init(uhash_ctx_t hhc, aes_int_key prf_key) +{ + int i; + UINT8 buf[(8*STREAMS+4)*sizeof(UINT64)]; + uhash_ctx *ahc = (uhash_ctx *)hhc; + + /* Zero the entire uhash context */ + memset(ahc, 0, sizeof(uhash_ctx)); + + /* Initialize the L1 hash */ + nh_init(&ahc->hash, prf_key); + + /* Setup L2 hash variables */ + kdf(buf, prf_key, 2, sizeof(buf)); /* Fill buffer with index 1 key */ + for (i = 0; i < STREAMS; i++) { + /* Fill keys from the buffer, skipping bytes in the buffer not + * used by this implementation. Endian reverse the keys if on a + * little-endian computer. + */ + memcpy(ahc->poly_key_8+i, buf+24*i, 8); + endian_convert_if_le(ahc->poly_key_8+i, 8, 8); + /* Mask the 64-bit keys to their special domain */ + ahc->poly_key_8[i] &= ((UINT64)0x01ffffffu << 32) + 0x01ffffffu; + ahc->poly_accum[i] = 1; /* Our polyhash prepends a non-zero word */ + } + + /* Setup L3-1 hash variables */ + kdf(buf, prf_key, 3, sizeof(buf)); /* Fill buffer with index 2 key */ + for (i = 0; i < STREAMS; i++) + memcpy(ahc->ip_keys+4*i, buf+(8*i+4)*sizeof(UINT64), + 4*sizeof(UINT64)); + endian_convert_if_le(ahc->ip_keys, sizeof(UINT64), + sizeof(ahc->ip_keys)); + for (i = 0; i < STREAMS*4; i++) + ahc->ip_keys[i] %= p36; /* Bring into Z_p36 */ + + /* Setup L3-2 hash variables */ + /* Fill buffer with index 4 key */ + kdf(ahc->ip_trans, prf_key, 4, STREAMS * sizeof(UINT32)); + endian_convert_if_le(ahc->ip_trans, sizeof(UINT32), + STREAMS * sizeof(UINT32)); +} + +#if 0 +/* ---------------------------------------------------------------------- */ + +uhash_ctx_t uhash_alloc(char key[]) +{ +/* Allocate memory and force to a 16-byte boundary. */ + uhash_ctx_t ctx; + char bytes_to_add; + aes_int_key prf_key; + + ctx = (uhash_ctx_t)malloc(sizeof(uhash_ctx)+ALLOC_BOUNDARY); + if (ctx) { + if (ALLOC_BOUNDARY) { + bytes_to_add = ALLOC_BOUNDARY - + ((ptrdiff_t)ctx & (ALLOC_BOUNDARY -1)); + ctx = (uhash_ctx_t)((char *)ctx + bytes_to_add); + *((char *)ctx - 1) = bytes_to_add; + } + aes_key_setup(key,prf_key); + uhash_init(ctx, prf_key); + } + return (ctx); +} + +/* ---------------------------------------------------------------------- */ + +int uhash_free(uhash_ctx_t ctx) +{ +/* Free memory allocated by uhash_alloc */ + char bytes_to_sub; + + if (ctx) { + if (ALLOC_BOUNDARY) { + bytes_to_sub = *((char *)ctx - 1); + ctx = (uhash_ctx_t)((char *)ctx - bytes_to_sub); + } + free(ctx); + } + return (1); +} +#endif + +/* ---------------------------------------------------------------------- */ + +static inline int uhash_update(uhash_ctx_t hhc, char *input, long len) +/* Given len bytes of data, we parse it into L1_KEY_LEN chunks and + * hash each one with NH, calling the polyhash on each NH output. + */ +{ + UWORD bytes_hashed, bytes_remaining; + UINT8 nh_result[STREAMS*sizeof(UINT64)]; + uhash_ctx *ctx = (uhash_ctx *)hhc; + + if (ctx->msg_len + len <= L1_KEY_LEN) { + nh_update(&ctx->hash, (UINT8 *)input, len); + ctx->msg_len += len; + } else { + + bytes_hashed = ctx->msg_len % L1_KEY_LEN; + if (ctx->msg_len == L1_KEY_LEN) + bytes_hashed = L1_KEY_LEN; + + if (bytes_hashed + len >= L1_KEY_LEN) { + + /* If some bytes have been passed to the hash function */ + /* then we want to pass at most (L1_KEY_LEN - bytes_hashed) */ + /* bytes to complete the current nh_block. */ + if (bytes_hashed) { + bytes_remaining = (L1_KEY_LEN - bytes_hashed); + nh_update(&ctx->hash, (UINT8 *)input, bytes_remaining); + nh_final(&ctx->hash, nh_result); + ctx->msg_len += bytes_remaining; + poly_hash(ctx,(UINT32 *)nh_result); + len -= bytes_remaining; + input += bytes_remaining; + } + + /* Hash directly from input stream if enough bytes */ + while (len >= L1_KEY_LEN) { + nh(&ctx->hash, (UINT8 *)input, L1_KEY_LEN, + L1_KEY_LEN, nh_result); + ctx->msg_len += L1_KEY_LEN; + len -= L1_KEY_LEN; + input += L1_KEY_LEN; + poly_hash(ctx,(UINT32 *)nh_result); + } + } + + /* pass remaining < L1_KEY_LEN bytes of input data to NH */ + if (len) { + nh_update(&ctx->hash, (UINT8 *)input, len); + ctx->msg_len += len; + } + } + + return (1); +} + +/* ---------------------------------------------------------------------- */ + +static inline int uhash_final(uhash_ctx_t hhc, char *res) +/* Incorporate any pending data, pad, and generate tag */ +{ + UINT8 nh_result[STREAMS*sizeof(UINT64)]; + uhash_ctx *ctx = (uhash_ctx *)hhc; + + if (ctx->msg_len > L1_KEY_LEN) { + if (ctx->msg_len % L1_KEY_LEN) { + nh_final(&ctx->hash, nh_result); + poly_hash(ctx,(UINT32 *)nh_result); + } + ip_long(ctx, res); + } else { + nh_final(&ctx->hash, nh_result); + ip_short(ctx,nh_result, res); + } + uhash_reset(ctx); + return (1); +} + +/* ---------------------------------------------------------------------- */ + +static inline int uhash(uhash_ctx_t hhc, char *msg, long len, char *res) +/* assumes that msg is in a writable buffer of length divisible by */ +/* L1_PAD_BOUNDARY. Bytes beyond msg[len] may be zeroed. */ +{ + UINT8 nh_result[STREAMS*sizeof(UINT64)]; + UINT32 nh_len; + int extra_zeroes_needed; + uhash_ctx *ahc = (uhash_ctx *)hhc; + + /* If the message to be hashed is no longer than L1_HASH_LEN, we skip + * the polyhash. + */ + if (len <= L1_KEY_LEN) { + if (len == 0) /* If zero length messages will not */ + nh_len = L1_PAD_BOUNDARY; /* be seen, comment out this case */ + else + nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1)); + extra_zeroes_needed = nh_len - len; + zero_pad((UINT8 *)msg + len, extra_zeroes_needed); + nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result); + ip_short(ahc,nh_result, res); + } else { + /* Otherwise, we hash each L1_KEY_LEN chunk with NH, passing the NH + * output to poly_hash(). + */ + do { + nh(&ahc->hash, (UINT8 *)msg, L1_KEY_LEN, L1_KEY_LEN, nh_result); + poly_hash(ahc,(UINT32 *)nh_result); + len -= L1_KEY_LEN; + msg += L1_KEY_LEN; + } while (len >= L1_KEY_LEN); + if (len) { + nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1)); + extra_zeroes_needed = nh_len - len; + zero_pad((UINT8 *)msg + len, extra_zeroes_needed); + nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result); + poly_hash(ahc,(UINT32 *)nh_result); + } + + ip_long(ahc, res); + } + + uhash_reset(ahc); + return 1; +} + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- Begin UMAC Section --------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +/* The UMAC interface has two interfaces, an all-at-once interface where + * the entire message to be authenticated is passed to UMAC in one buffer, + * and a sequential interface where the message is presented a little at a + * time. The all-at-once is more optimaized than the sequential version and + * should be preferred when the sequential interface is not required. + */ +typedef struct umac_ctx { + uhash_ctx hash; /* Hash function for message compression */ + pdf_ctx pdf; /* PDF for hashed output */ +} umac_ctx; + +/* ---------------------------------------------------------------------- */ + +static inline int umac_reset(umac_ctx_t ctx) +/* Reset the hash function to begin a new authentication. */ +{ + uhash_reset((uhash_ctx_t)&((umac_ctx *)ctx)->hash); + return (1); +} + +/* ---------------------------------------------------------------------- */ + +static inline int umac_delete(umac_ctx_t ctx) +/* Deallocate the ctx structure */ +{ +/* char bytes_to_sub; + + if (ctx) { + if (ALLOC_BOUNDARY) { + bytes_to_sub = *((char *)ctx - 1); + ctx = (umac_ctx_t)((char *)ctx - bytes_to_sub); + } + free(ctx); + } +*/ + return (1); +} + +/* ---------------------------------------------------------------------- */ +static char g_umac_ctx[sizeof(umac_ctx)+ALLOC_BOUNDARY]; + +static inline umac_ctx_t umac_new(char key[]) +/* Dynamically allocate a umac_ctx struct, initialize variables, + * generate subkeys from key. Align to 16-byte boundary. + */ +{ + umac_ctx_t ctx; + char bytes_to_add; + aes_int_key prf_key; + + //ctx = (umac_ctx_t)malloc(sizeof(umac_ctx)+ALLOC_BOUNDARY); + ctx = (umac_ctx_t)g_umac_ctx; + if (ctx) { + if (ALLOC_BOUNDARY) { + bytes_to_add = ALLOC_BOUNDARY - + ((ptrdiff_t)ctx & (ALLOC_BOUNDARY - 1)); + ctx = (umac_ctx_t)((char *)ctx + bytes_to_add); + *((char *)ctx - 1) = bytes_to_add; + } + aes_key_setup(key,prf_key); + pdf_init(&((umac_ctx *)ctx)->pdf, prf_key); + uhash_init((uhash_ctx_t)&((umac_ctx *)ctx)->hash, prf_key); + } + + return (ctx); +} + +/* ---------------------------------------------------------------------- */ + +static inline int umac_final(umac_ctx_t ctx, char tag[], const char nonce[8]) +/* Incorporate any pending data, pad, and generate tag */ +{ + uhash_final((uhash_ctx_t)&((umac_ctx *)ctx)->hash, (char *)tag); + pdf_gen_xor(&((umac_ctx *)ctx)->pdf, (UINT8 *)nonce, (UINT8 *)tag); + + return (1); +} + +/* ---------------------------------------------------------------------- */ + +static inline int umac_update(umac_ctx_t ctx, char *input, long len) +/* Given len bytes of data, we parse it into L1_KEY_LEN chunks and */ +/* hash each one, calling the PDF on the hashed output whenever the hash- */ +/* output buffer is full. */ +{ + uhash_update((uhash_ctx_t)&((umac_ctx *)ctx)->hash, input, len); + return (1); +} + +/* ---------------------------------------------------------------------- */ + +static inline int umac(umac_ctx_t ctx, char *input, + long len, char tag[], + const char nonce[8]) +/* All-in-one version simply calls umac_update() and umac_final(). */ +{ + uhash((uhash_ctx_t)&((umac_ctx *)ctx)->hash, input, len, (char *)tag); + pdf_gen_xor(&((umac_ctx *)ctx)->pdf, (UINT8 *)nonce, (UINT8 *)tag); + + return (1); +} + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ----- End UMAC Section ----------------------------------------------- */ +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ + +/* If RUN_TESTS is defined non-zero, then we define a main() function and */ +/* run some verification and speed tests. */ + +#if RUN_TESTS + +#include +#include + +void pbuf(void *buf, UWORD n, char *s) +{ + UWORD i; + UINT8 *cp = (UINT8 *)buf; + + if (n <= 0 || n >= 30) + n = 30; + + if (s) + printf("%s: ", s); + + for (i = 0; i < n; i++) + printf("%02X", (unsigned char)cp[i]); + printf("\n"); +} + +void primitive_verify(void) +{ + #if (UMAC_KEY_LEN == 16) + UINT8 key[16] = {0}; + UINT8 pt[16] = {'\x80',0,/* remainder auto filled with zeroes */}; + char res[] = "3AD78E726C1EC02B7EBFE92B23D9EC34"; + #elif (UMAC_KEY_LEN == 32) + UINT8 key[32] = {0}; + UINT8 pt[16] = {'\x80',0,/* remainder auto filled with zeroes */}; + char res[] = "DDC6BF79 C1576 D8D9AEB6F9A75FD4E"; + #endif + aes_int_key k1; + + aes_key_setup(key, k1); + aes_encryption(pt, pt, k1); + printf("\nAES Test\n"); + pbuf(pt, 16, "Digest is "); + printf("Digest should be: %s\n", res); +} + +void umac_verify(void) +{ + umac_ctx_t ctx; + char *data_ptr; + int data_len = 32 * 1024; + char nonce[] = "abcdefgh"; + char tag[21] = {0}; + char tag2[21] = {0}; + int bytes_over_boundary, i, j; + int inc[] = {1,99,512}; + int lengths[] = {0,3,1024,32768}; + char *results[] = {"4D61E4F5AAB959C8B800A2BE546302AD", + "67C1700CA30B532DCD9B970655B47B45", + "05CB9405EC38D9F0B356D9E6D5BC5D03", + "048C543CB72443A46011A76438BA2AF4"}; + + /* Initialize Memory and UMAC */ + data_ptr = (char *)malloc(data_len + 48); + if (data_ptr == 0) + return; + bytes_over_boundary = (ptrdiff_t)data_ptr & (16 - 1); + if (bytes_over_boundary != 0) + data_ptr += (16 - bytes_over_boundary); + memset(data_ptr, 'a', data_len); + ctx = umac_new("abcdefghijklmnop"); + + printf("Testing known vectors.\n\n"); + printf("Msg %-*s Is\n", UMAC_OUTPUT_LEN * 2, "Should be"); + printf("--- %-*s --\n", UMAC_OUTPUT_LEN * 2, "---------"); + + for (i = 0; (unsigned)i < sizeof(lengths)/sizeof(*lengths); i++) { + memset(data_ptr, 'a', lengths[i]); + umac(ctx, data_ptr, lengths[i], tag, nonce); + umac_reset(ctx); + printf("'a' * %5d : %.*s ", lengths[i], UMAC_OUTPUT_LEN * 2, results[i]); + pbuf(tag, UMAC_OUTPUT_LEN, NULL); + } + + printf("\nVerifying consistancy of single- and" + " multiple-call interfaces.\n"); + for (i = 1; i < (int)(sizeof(inc)/sizeof(inc[0])); i++) { + for (j = 0; j <= data_len-inc[i]; j+=inc[i]) + umac_update(ctx, data_ptr+j, inc[i]); + umac_final(ctx, tag, nonce); + umac_reset(ctx); + + umac(ctx, data_ptr, (data_len/inc[i])*inc[i], tag2, nonce); + umac_reset(ctx); + nonce[7]++; + + if (memcmp(tag,tag2,sizeof(tag))) + printf("\ninc = %d data_len = %d failed!\n", + inc[i], data_len); + } + printf("Done.\n"); + umac_delete(ctx); +} + + +double run_cpb_test(umac_ctx_t ctx, int nbytes, char *data_ptr, + int data_len, double hz) +{ + clock_t ticks; + double secs; + char nonce[8] = {0}; + char tag[UMAC_OUTPUT_LEN+1] = {0}; /* extra char for null terminator */ + unsigned long total_mbs; + unsigned long iters_per_tag, remaining; + unsigned long tag_iters, i, j; + + if (nbytes <= 16) + total_mbs = 5; + if (nbytes <= 32) + total_mbs = 30; + else if (nbytes <= 64) + total_mbs = 400; + else if (nbytes <= 256) + total_mbs = 800; + else if (nbytes <= 1024) + total_mbs = 1600; + else + total_mbs = 2500; + + tag_iters = (total_mbs * 1024 * 1024) / (nbytes) + 1; + + if (nbytes <= data_len) { + + i = tag_iters; + umac(ctx, data_ptr, nbytes, tag, nonce); + ticks = clock(); + do { + umac(ctx, data_ptr, nbytes, tag, nonce); + nonce[7] += 1; + } while (--i); + ticks = clock() - ticks; + + } else { + + i = tag_iters; + iters_per_tag = nbytes / data_len; + remaining = nbytes % data_len; + umac_update(ctx, data_ptr, data_len); + umac_final(ctx, tag, nonce); + ticks = clock(); + do { + j = iters_per_tag; + do { + umac_update(ctx, data_ptr, data_len); + } while (--j); + if (remaining) + umac_update(ctx, data_ptr, remaining); + umac_final(ctx, tag, nonce); + nonce[7] += 1; + } while (--i); + ticks = clock() - ticks; + + } + + secs = (double)ticks / CLOCKS_PER_SEC; + return (secs * (hz/(tag_iters*nbytes))); +} + +void speed_test(void) +{ + umac_ctx_t ctx; + char *data_ptr; + int data_len; + double hz; + double cpb; + int bytes_over_boundary, i; + int length_range_low = 1; + int length_range_high = 0; + int length_pts[] = {44,64,256,512,552,1024,1500,8*1024,256*1024}; + + /* hz and data_len must be set appropriately for your system + * for optimal results. + */ + #if (GCC_X86 || MSC_X86) + hz = ((double)2000e6); + data_len = 4096; + #else + hz = ((double)1420e6); + data_len = 8192; + #endif + + /* Allocate memory and align to 16-byte multiple */ + data_ptr = (char *)malloc(data_len + 16); + bytes_over_boundary = (ptrdiff_t)data_ptr & (16 - 1); + if (bytes_over_boundary != 0) + data_ptr += (16 - bytes_over_boundary); + for (i = 0; i < data_len; i++) + data_ptr[i] = (i*i) % 128; + ctx = umac_new("abcdefghijklmnopqrstuvwxyz"); + + printf("\n"); + if (length_range_low < length_range_high) { + for (i = length_range_low; i <= length_range_high; i++) { + cpb = run_cpb_test(ctx, i, data_ptr, data_len, hz); + printf("Authenticating %8d byte messages: %5.2f cpb.\n", i, cpb); + } + } + + if (sizeof(length_pts) > 0) { + for (i = 0; i < (int)(sizeof(length_pts)/sizeof(int)); i++) { + cpb = run_cpb_test(ctx, length_pts[i], data_ptr, data_len, hz); + printf("Authenticating %8d byte messages: %5.2f cpb.\n", + length_pts[i], cpb); + } + } + umac_delete(ctx); +} + +int main(void) +{ + #if GLADMAN_AES + gen_tabs(); + #endif + umac_verify(); + primitive_verify(); + speed_test(); + /* printf("Push return to continue\n"); getchar(); */ + return (1); +} + +#endif + +#ifdef __cplusplus + } +#endif