diff --git a/tools/firmware/hvmloader/acpi/acpi2_0.h b/tools/firmware/hvmloader/acpi/acpi2_0.h
--- a/tools/firmware/hvmloader/acpi/acpi2_0.h
+++ b/tools/firmware/hvmloader/acpi/acpi2_0.h
@@ -283,6 +283,66 @@ struct acpi_20_madt {
     uint32_t flags;
 };
 
+/*
+ * System Resource Affinity Table header definition (SRAT) (Version 3.0)
+ * X2APIC_CPU_AFFINITY is defined in version 4.0
+ */
+struct acpi_30_srat {
+    struct acpi_header header;  /* Common ACPI table header */
+    uint32_t table_revision;    /* Must be value '1' */
+    uint32_t reserved[2];       /* Reserved, must be zero */
+};
+#define ACPI_30_SRAT_TABLE_REVISION 0x1
+
+/* Values for type (in SRAT subtables) */
+enum acpi_30_srat_type {
+    ACPI_30_SRAT_TYPE_CPU_AFFINITY = 0,
+    ACPI_30_SRAT_TYPE_MEMORY_AFFINITY = 1,
+    ACPI_30_SRAT_TYPE_RESERVED = 2  /* 2 and greater are reserved */
+};
+
+/* type(0) : Processor Local APIC/SAPIC Affinity */
+struct acpi_30_srat_cpu_affinity {
+    uint8_t type;
+    uint8_t length;
+    uint8_t proximity_domain_lo;
+    uint8_t apic_id;
+    uint32_t flags;
+    uint8_t local_sapic_eid;
+    uint8_t proximity_domain_hi[3];
+    uint32_t reserved;          /* Reserved, must be zero */
+};
+
+/* Flags */
+#define ACPI_30_SRAT_CPU_USE_AFFINITY (1)  /* 00: Use affinity structure */
+
+/* type(1) : Memory Affinity */
+struct acpi_30_srat_mem_affinity {
+    uint8_t type;
+    uint8_t length;
+    uint32_t proximity_domain;
+    uint16_t reserved;          /* Reserved, must be zero */
+    uint64_t base_address;
+    uint64_t size;
+    uint32_t reserved1;
+    uint32_t flags;
+    uint64_t reserved2;         /* Reserved, must be zero */
+};
+
+/* Flags */
+#define ACPI_30_SRAT_MEM_ENABLED (1)          /* 00: Enabled */
+#define ACPI_30_SRAT_MEM_HOT_PLUGGABLE (1<<1) /* 01: Mem is hot pluggable */
+#define ACPI_30_SRAT_MEM_NON_VOLATILE (1<<2)  /* 02: Mem is non-volatile */
+
+/*
+ * System Locality Information Table header definition (SLIT) (Version 1.0)
+ */
+struct acpi_10_slit {
+    struct acpi_header header;
+    uint64_t locality_count;
+    uint8_t entry[1];           /* locality_count * locality_count entries */
+};
+
 
 /*
  * HPET Description Table
@@ -367,6 +427,8 @@ struct acpi_20_madt_intsrcovr {
 #define ACPI_2_0_XSDT_SIGNATURE ASCII32('X','S','D','T')
 #define ACPI_2_0_TCPA_SIGNATURE ASCII32('T','C','P','A')
 #define ACPI_2_0_HPET_SIGNATURE ASCII32('H','P','E','T')
+#define ACPI_3_0_SRAT_SIGNATURE ASCII32('S','R','A','T')
+#define ACPI_1_0_SLIT_SIGNATURE ASCII32('S','L','I','T')
 
 /*
  * Table revision numbers.
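For reference: the SLIT payload is a flat locality_count x locality_count byte
matrix appended after the fixed header, which is why construct_slit() below
sizes the table as sizeof(*slit) + N*N - 1. A standalone sketch of that
arithmetic, using demo stand-in structs rather than the hvmloader
definitions:

#include <stdint.h>
#include <stdio.h>

#pragma pack(1)                 /* acpi2_0.h packs its structures too */
/* Demo stand-ins: the real ACPI common header is 36 bytes. */
struct demo_acpi_header { uint8_t bytes[36]; };
struct demo_acpi_10_slit {
    struct demo_acpi_header header;
    uint64_t locality_count;
    uint8_t entry[1];           /* row-major N*N distance matrix */
};
#pragma pack()

int main(void)
{
    unsigned n = 2;             /* two virtual NUMA nodes */
    /* entry[1] already contributes one byte to sizeof, hence the "- 1",
     * matching "sizeof(*slit) + (nr_vnodes*nr_vnodes) - 1" in
     * construct_slit(). */
    unsigned len = (unsigned)sizeof(struct demo_acpi_10_slit) + n * n - 1;
    printf("SLIT length for %u nodes: %u bytes\n", n, len);  /* 48 */
    /* Distance from node i to node j is entry[i*n + j]. */
    return 0;
}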
@@ -379,6 +441,8 @@ struct acpi_20_madt_intsrcovr {
 #define ACPI_2_0_TCPA_REVISION 0x02
 #define ACPI_2_0_HPET_REVISION 0x01
 #define ACPI_1_0_FADT_REVISION 0x01
+#define ACPI_3_0_SRAT_REVISION 0x01
+#define ACPI_1_0_SLIT_REVISION 0x01
 
 #pragma pack ()
 
diff --git a/tools/firmware/hvmloader/acpi/build.c b/tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c
+++ b/tools/firmware/hvmloader/acpi/build.c
@@ -149,6 +149,114 @@ static int construct_madt(struct acpi_20
     return align16(offset);
 }
 
+static int
+construct_srat_cpu_affinity(struct acpi_30_srat_cpu_affinity *cpu_srat)
+{
+    struct acpi_30_srat_cpu_affinity *cpu_srat_iter;
+    int vnode, vcpu;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info;
+    for ( vnode = 0, cpu_srat_iter = cpu_srat;
+          vnode < numa_info->nr_vnodes; vnode++ )
+    {
+        struct xen_vnode_info *vnode_info = &numa_info->vnode_info[vnode];
+        for ( vcpu = 0; vcpu < HVM_MAX_VCPUS; vcpu++ )
+        {
+            if ( test_bit(vcpu, XEN_CPUMASK_BITMAP(&vnode_info->vcpu_mask)) )
+            {
+                memset(cpu_srat_iter, 0, sizeof(*cpu_srat_iter));
+                cpu_srat_iter->type = ACPI_30_SRAT_TYPE_CPU_AFFINITY;
+                cpu_srat_iter->length = sizeof(*cpu_srat_iter);
+                cpu_srat_iter->proximity_domain_lo = vnode;
+                cpu_srat_iter->apic_id = LAPIC_ID(vcpu);
+                cpu_srat_iter->flags = ACPI_30_SRAT_CPU_USE_AFFINITY;
+                cpu_srat_iter++;
+            }
+        }
+    }
+    /* Return the length of the sub-table. */
+    return ((uint8_t *)cpu_srat_iter - (uint8_t *)cpu_srat);
+}
+
+static int
+construct_srat_mem_affinity(struct acpi_30_srat_mem_affinity *mem_srat)
+{
+    struct acpi_30_srat_mem_affinity *mem_srat_iter = mem_srat;
+    int vnode;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info;
+    uint64_t base_address;
+
+    for ( vnode = 0, base_address = 0; vnode < numa_info->nr_vnodes; vnode++ )
+    {
+        uint64_t size;
+        struct xen_vnode_info *vnode_info = &numa_info->vnode_info[vnode];
+        memset(mem_srat_iter, 0, sizeof(*mem_srat_iter));
+        mem_srat_iter->type = ACPI_30_SRAT_TYPE_MEMORY_AFFINITY;
+        mem_srat_iter->length = sizeof(*mem_srat_iter);
+        mem_srat_iter->proximity_domain = vnode;
+        mem_srat_iter->base_address = base_address;
+        size = ((uint64_t)vnode_info->nr_pages << PAGE_SHIFT);
+        mem_srat_iter->size = size;
+        mem_srat_iter->flags = ACPI_30_SRAT_MEM_ENABLED;
+        base_address += size;
+        mem_srat_iter++;
+    }
+    /* Return the length of the sub-table. */
+    return ((uint8_t *)mem_srat_iter - (uint8_t *)mem_srat);
+}
+
+static int construct_srat(struct acpi_30_srat *srat)
+{
+    int offset;
+
+    memset(srat, 0, sizeof(*srat));
+    srat->header.signature = ACPI_3_0_SRAT_SIGNATURE;
+    srat->header.revision = ACPI_3_0_SRAT_REVISION;
+    fixed_strcpy(srat->header.oem_id, ACPI_OEM_ID);
+    fixed_strcpy(srat->header.oem_table_id, ACPI_OEM_TABLE_ID);
+    srat->header.oem_revision = ACPI_OEM_REVISION;
+    srat->header.creator_id = ACPI_CREATOR_ID;
+    srat->header.creator_revision = ACPI_CREATOR_REVISION;
+    srat->table_revision = ACPI_30_SRAT_TABLE_REVISION;
+    offset = sizeof(*srat);
+
+    offset += construct_srat_cpu_affinity((struct acpi_30_srat_cpu_affinity *)
+                                          ((uint8_t *)srat + offset));
+
+    offset += construct_srat_mem_affinity((struct acpi_30_srat_mem_affinity *)
+                                          ((uint8_t *)srat + offset));
+
+    srat->header.length = offset;
+    set_checksum(srat, offsetof(struct acpi_header, checksum), offset);
+
+    return offset;
+}
+
+static int construct_slit(struct acpi_10_slit *slit)
+{
+    int offset, i, nr_vnodes;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info;
+
+    memset(slit, 0, sizeof(*slit));
+    slit->header.signature = ACPI_1_0_SLIT_SIGNATURE;
+    slit->header.revision = ACPI_1_0_SLIT_REVISION;
+    fixed_strcpy(slit->header.oem_id, ACPI_OEM_ID);
+    fixed_strcpy(slit->header.oem_table_id, ACPI_OEM_TABLE_ID);
+    slit->header.oem_revision = ACPI_OEM_REVISION;
+    slit->header.creator_id = ACPI_CREATOR_ID;
+    slit->header.creator_revision = ACPI_CREATOR_REVISION;
+    slit->locality_count = numa_info->nr_vnodes;
+
+    nr_vnodes = numa_info->nr_vnodes;
+    for ( i = 0; i < (nr_vnodes * nr_vnodes); i++ )
+        slit->entry[i] = numa_info->vnode_distance[i];
+
+    offset = sizeof(*slit) + (nr_vnodes * nr_vnodes) - 1;
+    slit->header.length = offset;
+    set_checksum(slit, offsetof(struct acpi_header, checksum), offset);
+
+    return offset;
+}
+
 static int construct_hpet(struct acpi_20_hpet *hpet)
 {
     int offset;
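Note that construct_srat_cpu_affinity() stores the vnode number only in
proximity_domain_lo, which suffices while vnode counts fit in one byte. For
illustration, a sketch of how ACPI 3.0 splits a full 32-bit proximity domain
across the lo/hi fields; the helper below is hypothetical, not part of the
patch:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper (illustration only): ACPI 3.0 splits the 32-bit
 * proximity domain of a CPU affinity entry into proximity_domain_lo
 * (bits 7:0) and proximity_domain_hi[3] (bits 31:8). */
static void set_proximity_domain(uint8_t *lo, uint8_t hi[3], uint32_t domain)
{
    *lo   = domain & 0xff;            /* bits 7:0   */
    hi[0] = (domain >> 8) & 0xff;     /* bits 15:8  */
    hi[1] = (domain >> 16) & 0xff;    /* bits 23:16 */
    hi[2] = (domain >> 24) & 0xff;    /* bits 31:24 */
}

int main(void)
{
    uint8_t lo, hi[3];
    set_proximity_domain(&lo, hi, 0x300);  /* a domain above 255 */
    printf("lo=%#x hi={%#x,%#x,%#x}\n", lo, hi[0], hi[1], hi[2]);
    return 0;
}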
@@ -177,6 +285,8 @@ static int construct_secondary_tables(ui
     struct acpi_20_madt *madt;
     struct acpi_20_hpet *hpet;
     struct acpi_20_tcpa *tcpa;
+    struct acpi_30_srat *srat;
+    struct acpi_10_slit *slit;
     static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
     uint16_t *tis_hdr;
     void *lasa;
@@ -189,6 +299,17 @@ static int construct_secondary_tables(ui
         table_ptrs[nr_tables++] = (unsigned long)madt;
     }
 
+    /* SRAT/SLIT. */
+    if ( hvm_info->numa_info.version == XEN_DOM_NUMA_INTERFACE_VERSION )
+    {
+        srat = (struct acpi_30_srat *)&buf[offset];
+        offset += construct_srat(srat);
+        table_ptrs[nr_tables++] = (unsigned long)srat;
+        slit = (struct acpi_10_slit *)&buf[offset];
+        offset += construct_slit(slit);
+        table_ptrs[nr_tables++] = (unsigned long)slit;
+    }
+
     /* HPET. */
     if ( hpet_exists(ACPI_HPET_ADDRESS) )
     {
diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c
+++ b/tools/libxc/xc_hvm_build.c
@@ -33,7 +33,43 @@
 #define NR_SPECIAL_PAGES 5
 #define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
 
-static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
+static void build_hvm_numa_info(struct hvm_info_table *hvm_info,
+                                xc_domain_numa_layout_t *dom_layout)
+{
+    int vnode;
+    uint64_t vnode_pgstart;
+    struct xen_domain_numa_info *numa_info = &hvm_info->numa_info;
+
+    numa_info->version = dom_layout->version;
+    numa_info->type = dom_layout->type;
+    numa_info->nr_vcpus = dom_layout->nr_vcpus;
+    numa_info->nr_vnodes = dom_layout->nr_vnodes;
+    /* high_mem_pgend is 32-bit, so we should be fine too */
+    numa_info->nr_pages = dom_layout->nr_pages;
+
+    for ( vnode = 0, vnode_pgstart = 0; vnode < dom_layout->nr_vnodes; vnode++ )
+    {
+        xc_vnode_data_t *vnode_data = &dom_layout->vnode_data[vnode];
+        struct xen_vnode_info *vnode_info = &numa_info->vnode_info[vnode];
+        uint64_t vnode_pgend;
+
+        memcpy(vnode_info, vnode_data, sizeof(*vnode_info));
+        vnode_pgend = vnode_pgstart + vnode_info->nr_pages;
+        /* Account for the hole in the memory map. */
+        if ( (vnode_pgstart < hvm_info->low_mem_pgend) &&
+             (vnode_pgend >= hvm_info->low_mem_pgend) )
+            vnode_pgend += ((1ull << 32) - HVM_BELOW_4G_RAM_END) >> PAGE_SHIFT;
+
+        vnode_info->nr_pages = vnode_pgend - vnode_pgstart;
+        vnode_pgstart += vnode_info->nr_pages;
+    }
+    memcpy(numa_info->vnode_distance, dom_layout->vnode_distance,
+           sizeof(numa_info->vnode_distance));
+    return;
+}
+
+static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
+                           xc_domain_numa_layout_t *dom_layout)
 {
     struct hvm_info_table *hvm_info = (struct hvm_info_table *)
         (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
@@ -63,6 +99,9 @@ static void build_hvm_inf
     hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
     hvm_info->reserved_mem_pgstart = special_pfn(0);
 
+    if ( dom_layout && (dom_layout->type == XEN_DOM_NUMA_SPLIT) )
+        build_hvm_numa_info(hvm_info, dom_layout);
+
     /* Finish with the checksum. */
     for ( i = 0, sum = 0; i < hvm_info->length; i++ )
         sum += ((uint8_t *)hvm_info)[i];
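The low_mem_pgend test in build_hvm_numa_info() widens whichever vnode
straddles the end of low memory, so the guest-physical ranges reported in the
SRAT skip over the PCI/MMIO hole below 4GiB. A standalone sketch of that
arithmetic, assuming HVM_BELOW_4G_RAM_END is 0xF0000000 (its value in
xen/include/public/hvm/e820.h):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define HVM_BELOW_4G_RAM_END 0xF0000000ull  /* assumed, see e820.h */

int main(void)
{
    uint64_t low_mem_pgend = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT;
    uint64_t hole_pages = ((1ull << 32) - HVM_BELOW_4G_RAM_END) >> PAGE_SHIFT;

    /* A 4GiB vnode starting at pfn 0 crosses low_mem_pgend, so it absorbs
     * the hole and its guest-physical extent grows accordingly. */
    uint64_t vnode_pgstart = 0, vnode_pgend = 0x100000;

    if ( vnode_pgstart < low_mem_pgend && vnode_pgend >= low_mem_pgend )
        vnode_pgend += hole_pages;

    printf("vnode 0 spans pfns [%#llx, %#llx)\n",
           (unsigned long long)vnode_pgstart,
           (unsigned long long)vnode_pgend);  /* [0, 0x110000) */
    return 0;
}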
@@ -411,8 +450,8 @@ out:
     return rc;
 }
 
-static int
-setup_guest_special_pages(xc_interface *xch, uint32_t dom, uint64_t memsize)
+static int setup_guest_special_pages(xc_interface *xch, uint32_t dom,
+        uint64_t memsize, xc_domain_numa_layout_t *dom_layout)
 {
     void *hvm_info_page;
     struct xen_add_to_physmap xatp;
@@ -424,7 +463,7 @@ setup_guest_special_pages(xc_interface *
                           xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
                           HVM_INFO_PFN)) == NULL )
         goto error_out;
-    build_hvm_info(hvm_info_page, memsize);
+    build_hvm_info(hvm_info_page, memsize, dom_layout);
     munmap(hvm_info_page, PAGE_SIZE);
 
     /* Map and initialise shared_info page. */
@@ -532,7 +571,7 @@ static int setup_guest(xc_interface *xch
     if ( rc < 0 )
         goto error_out;
 
-    rc = setup_guest_special_pages(xch, dom, v_end);
+    rc = setup_guest_special_pages(xch, dom, v_end, dom_layout);
     if ( rc < 0 )
         goto error_out;
 
diff --git a/xen/include/public/hvm/hvm_info_table.h b/xen/include/public/hvm/hvm_info_table.h
--- a/xen/include/public/hvm/hvm_info_table.h
+++ b/xen/include/public/hvm/hvm_info_table.h
@@ -25,12 +25,15 @@
 #ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
 #define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
 
+#include "../dom_numa.h"
+
 #define HVM_INFO_PFN         0x09F
 #define HVM_INFO_OFFSET      0x800
 #define HVM_INFO_PADDR       ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
 
 /* Maximum we can support with current vLAPIC ID mapping. */
-#define HVM_MAX_VCPUS        128
+#define HVM_MAX_VCPUS        XEN_MAX_VCPUS
+#define HVM_MAX_VNODES       XEN_MAX_VNODES
 
 struct hvm_info_table {
     char        signature[8]; /* "HVM INFO" */
@@ -70,6 +73,9 @@ struct hvm_info_table {
 
     /* Bitmap of which CPUs are online at boot time. */
     uint8_t     vcpu_online[(HVM_MAX_VCPUS + 7)/8];
+
+    /* Domain NUMA memory distribution */
+    struct xen_domain_numa_info numa_info;
 };
 
 #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
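Because numa_info is embedded in struct hvm_info_table, it is covered by the
existing whole-table checksum that build_hvm_info() finishes with (the sum of
all 'length' bytes must come to zero). A consumer-side sketch of that check,
assuming the Xen public headers are installed as
<xen/hvm/hvm_info_table.h>:

#include <stdint.h>
#include <xen/hvm/hvm_info_table.h>

/* Verifying the 8-bit checksum validates the embedded numa_info along
 * with the rest of the table (t points at a mapped hvm_info_table). */
static int hvm_info_checksum_ok(const struct hvm_info_table *t)
{
    const uint8_t *p = (const uint8_t *)t;
    uint8_t sum = 0;
    uint32_t i;

    for ( i = 0; i < t->length; i++ )
        sum += p[i];

    return sum == 0;
}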