diff -r 1ea82b45817b linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c Tue Oct 03 08:05:41 2006 +0200 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c Tue Oct 03 09:07:37 2006 +0200 @@ -108,7 +108,6 @@ xencomm_privcmd_sysctl(privcmd_hypercall (void *)desc); break; case XEN_SYSCTL_tbuf_op: - case XEN_SYSCTL_physinfo: case XEN_SYSCTL_sched_id: break; case XEN_SYSCTL_perfc_op: @@ -140,6 +139,25 @@ xencomm_privcmd_sysctl(privcmd_hypercall set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer, (void *)desc); break; + case XEN_SYSCTL_physinfo: + ret = xencomm_create( + xen_guest_handle(kern_op.u.physinfo.memory_chunks), + PUBLIC_MAXCHUNKS * sizeof(node_data_t), + &desc, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.u.physinfo.memory_chunks, + (void *)desc); + + ret = xencomm_create( + xen_guest_handle(kern_op.u.physinfo.node_to_cpu), + PUBLIC_MAX_NUMNODES * sizeof(u64), + &desc1, GFP_KERNEL); + if (ret) + xencomm_free(desc); + set_xen_guest_handle(kern_op.u.physinfo.node_to_cpu, + (void *)desc1); + break; default: printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd); return -ENOSYS; @@ -152,7 +170,7 @@ xencomm_privcmd_sysctl(privcmd_hypercall ret = xencomm_arch_hypercall_sysctl(op_desc); - /* FIXME: should we restore the handle? */ + /* FIXME: should we restore the handles? 
*/ if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t))) ret = -EFAULT; diff -r 1ea82b45817b xen/arch/ia64/linux-xen/Makefile --- a/xen/arch/ia64/linux-xen/Makefile Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/arch/ia64/linux-xen/Makefile Tue Oct 03 09:07:37 2006 +0200 @@ -16,3 +16,5 @@ obj-y += unaligned.o obj-y += unaligned.o obj-y += unwind.o obj-y += iosapic.o +obj-y += numa.o +obj-y += mm-numa.o diff -r 1ea82b45817b xen/arch/ia64/linux-xen/README.origin --- a/xen/arch/ia64/linux-xen/README.origin Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/arch/ia64/linux-xen/README.origin Tue Oct 03 09:07:37 2006 +0200 @@ -24,6 +24,8 @@ sort.c -> linux/lib/sort.c sort.c -> linux/lib/sort.c time.c -> linux/arch/ia64/kernel/time.c tlb.c -> linux/arch/ia64/mm/tlb.c +mm-numa.c -> linux/arch/ia64/mm/numa.c +numa.c -> linux/arch/ia64/kernel/numa.c unaligned.c -> linux/arch/ia64/kernel/unaligned.c unwind.c -> linux/arch/ia64/kernel/unwind.c unwind_decoder.c -> linux/arch/ia64/kernel/unwind_decoder.c diff -r 1ea82b45817b xen/arch/ia64/linux-xen/mm_contig.c --- a/xen/arch/ia64/linux-xen/mm_contig.c Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/arch/ia64/linux-xen/mm_contig.c Tue Oct 03 09:07:37 2006 +0200 @@ -308,4 +308,4 @@ paging_init (void) #endif /* !CONFIG_VIRTUAL_MEM_MAP */ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } -#endif +#endif /* XEN */ diff -r 1ea82b45817b xen/arch/ia64/xen/acpi.c --- a/xen/arch/ia64/xen/acpi.c Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/arch/ia64/xen/acpi.c Tue Oct 03 09:07:37 2006 +0200 @@ -53,6 +53,7 @@ #include #ifdef XEN #include +#include #endif #define BAD_MADT_ENTRY(entry, end) ( \ @@ -457,6 +458,7 @@ acpi_numa_memory_affinity_init (struct a num_node_memblks++; } +static unsigned int numnodes; void __init acpi_numa_arch_fixup (void) { diff -r 1ea82b45817b xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/arch/ia64/xen/dom0_ops.c Tue Oct 03 09:07:37 2006 +0200 @@ -22,8 +22,10 @@ 
#include #include #include +#include void build_physmap_table(struct domain *d); +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) extern unsigned long total_pages; @@ -183,13 +185,14 @@ long arch_do_sysctl(xen_sysctl_t *op, XE { long ret = 0; - if ( !IS_PRIV(current->domain) ) - return -EPERM; - switch ( op->cmd ) { case XEN_SYSCTL_physinfo: { + int i,j; + node_data_t *chunks; + u64 *map, node_to_cpu_64[MAX_NUMNODES]; + xen_sysctl_physinfo_t *pi = &op->u.physinfo; pi->threads_per_core = @@ -199,13 +202,75 @@ long arch_do_sysctl(xen_sysctl_t *op, XE pi->sockets_per_node = num_online_cpus() / cpus_weight(cpu_core_map[0]); pi->nr_cpus = (u32)num_online_cpus(); - pi->nr_nodes = 1; pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = local_cpu_data->proc_freq / 1000; memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); //memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4); ret = 0; + + /* fetch memory_chunk pointer from guest*/ + get_xen_guest_handle(chunks, pi->memory_chunks); + + printf ("chunks=%p, num_node_memblks=%u\n", chunks, num_node_memblks); + /* if it is set, fill out memory chunk array */ + if ( chunks != NULL ) { + if (num_node_memblks == 0) { + /* Non-NUMA machine. Put pseudo-values. 
*/ + node_data_t data; + data.node_start_pfn = 0; + data.node_spanned_pages = total_pages; + data.node_id = 0; + /* copy memory chunk structs to guest */ + if ( copy_to_guest_offset(pi->memory_chunks, 0, &data, 1) ) { + ret = -EFAULT; + break; + } + } + else + for ( i = 0; + i < num_node_memblks && i < PUBLIC_MAXCHUNKS; i++ ) + { + node_data_t data; + data.node_start_pfn = + node_memblk[i].start_paddr >> PAGE_SHIFT; + data.node_spanned_pages = + node_memblk[i].size >> PAGE_SHIFT; + data.node_id = node_memblk[i].nid; + /* copy memory chunk structs to guest */ + if ( copy_to_guest_offset(pi->memory_chunks, i, + &data, 1) ) { + ret = -EFAULT; + break; + } + } + } + /* set number of notes */ + pi->nr_nodes = num_online_nodes(); + + /* fetch node_to_cpu pointer from guest */ + get_xen_guest_handle(map, pi->node_to_cpu); + + /* if set, fill out node_to_cpu array */ + if ( map != NULL ) + { + /* copy cpu to node mapping to domU */ + /* converting cpumask to u64 b/c userspace doesn't + * know about cpumask_t and is accepting a u64 */ + memset(node_to_cpu_64, 0, sizeof(node_to_cpu_64)); + for ( i = 0; i < pi->nr_nodes; i++ ) { + for ( j = 0; j < num_online_cpus(); j++ ) + if ( cpu_isset(j, node_to_cpumask(i)) ) + node_to_cpu_64[i] |= (u64)1 << j; + + if ( copy_to_guest_offset(pi->node_to_cpu, + i, &(node_to_cpu_64[i]), 1) ) { + ret = -EFAULT; + break; + } + } + } + if ( copy_to_guest(u_sysctl, op, 1) ) ret = -EFAULT; } diff -r 1ea82b45817b xen/common/domctl.c --- a/xen/common/domctl.c Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/common/domctl.c Tue Oct 03 09:07:37 2006 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff -r 1ea82b45817b xen/common/page_alloc.c --- a/xen/common/page_alloc.c Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/common/page_alloc.c Tue Oct 03 09:07:37 2006 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff -r 1ea82b45817b xen/drivers/acpi/numa.c --- a/xen/drivers/acpi/numa.c Tue 
Oct 03 08:05:41 2006 +0200 +++ b/xen/drivers/acpi/numa.c Tue Oct 03 09:07:37 2006 +0200 @@ -36,6 +36,11 @@ #include #include /* __va() */ +#define ACPI_DEBUG_OUTPUT +#undef ACPI_DEBUG_PRINT +#define ACPI_DEBUG_PRINT_2(log,log2,log3,...) printf( __VA_ARGS__) +#define ACPI_DEBUG_PRINT(X) ACPI_DEBUG_PRINT_2 X + #define ACPI_NUMA 0x80000000 #define _COMPONENT ACPI_NUMA ACPI_MODULE_NAME("numa") diff -r 1ea82b45817b xen/include/asm-ia64/config.h --- a/xen/include/asm-ia64/config.h Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/include/asm-ia64/config.h Tue Oct 03 09:07:37 2006 +0200 @@ -26,6 +26,9 @@ #ifdef CONFIG_XEN_SMP #define CONFIG_SMP 1 #define NR_CPUS 64 +#define CONFIG_NUMA +#define CONFIG_ACPI_NUMA +#define NODES_SHIFT 3 #else #undef CONFIG_SMP #define NR_CPUS 1 diff -r 1ea82b45817b xen/include/asm-ia64/linux/asm/README.origin --- a/xen/include/asm-ia64/linux/asm/README.origin Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/include/asm-ia64/linux/asm/README.origin Tue Oct 03 09:07:37 2006 +0200 @@ -43,3 +43,4 @@ unaligned.h -> linux/include/asm-ia64/u unaligned.h -> linux/include/asm-ia64/unaligned.h unistd.h -> linux/include/asm-ia64/unistd.h unwind.h -> linux/include/asm-ia64/unwind.h +nodedata.h -> linux/include/asm-ia64/nodedata.h diff -r 1ea82b45817b xen/include/asm-ia64/linux/asm/acpi.h --- a/xen/include/asm-ia64/linux/asm/acpi.h Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/include/asm-ia64/linux/asm/acpi.h Tue Oct 03 09:07:37 2006 +0200 @@ -108,8 +108,10 @@ extern unsigned int get_cpei_target_cpu( extern unsigned int get_cpei_target_cpu(void); #ifdef CONFIG_ACPI_NUMA +#ifndef XEN /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/ #define MAX_PXM_DOMAINS (256) +#endif extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif diff -r 1ea82b45817b xen/include/asm-ia64/linux/asm/numa.h --- a/xen/include/asm-ia64/linux/asm/numa.h Tue Oct 03 08:05:41 2006 +0200 +++ b/xen/include/asm-ia64/linux/asm/numa.h Tue Oct
03 09:07:37 2006 +0200
@@ -71,4 +71,8 @@ extern int paddr_to_nid(unsigned long pa
 
 #endif /* CONFIG_NUMA */
 
+#ifdef XEN
+#define phys_to_nid(paddr) paddr_to_nid(paddr) /* alias: expands to paddr_to_nid() */
+#endif
+
 #endif /* _ASM_IA64_NUMA_H */
diff -r 1ea82b45817b xen/include/asm-ia64/xenpage.h
--- a/xen/include/asm-ia64/xenpage.h	Tue Oct 03 08:05:41 2006 +0200
+++ b/xen/include/asm-ia64/xenpage.h	Tue Oct 03 09:07:37 2006 +0200
@@ -1,9 +1,5 @@
 #ifndef _ASM_IA64_XENPAGE_H
 #define _ASM_IA64_XENPAGE_H
-
-#ifdef CONFIG_DISCONTIGMEM
-#error "xenpage.h: page macros need to be defined for CONFIG_DISCONTIGMEM"
-#endif
 
 #undef mfn_valid
 #undef page_to_mfn
diff -r 1ea82b45817b xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h	Tue Oct 03 08:05:41 2006 +0200
+++ b/xen/include/public/arch-ia64.h	Tue Oct 03 09:07:37 2006 +0200
@@ -28,6 +28,7 @@ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigne
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, unsigned long);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r 1ea82b45817b xen/arch/ia64/linux-xen/mm-numa.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/mm-numa.c	Tue Oct 03 09:07:37 2006 +0200
@@ -0,0 +1,75 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ *
+ * 2002/08/07 Erich Focht
+ */
+
+#include
+#include
+#include
+#include
+#ifndef XEN
+#include
+#endif
+#include
+#include
+#include
+#include
+
+
+/*
+ * The following structures are usually initialized by ACPI or
+ * similar mechanisms and describe the NUMA characteristics of the machine.
+ */
+int num_node_memblks;
+struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
+struct node_cpuid_s node_cpuid[NR_CPUS];
+/*
+ * This is a matrix with "distances" between nodes, they should be
+ * proportional to the memory access latency ratios.
+ */
+u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+
+/* Identify which cnode a physical address resides on (no match: -1, or 0 if no memblks exist) */
+int
+paddr_to_nid(unsigned long paddr)
+{
+	int i;
+
+	for (i = 0; i < num_node_memblks; i++)
+		if (paddr >= node_memblk[i].start_paddr &&
+		    paddr < node_memblk[i].start_paddr + node_memblk[i].size)
+			break;	/* i indexes the first memblk containing paddr */
+
+	return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
+}
+
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
+/*
+ * Because of holes evaluate on section limits (each block's end is rounded up).
+ * If the section of memory exists, then return the node where the section
+ * resides. Otherwise return node 0 as the default. This is used by
+ * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
+ * the section resides.
+ */
+int early_pfn_to_nid(unsigned long pfn)
+{
+	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
+
+	for (i = 0; i < num_node_memblks; i++) {
+		ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
+		esec = (node_memblk[i].start_paddr + node_memblk[i].size +
+			((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
+		if (section >= ssec && section < esec)
+			return node_memblk[i].nid;
+	}
+
+	return 0;
+}
+#endif
diff -r 1ea82b45817b xen/arch/ia64/linux-xen/numa.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/numa.c	Tue Oct 03 09:07:37 2006 +0200
@@ -0,0 +1,67 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ia64 kernel NUMA specific stuff
+ *
+ * Copyright (C) 2002 Erich Focht
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ * Jesse Barnes
+ */
+#ifdef XEN
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#ifdef XEN
+#include
+#endif
+
+#ifdef XEN
+nodemask_t node_online_map = { { [0] = 1UL } }; /* word 0 = 1: presumably only node 0 online at start - confirm */
+#endif
+
+u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI.
+ */
+void __init build_cpu_to_node_map(void)
+{
+	int cpu, i, node;
+
+	for(node=0; node < MAX_NUMNODES; node++)
+		cpus_clear(node_to_cpu_mask[node]);
+
+	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+		node = -1;	/* stays -1 unless a node_cpuid[] entry matches */
+		for (i = 0; i < NR_CPUS; ++i)
+			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+				node = node_cpuid[i].nid;
+				break;
+			}
+		cpu_to_node_map[cpu] = (node >= 0) ? node : 0;	/* no match: node 0 */
+		if (node >= 0)	/* unmatched cpus are not added to any node's mask */
+			cpu_set(cpu, node_to_cpu_mask[node]);
+	}
+}
diff -r 1ea82b45817b xen/include/asm-ia64/linux-null/asm/mmzone.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/linux-null/asm/mmzone.h	Tue Oct 03 09:07:37 2006 +0200
@@ -0,0 +1,1 @@
+/* Empty file.
*/
diff -r 1ea82b45817b xen/include/asm-ia64/linux/asm/nodedata.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/linux/asm/nodedata.h	Tue Oct 03 09:07:37 2006 +0200
@@ -0,0 +1,52 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Erich Focht
+ * Copyright (c) 2002 Kimio Suganuma
+ */
+#ifndef _ASM_IA64_NODEDATA_H
+#define _ASM_IA64_NODEDATA_H
+
+#include
+#include
+
+#include
+#include
+
+#ifdef CONFIG_NUMA
+
+/*
+ * Node Data. One of these structures is located on each node of a NUMA system.
+ */
+
+struct pglist_data;	/* forward declaration: only pointers to it are stored here */
+struct ia64_node_data {
+	short active_cpu_count;
+	short node;
+	struct pglist_data *pg_data_ptrs[MAX_NUMNODES];	/* indexed by node id, see NODE_DATA() */
+};
+
+
+/*
+ * Return a pointer to the node_data structure for the executing cpu.
+ */
+#define local_node_data (local_cpu_data->node_data)
+
+/*
+ * Given a node id, return a pointer to the pg_data_t for the node.
+ *
+ * NODE_DATA - should be used in all code not related to system
+ * initialization. It uses pernode data structures to minimize
+ * offnode memory references. However, these structures are not
+ * present during boot. This macro can be used once cpu_init
+ * completes.
+ */
+#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid])
+
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_NODEDATA_H */