[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH v1 02/21] x86: NUMA: Refactor NUMA code



On Mon, Feb 20, 2017 at 6:07 PM, Julien Grall <julien.grall@xxxxxxx> wrote:
> Hello Vijay,
>
>
> On 09/02/17 15:56, vijay.kilari@xxxxxxxxx wrote:
>>
>> From: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxx>
>>
>> Move common generic NUMA code to xen/common/numa.c from
>> xen/arch/x86/numa.c. Also move generic code in header file
>> xen/include/asm-x86/numa.h to xen/include/xen/numa.h
>>
>> This common code can be re-used later for ARM.
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@xxxxxxxxxx>
>> ---
>>  xen/arch/x86/numa.c        | 299 ---------------------------------------
>>  xen/common/Makefile        |   1 +
>>  xen/common/numa.c          | 342
>> +++++++++++++++++++++++++++++++++++++++++++++
>>  xen/include/asm-x86/numa.h |  47 -------
>>  xen/include/xen/numa.h     |  54 +++++++
>>  5 files changed, 397 insertions(+), 346 deletions(-)
>>
>> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
>> index 6f4d438..bc787e0 100644
>> --- a/xen/arch/x86/numa.c
>> +++ b/xen/arch/x86/numa.c
>> @@ -25,27 +25,12 @@ custom_param("numa", numa_setup);
>>  #define Dprintk(x...)
>>  #endif
>>
>> -/* from proto.h */
>> -#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
>> -
>> -struct node_data node_data[MAX_NUMNODES];
>> -
>> -/* Mapping from pdx to node id */
>> -int memnode_shift;
>> -static typeof(*memnodemap) _memnodemap[64];
>> -unsigned long memnodemapsize;
>> -u8 *memnodemap;
>> -
>> -nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
>> -    [0 ... NR_CPUS-1] = NUMA_NO_NODE
>> -};
>>  /*
>>   * Keep BIOS's CPU2node information, should not be used for memory
>> allocaion
>>   */
>>  nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
>>      [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
>>  };
>> -cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
>>
>>  nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
>
>
> Why this variable is not moved?
Ok. Can be moved.
>
> [...]
>
>>  void __init numa_init_array(void)
>
>
> Same question for this function.

Initially I was suspicious about the comment in this function and thought
it might be valid of x86 alone. But looks like it is generic.
I will have a look.

>
>>  {
>>      int rr, i;
>> @@ -288,16 +145,6 @@ void __init numa_initmem_init(unsigned long
>> start_pfn, unsigned long end_pfn)
>>                      (u64)end_pfn << PAGE_SHIFT);
>>  }
>>
>> -void numa_add_cpu(int cpu)
>> -{
>> -    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>> -}
>> -
>> -void numa_set_node(int cpu, nodeid_t node)
>> -{
>> -    cpu_to_node[cpu] = node;
>> -}
>> -
>>  /* [numa=off] */
>>  static __init int numa_setup(char *opt)
>
>
> Same question here. Everything in numa_setup and the fake NUMA looks
> valid for ARM too.

numa_setup() is taken in separate patch. fake numa case is not considered.
for x86 it is under separate config CONFIG_NUMA_EMU.

>
> [....]
>
>> diff --git a/xen/common/Makefile b/xen/common/Makefile
>> index 0fed30b..c1bd2ff 100644
>> --- a/xen/common/Makefile
>> +++ b/xen/common/Makefile
>> @@ -63,6 +63,7 @@ obj-y += wait.o
>>  obj-bin-y += warning.init.o
>>  obj-$(CONFIG_XENOPROF) += xenoprof.o
>>  obj-y += xmalloc_tlsf.o
>> +obj-y += numa.o
>
>
> This needs to be gated with CONFIG_NUMA.

As it is shared with x86 and prior this changes it is not gated under
any config for x86.

>
>>
>>  obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo
>> unlz4 earlycpio,$(n).init.o)
>>
>> diff --git a/xen/common/numa.c b/xen/common/numa.c
>> new file mode 100644
>> index 0000000..59dcb63
>> --- /dev/null
>> +++ b/xen/common/numa.c
>> @@ -0,0 +1,342 @@
>> +/*
>> + * Common NUMA handling functions for x86 and arm.
>> + * Original code extracted from arch/x86/numa.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>
>
> Xen is GPLv2 only. Please update to:
>
> "This program is free software; you can redistribute it and/or
> modify it under the terms and conditions of the GNU General Public
> License, version 2, as published by the Free Software Foundation."
>
>
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +
>> +#include <xen/mm.h>
>> +#include <xen/string.h>
>> +#include <xen/init.h>
>> +#include <xen/ctype.h>
>> +#include <xen/nodemask.h>
>> +#include <xen/numa.h>
>> +#include <xen/keyhandler.h>
>> +#include <xen/time.h>
>> +#include <xen/smp.h>
>> +#include <xen/pfn.h>
>> +#include <xen/sched.h>
>> +#include <xen/errno.h>
>> +#include <xen/softirq.h>
>> +#include <asm/setup.h>
>> +
>> +struct node_data node_data[MAX_NUMNODES];
>> +
>> +/* Mapping from pdx to node id */
>
>
> Looking at this comment, it looks like the NUMA support should depend on
> HAS_PDX as this is not something that may be able on all the architecture.

yes it uses pfn_to_pdx() while updating memnodemap.
May be comment should be suffice.

>
>> +int memnode_shift;
>> +unsigned long memnodemapsize;
>> +u8 *memnodemap;
>> +typeof(*memnodemap) _memnodemap[64];
>> +
>> +nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
>> +    [0 ... NR_CPUS-1] = NUMA_NO_NODE
>> +};
>> +
>> +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
>
>
> [...]
>
>> +void numa_add_cpu(int cpu)
>> +{
>> +    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>> +}
>> +
>> +void numa_set_node(int cpu, nodeid_t node)
>> +{
>> +    cpu_to_node[cpu] = node;
>> +}
>> +
>> +EXPORT_SYMBOL(node_to_cpumask);
>> +EXPORT_SYMBOL(memnode_shift);
>> +EXPORT_SYMBOL(memnodemap);
>> +EXPORT_SYMBOL(node_data);
>
>
> Those 4 lines are not part of the original code. Why did you add them?
>
> To ease review I would like to see this patch split multiple one:
>         - multiple small to prepare the code (export function, change the
> type...)
>         - a patch to move the code and only move it. No changes at all in
> it.
OK
>
> [...]
>
>> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
>> index 2479238..61bcd8e 100644
>> --- a/xen/include/asm-x86/numa.h
>> +++ b/xen/include/asm-x86/numa.h
>> @@ -17,64 +17,17 @@ extern cpumask_t     node_to_cpumask[];
>>  #define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
>>  #define node_to_cpumask(node)    (node_to_cpumask[node])
>>
>> -struct node {
>> -       u64 start,end;
>> -};
>> -
>> -extern int compute_hash_shift(struct node *nodes, int numnodes,
>> -                             nodeid_t *nodeids);
>>  extern nodeid_t pxm_to_node(unsigned int pxm);
>>
>>  #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
>> -#define VIRTUAL_BUG_ON(x)
>>
>> -extern void numa_add_cpu(int cpu);
>>  extern void numa_init_array(void);
>>  extern bool_t numa_off;
>>
>> -
>
>
> Spurious change.
>
>
>>  extern int srat_disabled(void);
>> -extern void numa_set_node(int cpu, nodeid_t node);
>> -extern nodeid_t setup_node(unsigned int pxm);
>>  extern void srat_detect_node(int cpu);
>>
>> -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
>>  extern nodeid_t apicid_to_node[];
>> -extern void init_cpu_to_node(void);
>> -
>> -static inline void clear_node_cpumask(int cpu)
>> -{
>> -       cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>> -}
>> -
>> -/* Simple perfect hash to map pdx to node numbers */
>> -extern int memnode_shift;
>> -extern unsigned long memnodemapsize;
>> -extern u8 *memnodemap;
>> -
>> -struct node_data {
>> -    unsigned long node_start_pfn;
>> -    unsigned long node_spanned_pages;
>> -};
>> -
>> -extern struct node_data node_data[];
>> -
>> -static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
>> -{
>> -       nodeid_t nid;
>> -       VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
>> memnodemapsize);
>> -       nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
>> -       VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
>> -       return nid;
>> -}
>> -
>> -#define NODE_DATA(nid)         (&(node_data[nid]))
>> -
>> -#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
>> -#define node_spanned_pages(nid)
>> (NODE_DATA(nid)->node_spanned_pages)
>> -#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
>> -                                NODE_DATA(nid)->node_spanned_pages)
>> -
>>  extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
>>
>>  void srat_parse_regions(u64 addr);
>> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
>> index 7aef1a8..dd33c92 100644
>> --- a/xen/include/xen/numa.h
>> +++ b/xen/include/xen/numa.h
>> @@ -18,4 +18,58 @@
>>    (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
>>     ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
>>
>> +struct node {
>> +       u64 start,end;
>
>
> This contains hard tab. It looks like that asm-x86/numa.h add a mix hard
> tab/soft tab. Can you have a clean-up patch to drop them first?

OK. I will try.
>
>> +};
>> +
>> +struct node_data {
>> +    unsigned long node_start_pfn;
>> +    unsigned long node_spanned_pages;
>> +    nodeid_t      node_id;
>> +};
>> +
>> +#define NODE_DATA(nid)         (&(node_data[nid]))
>
>
> Hard tab.
>
>> +#define VIRTUAL_BUG_ON(x)
>
>
> What is the point to have a BUG_ON that is a nop?

yes.. that is part of original code. As part of clean up patch.
I will drop it.

>
>> +
>> +#ifdef CONFIG_NUMA
>> +extern void init_cpu_to_node(void);
>> +
>> +static inline void clear_node_cpumask(int cpu)
>> +{
>> +       cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>
>
> Hard tab.
>
>> +}
>
>
> You move this function in xen/numa.h but this is never used in xen code. It
> would be better to drop it.
>
>> +
>> +/* Simple perfect hash to map pdx to node numbers */
>> +extern int memnode_shift;
>> +extern unsigned long memnodemapsize;
>> +extern u8 *memnodemap;
>> +extern typeof(*memnodemap) _memnodemap[];
>> +
>> +extern struct node_data node_data[];
>> +
>> +static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
>> +{
>> +       nodeid_t nid;
>> +       VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
>> memnodemapsize);
>> +       nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
>> +       VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
>> +       return nid;
>
>
> Hard tab in all this function.
>
>> +}
>> +
>> +#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
>> +#define node_spanned_pages(nid)
>> (NODE_DATA(nid)->node_spanned_pages)
>> +#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
>> +                                NODE_DATA(nid)->node_spanned_pages)
>
>
> Same for those 3 macros.
>
>> +
>> +#else
>> +#define init_cpu_to_node() do {} while (0)
>
>
> Please use static inline init_cpu_to_node(void) {}
>
>> +#define clear_node_cpumask(cpu) do {} while (0)
>
>
> Not point of having this one.
>
>> +#endif /* CONFIG_NUMA */
>> +
>> +extern void numa_add_cpu(int cpu);
>> +extern nodeid_t setup_node(unsigned int pxm);
>> +extern void numa_set_node(int cpu, nodeid_t node);
>> +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
>
>
> I am not sure to understand why those function are not protected by #ifdef
> CONFIFG_NUMA.
As these defined in xen/common/numa.c which is not under CONFIG_NUMA,
I have kept them outside CONFIG_NUMA

>
>> +extern int compute_hash_shift(struct node *nodes, int numnodes,
>
>
> The function name is a bit too generic.
>
>> +                             nodeid_t *nodeids);
>>  #endif /* _XEN_NUMA_H */
>>
>
> Cheers,
>
> --
> Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.