[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map functions to common


  • To: Julien Grall <julien@xxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>, "sstabellini@xxxxxxxxxx" <sstabellini@xxxxxxxxxx>, "jbeulich@xxxxxxxx" <jbeulich@xxxxxxxx>
  • From: Wei Chen <Wei.Chen@xxxxxxx>
  • Date: Tue, 24 Aug 2021 04:07:20 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=QayhYDrE9aE/Cr+MhhTWEQQA9VBWjywp4l8p1vWoSKQ=; b=ZWLRxzoTKJ4JGDkywd1GwGfDRC71PvvW57TU3T+VA+aZzEsggjhjusZziNzuqECnUkpaKL4rrQMLbyd1heISEN4RkhzCSNZW7ufxFOyZzD0H/u0WHX3oBbGMXrAuq1CJE8dMZUfljKbCXvDq3HroZZ2P5w4ji0mIYMESqiaIoIjN44/ELFpr4VNODSdFNzQ3suT136c+uRr2fnTXJ8ioh5zqdUnRlrDFzMf+HIFjZKesy+Q6MtvaNE8e+7leqwIPZEpEcqBILV5QBv41pLHzVaZ3akviEwFUwh52M+vRxrCIbLnAK/2hvjGUeJJRmbwipJtdT/w0WuR/CRRmr8PFHw==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=V2zIPiT5iPJxoO7+Fb2zI0NLyjKnYIVpWyBNlXYQqLLstoP4FSgRs+4UPsF8m6aCAsnE0Ni5lOie1d7qj3dDhokgSIbEUc9sD9uYr0EcpGBu/gXj2BQlDBVsCYBv5yBuEHeSQ//21u3+a7k7E4CHocZlf+sdbdBmLjY0Yp2LPjq0osp35rXdQWM77sQNIKcXSMojhYgD3PUBFgm89I/z3dDE5A6g68SONw/ZKgcdXHCEF80ndciuy7hgcPW6A9mTfcEL+ecxVRvXtRjkUTmDfm/MCVzkYjb15nOmtf/IneOCyWiwkAuxVJut5WMJ0dJaw6aXws6qnV6pZ0xsnxTftg==
  • Authentication-results-original: xen.org; dkim=none (message not signed) header.d=none;xen.org; dmarc=none action=none header.from=arm.com;
  • Cc: Bertrand Marquis <Bertrand.Marquis@xxxxxxx>
  • Delivery-date: Tue, 24 Aug 2021 04:08:04 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Nodisclaimer: true
  • Original-authentication-results: xen.org; dkim=none (message not signed) header.d=none;xen.org; dmarc=none action=none header.from=arm.com;
  • Thread-index: AQHXjpsxBGls5NwFnkiq4w4yl3t8HKuBcIyAgACsofA=
  • Thread-topic: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map functions to common

Hi Julien,

> -----Original Message-----
> From: Julien Grall <julien@xxxxxxx>
> Sent: 2021年8月24日 1:47
> To: Wei Chen <Wei.Chen@xxxxxxx>; xen-devel@xxxxxxxxxxxxxxxxxxxx;
> sstabellini@xxxxxxxxxx; jbeulich@xxxxxxxx
> Cc: Bertrand Marquis <Bertrand.Marquis@xxxxxxx>
> Subject: Re: [XEN RFC PATCH 08/40] xen/x86: Move NUMA memory node map
> functions to common
> 
> Hi Wei,
> 
> On 11/08/2021 11:23, Wei Chen wrote:
> > In the later patches we will add NUMA support to Arm. Arm
> > NUMA support will follow current memory node map management
> > as x86. So this part of code can be common, in this case,
> > we move this part of code from arch/x86 to common.
> 
> I would add "No functional changes intended" to make clear this patch is
> only moving code.

Ok, I will do it.

> 
> >
> > Signed-off-by: Wei Chen <wei.chen@xxxxxxx>
> > ---
> >   xen/arch/x86/numa.c        | 114 --------------------------------
> >   xen/common/Makefile        |   1 +
> >   xen/common/numa.c          | 131 +++++++++++++++++++++++++++++++++++++
> >   xen/include/asm-x86/numa.h |  29 --------
> >   xen/include/xen/numa.h     |  35 ++++++++++
> >   5 files changed, 167 insertions(+), 143 deletions(-)
> >   create mode 100644 xen/common/numa.c
> >
> > diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> > index d23f4f7919..a6211be121 100644
> > --- a/xen/arch/x86/numa.c
> > +++ b/xen/arch/x86/numa.c
> > @@ -29,14 +29,6 @@ custom_param("numa", numa_setup);
> >   /* from proto.h */
> >   #define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
> >
> > -struct node_data node_data[MAX_NUMNODES];
> > -
> > -/* Mapping from pdx to node id */
> > -int memnode_shift;
> > -static typeof(*memnodemap) _memnodemap[64];
> > -unsigned long memnodemapsize;
> > -u8 *memnodemap;
> > -
> >   nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
> >       [0 ... NR_CPUS-1] = NUMA_NO_NODE
> >   };
> > @@ -58,112 +50,6 @@ int srat_disabled(void)
> >       return numa_off || acpi_numa < 0;
> >   }
> >
> > -/*
> > - * Given a shift value, try to populate memnodemap[]
> > - * Returns :
> > - * 1 if OK
> > - * 0 if memnodmap[] too small (of shift too small)
> > - * -1 if node overlap or lost ram (shift too big)
> > - */
> > -static int __init populate_memnodemap(const struct node *nodes,
> > -                                      int numnodes, int shift, nodeid_t
> *nodeids)
> > -{
> > -    unsigned long spdx, epdx;
> > -    int i, res = -1;
> > -
> > -    memset(memnodemap, NUMA_NO_NODE, memnodemapsize *
> sizeof(*memnodemap));
> > -    for ( i = 0; i < numnodes; i++ )
> > -    {
> > -        spdx = paddr_to_pdx(nodes[i].start);
> > -        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > -        if ( spdx >= epdx )
> > -            continue;
> > -        if ( (epdx >> shift) >= memnodemapsize )
> > -            return 0;
> > -        do {
> > -            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
> > -                return -1;
> > -
> > -            if ( !nodeids )
> > -                memnodemap[spdx >> shift] = i;
> > -            else
> > -                memnodemap[spdx >> shift] = nodeids[i];
> > -
> > -            spdx += (1UL << shift);
> > -        } while ( spdx < epdx );
> > -        res = 1;
> > -    }
> > -
> > -    return res;
> > -}
> > -
> > -static int __init allocate_cachealigned_memnodemap(void)
> > -{
> > -    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
> > -    unsigned long mfn = mfn_x(alloc_boot_pages(size, 1));
> > -
> > -    memnodemap = mfn_to_virt(mfn);
> > -    mfn <<= PAGE_SHIFT;
> > -    size <<= PAGE_SHIFT;
> > -    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
> > -           mfn, mfn + size);
> > -    memnodemapsize = size / sizeof(*memnodemap);
> > -
> > -    return 0;
> > -}
> > -
> > -/*
> > - * The LSB of all start and end addresses in the node map is the value
> of the
> > - * maximum possible shift.
> > - */
> > -static int __init extract_lsb_from_nodes(const struct node *nodes,
> > -                                         int numnodes)
> > -{
> > -    int i, nodes_used = 0;
> > -    unsigned long spdx, epdx;
> > -    unsigned long bitfield = 0, memtop = 0;
> > -
> > -    for ( i = 0; i < numnodes; i++ )
> > -    {
> > -        spdx = paddr_to_pdx(nodes[i].start);
> > -        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > -        if ( spdx >= epdx )
> > -            continue;
> > -        bitfield |= spdx;
> > -        nodes_used++;
> > -        if ( epdx > memtop )
> > -            memtop = epdx;
> > -    }
> > -    if ( nodes_used <= 1 )
> > -        i = BITS_PER_LONG - 1;
> > -    else
> > -        i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
> > -    memnodemapsize = (memtop >> i) + 1;
> > -    return i;
> > -}
> > -
> > -int __init compute_hash_shift(struct node *nodes, int numnodes,
> > -                              nodeid_t *nodeids)
> > -{
> > -    int shift;
> > -
> > -    shift = extract_lsb_from_nodes(nodes, numnodes);
> > -    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
> > -        memnodemap = _memnodemap;
> > -    else if ( allocate_cachealigned_memnodemap() )
> > -        return -1;
> > -    printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
> > -
> > -    if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
> > -    {
> > -        printk(KERN_INFO "Your memory is not aligned you need to "
> > -               "rebuild your hypervisor with a bigger NODEMAPSIZE "
> > -               "shift=%d\n", shift);
> > -        return -1;
> > -    }
> > -
> > -    return shift;
> > -}
> >   /* initialize NODE_DATA given nodeid and start/end */
> >   void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
> >   {
> > diff --git a/xen/common/Makefile b/xen/common/Makefile
> > index 54de70d422..f8f667e90a 100644
> > --- a/xen/common/Makefile
> > +++ b/xen/common/Makefile
> > @@ -54,6 +54,7 @@ obj-y += wait.o
> >   obj-bin-y += warning.init.o
> >   obj-$(CONFIG_XENOPROF) += xenoprof.o
> >   obj-y += xmalloc_tlsf.o
> > +obj-$(CONFIG_NUMA) += numa.o
> 
> AFAICT, the Makefile is listing the file in alphabetical order. So
> please add numa.o in the correct position.
> 

Thanks for the reminder, I will fix it.

> >
> >   obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma
> lzo unlzo unlz4 unzstd earlycpio,$(n).init.o)
> >
> > diff --git a/xen/common/numa.c b/xen/common/numa.c
> > new file mode 100644
> > index 0000000000..e65b6a6676
> > --- /dev/null
> > +++ b/xen/common/numa.c
> > @@ -0,0 +1,131 @@
> > +/*
> > + * Generic VM initialization for x86-64 NUMA setups.
> > + * Copyright 2002,2003 Andi Kleen, SuSE Labs.
> > + * Adapted for Xen: Ryan Harper <ryanh@xxxxxxxxxx>
> > + */
> > +
> > +#include <xen/mm.h>
> > +#include <xen/string.h>
> > +#include <xen/init.h>
> > +#include <xen/ctype.h>
> 
> You don't seem to use any helpers./types directly defined by at least
> this header...
> 
> > +#include <xen/nodemask.h>
> > +#include <xen/numa.h>
> > +#include <xen/time.h>
> 
> ... this one and ...
> 
> > +#include <xen/smp.h>
> 
> ... this one. Can you check the list of headers and introduce the
> minimum? If the dependency is required by another headers, then I think
> that dependency should be moved in the header requiring it.
> 

I will check it in next version. If it isn't needed, I will remove it.

> > +#include <xen/pfn.h>
> > +#include <xen/sched.h>
> 
> Please sort the includes in alphabetical order.
> 

OK

> > +
> > +struct node_data node_data[MAX_NUMNODES];
> > +
> > +/* Mapping from pdx to node id */
> > +int memnode_shift;
> > +typeof(*memnodemap) _memnodemap[64];
> > +unsigned long memnodemapsize;
> > +u8 *memnodemap;
> > +
> > +/*
> > + * Given a shift value, try to populate memnodemap[]
> > + * Returns :
> > + * 1 if OK
> > + * 0 if memnodmap[] too small (of shift too small)
> > + * -1 if node overlap or lost ram (shift too big)
> > + */
> > +static int __init populate_memnodemap(const struct node *nodes,
> > +                                      int numnodes, int shift, nodeid_t
> *nodeids)
> > +{
> > +    unsigned long spdx, epdx;
> > +    int i, res = -1;
> > +
> > +    memset(memnodemap, NUMA_NO_NODE, memnodemapsize *
> sizeof(*memnodemap));
> > +    for ( i = 0; i < numnodes; i++ )
> > +    {
> > +        spdx = paddr_to_pdx(nodes[i].start);
> > +        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > +        if ( spdx >= epdx )
> > +            continue;
> > +        if ( (epdx >> shift) >= memnodemapsize )
> > +            return 0;
> > +        do {
> > +            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
> > +                return -1;
> > +
> > +            if ( !nodeids )
> > +                memnodemap[spdx >> shift] = i;
> > +            else
> > +                memnodemap[spdx >> shift] = nodeids[i];
> > +
> > +            spdx += (1UL << shift);
> > +        } while ( spdx < epdx );
> > +        res = 1;
> > +    }
> > +
> > +    return res;
> > +}
> > +
> > +static int __init allocate_cachealigned_memnodemap(void)
> > +{
> > +    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
> > +    unsigned long mfn = mfn_x(alloc_boot_pages(size, 1));
> > +
> > +    memnodemap = mfn_to_virt(mfn);
> > +    mfn <<= PAGE_SHIFT;
> > +    size <<= PAGE_SHIFT;
> > +    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
> > +           mfn, mfn + size);
> > +    memnodemapsize = size / sizeof(*memnodemap);
> > +
> > +    return 0;
> > +}
> > +
> > +/*
> > + * The LSB of all start and end addresses in the node map is the value
> of the
> > + * maximum possible shift.
> > + */
> > +static int __init extract_lsb_from_nodes(const struct node *nodes,
> > +                                         int numnodes)
> > +{
> > +    int i, nodes_used = 0;
> > +    unsigned long spdx, epdx;
> > +    unsigned long bitfield = 0, memtop = 0;
> > +
> > +    for ( i = 0; i < numnodes; i++ )
> > +    {
> > +        spdx = paddr_to_pdx(nodes[i].start);
> > +        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
> > +        if ( spdx >= epdx )
> > +            continue;
> > +        bitfield |= spdx;
> > +        nodes_used++;
> > +        if ( epdx > memtop )
> > +            memtop = epdx;
> > +    }
> > +    if ( nodes_used <= 1 )
> > +        i = BITS_PER_LONG - 1;
> > +    else
> > +        i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
> > +    memnodemapsize = (memtop >> i) + 1;
> > +    return i;
> > +}
> > +
> > +int __init compute_hash_shift(struct node *nodes, int numnodes,
> > +                              nodeid_t *nodeids)
> > +{
> > +    int shift;
> > +
> > +    shift = extract_lsb_from_nodes(nodes, numnodes);
> > +    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
> > +        memnodemap = _memnodemap;
> > +    else if ( allocate_cachealigned_memnodemap() )
> > +        return -1;
> > +    printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
> > +
> > +    if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
> > +    {
> > +        printk(KERN_INFO "Your memory is not aligned you need to "
> > +               "rebuild your hypervisor with a bigger NODEMAPSIZE "
> > +               "shift=%d\n", shift);
> > +        return -1;
> > +    }
> > +
> > +    return shift;
> > +}
> > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> > index bada2c0bb9..abe5617d01 100644
> > --- a/xen/include/asm-x86/numa.h
> > +++ b/xen/include/asm-x86/numa.h
> > @@ -26,7 +26,6 @@ extern int compute_hash_shift(struct node *nodes, int
> numnodes,
> >   extern nodeid_t pxm_to_node(unsigned int pxm);
> >
> >   #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
> > -#define VIRTUAL_BUG_ON(x)
> >
> >   extern void numa_add_cpu(int cpu);
> >   extern void numa_init_array(void);
> > @@ -47,34 +46,6 @@ static inline void clear_node_cpumask(int cpu)
> >     cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> >   }
> >
> > -/* Simple perfect hash to map pdx to node numbers */
> > -extern int memnode_shift;
> > -extern unsigned long memnodemapsize;
> > -extern u8 *memnodemap;
> > -
> > -struct node_data {
> > -    unsigned long node_start_pfn;
> > -    unsigned long node_spanned_pages;
> > -};
> > -
> > -extern struct node_data node_data[];
> > -
> > -static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
> > -{
> > -   nodeid_t nid;
> > -   VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
> memnodemapsize);
> > -   nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
> > -   VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
> > -   return nid;
> > -}
> > -
> > -#define NODE_DATA(nid)             (&(node_data[nid]))
> > -
> > -#define node_start_pfn(nid)        (NODE_DATA(nid)->node_start_pfn)
> > -#define node_spanned_pages(nid)    (NODE_DATA(nid)->node_spanned_pages)
> > -#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
> > -                            NODE_DATA(nid)->node_spanned_pages)
> > -
> >   extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
> >
> >   void srat_parse_regions(u64 addr);
> > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> > index 7aef1a88dc..39e8a4e00a 100644
> > --- a/xen/include/xen/numa.h
> > +++ b/xen/include/xen/numa.h
> > @@ -18,4 +18,39 @@
> >     (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
> >      ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
> >
> > +/* The following content can be used when NUMA feature is enabled */
> > +#if defined(CONFIG_NUMA)
> 
> Please use #ifdef CONFIG_NUMA
> 
> > +
> > +/* Simple perfect hash to map pdx to node numbers */
> > +extern int memnode_shift;
> > +extern unsigned long memnodemapsize;
> > +extern u8 *memnodemap;
> > +extern typeof(*memnodemap) _memnodemap[64];
> 
> AFAICT, this will be turned static against in a follow-up patch. Can
> this be avoided?
> 

I will try it in next version.

> > +
> > +struct node_data {
> > +    unsigned long node_start_pfn;
> > +    unsigned long node_spanned_pages;
> > +};
> > +
> > +extern struct node_data node_data[];
> > +#define VIRTUAL_BUG_ON(x)
> > +
> > +static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
> > +{
> > +   nodeid_t nid;
> > +   VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
> memnodemapsize);
> > +   nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
> > +   VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
> > +   return nid;
> > +}
> > +
> > +#define NODE_DATA(nid)             (&(node_data[nid]))
> > +
> > +#define node_start_pfn(nid)        (NODE_DATA(nid)->node_start_pfn)
> > +#define node_spanned_pages(nid)    (NODE_DATA(nid)->node_spanned_pages)
> > +#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
> > +                            NODE_DATA(nid)->node_spanned_pages)
> > +
> > +#endif /* CONFIG_NUMA */
> > +
> >   #endif /* _XEN_NUMA_H */
> >
> 
> Cheers,
> 
> --
> Julien Grall

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.