Re: [Xen-devel] [PATCH 12/12] xen/events: use the FIFO-based ABI if available
On Fri, Aug 09, 2013 at 07:15:21PM +0100, David Vrabel wrote:
> From: David Vrabel <david.vrabel@xxxxxxxxxx>
>
> If the hypervisor supports the FIFO-based ABI, enable it by
> initializing the control block for the boot VCPU and subsequent VCPUs
> as they are brought up. The event array is expanded as required when
> event ports are set up.
>
> This implementation has some known limitations:
>
> - Migration will not work as the control blocks or event arrays are
> not remapped by Xen at the destination.
>
Is it possible to hook into xen_vcpu_restore to make it work?
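Something along these lines, perhaps (only a sketch: fifo_restore() is
a made-up name, and it assumes the hypervisor will accept a fresh
EVTCHNOP_init_control per VCPU on the destination; the event array
pages would need matching EVTCHNOP_expand_array calls as well):

        /*
         * Sketch: re-register the per-VCPU control blocks after
         * migration.  The pages allocated at boot are reused; only the
         * hypervisor-side mapping needs rebuilding.
         */
        static void fifo_restore(void)
        {
                int cpu;

                for_each_online_cpu(cpu) {
                        void *block = per_cpu(cpu_control_block, cpu);
                        struct evtchn_init_control init_control;

                        if (!block)
                                continue;

                        init_control.control_mfn = virt_to_mfn(block);
                        init_control.offset = 0;
                        init_control.vcpu = cpu;

                        if (HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
                                                        &init_control) < 0)
                                panic("xen: cannot restore control block for CPU %d\n",
                                      cpu);
                }
        }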
> - The timer VIRQ, which was previously treated as the highest
>   priority event, now has the default priority.
>
So what's missing from the series is a patch that allows the kernel to
actually make use of the priority queues? IMHO it's not just about the
timer VIRQ. How do you plan to expose this interface to drivers?
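For example, the FIFO design has an EVTCHNOP_set_priority sub-op; a
thin wrapper like the below (sketch only, the helper name is mine)
would let drivers opt in, and the timer code could then ask for the
highest priority when binding its VIRQ:

        /*
         * Sketch: change the priority of the event channel behind an
         * IRQ.  EVTCHNOP_set_priority is the sub-op from the FIFO ABI
         * design; xen_irq_set_priority() is a hypothetical helper.
         */
        int xen_irq_set_priority(unsigned irq, unsigned priority)
        {
                struct evtchn_set_priority set_priority;

                set_priority.port = evtchn_from_irq(irq);
                set_priority.priority = priority;

                return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
                                                   &set_priority);
        }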
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  drivers/xen/events/Makefile          |    1 +
>  drivers/xen/events/events.c          |    7 +-
>  drivers/xen/events/events_internal.h |    1 +
>  drivers/xen/events/fifo.c            |  335 ++++++++++++++++++++++++++++++++++
>  4 files changed, 343 insertions(+), 1 deletions(-)
> create mode 100644 drivers/xen/events/fifo.c
>
> diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
> index aea331e..74644d0 100644
> --- a/drivers/xen/events/Makefile
> +++ b/drivers/xen/events/Makefile
> @@ -1,2 +1,3 @@
> obj-y += events.o
> +obj-y += fifo.o
> obj-y += n-level.o
> diff --git a/drivers/xen/events/events.c b/drivers/xen/events/events.c
> index 55d33d1..2e7416e 100644
> --- a/drivers/xen/events/events.c
> +++ b/drivers/xen/events/events.c
> @@ -1589,8 +1589,13 @@ void xen_callback_vector(void) {}
>  void __init xen_init_IRQ(void)
>  {
>          int i;
> +        int ret;
>
> -        xen_evtchn_init_nlevel();
> +        ret = xen_evtchn_init_fifo();
> +        if (ret < 0) {
> +                printk(KERN_INFO "xen: falling back to n-level event channels\n");
> +                xen_evtchn_init_nlevel();
> +        }
>
>          evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
>                                  sizeof(*evtchn_to_irq), GFP_KERNEL);
> diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
> index 9d8b70c..e14f616 100644
> --- a/drivers/xen/events/events_internal.h
> +++ b/drivers/xen/events/events_internal.h
> @@ -138,5 +138,6 @@ static inline void xen_evtchn_handle_events(int cpu)
> }
>
> void xen_evtchn_init_nlevel(void);
> +int xen_evtchn_init_fifo(void);
>
> #endif /* #ifndef __EVENTS_INTERNAL_H__ */
> diff --git a/drivers/xen/events/fifo.c b/drivers/xen/events/fifo.c
> new file mode 100644
> index 0000000..b07a603
> --- /dev/null
> +++ b/drivers/xen/events/fifo.c
> @@ -0,0 +1,335 @@
> +/*
> + * Xen event channels (FIFO-based ABI)
> + *
> + * Copyright (C) 2013 Citrix Systems R&D ltd.
> + *
> + * This source code is licensed under the GNU General Public License,
> + * Version 2 or later. See the file COPYING for more details.
> + */
> +
> +#include <linux/linkage.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/module.h>
> +#include <linux/smp.h>
> +#include <linux/percpu.h>
> +#include <linux/cpu.h>
> +
> +#include <asm/sync_bitops.h>
> +#include <asm/xen/hypercall.h>
> +#include <asm/xen/hypervisor.h>
> +#include <asm/xen/page.h>
> +
> +#include <xen/xen.h>
> +#include <xen/xen-ops.h>
> +#include <xen/events.h>
> +#include <xen/interface/xen.h>
> +#include <xen/interface/event_channel.h>
> +
> +#include "events_internal.h"
> +
> +#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
> +#define MAX_EVENT_ARRAY_PAGES ((1 << EVTCHN_FIFO_LINK_BITS) \
> +                               / EVENT_WORDS_PER_PAGE)
> +
> +struct evtchn_fifo_queue {
> +        uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
> +};
> +
> +static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
> +static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
> +static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES];
> +static unsigned event_array_pages;
> +
> +#define BM(w) ((unsigned long *)(w))
> +
> +static inline event_word_t *event_word_from_port(int port)
> +{
> +        int i = port / EVENT_WORDS_PER_PAGE;
> +
> +        if (i >= event_array_pages)
> +                return NULL;
> +        return event_array[i] + port % EVENT_WORDS_PER_PAGE;
> +}
> +
> +static unsigned fifo_max_channels(void)
> +{
> +        return 1 << EVTCHN_FIFO_LINK_BITS;
> +}
> +
> +static unsigned fifo_nr_channels(void)
> +{
> +        return event_array_pages * EVENT_WORDS_PER_PAGE;
> +}
> +
> +static int fifo_setup(struct irq_info *info)
> +{
> +        int port = info->evtchn;
> +        int i;
> +        int ret = -ENOMEM;
> +
> +        i = port / EVENT_WORDS_PER_PAGE;
> +
> +        if (i >= MAX_EVENT_ARRAY_PAGES)
> +                return -EINVAL;
> +
> +        while (i >= event_array_pages) {
> +                struct page *array_page = NULL;
> +                struct evtchn_expand_array expand_array;
> +
> +                array_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +                if (array_page == NULL)
> +                        goto error;
> +
> +                expand_array.array_mfn = virt_to_mfn(page_address(array_page));
> +
> +                ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array,
> +                                                  &expand_array);
> +                if (ret < 0) {
> +                        __free_page(array_page);
> +                        goto error;
> +                }
> +
> +                event_array[event_array_pages++] = page_address(array_page);
> +        }
> +        return 0;
> +
> + error:
> +        if (event_array_pages == 0)
> +                panic("xen: unable to expand event array with initial page (%d)\n",
> +                      ret);
> +        else
> +                printk(KERN_ERR "xen: unable to expand event array (%d)\n", ret);
> +        return ret;
> +}
> +
> +static void fifo_bind_to_cpu(struct irq_info *info, int cpu)
> +{
> +        /* no-op */
> +}
> +
> +static void fifo_clear_pending(int port)
> +{
> +        event_word_t *word = event_word_from_port(port);
> +        sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
> +}
> +
> +static void fifo_set_pending(int port)
> +{
> +        event_word_t *word = event_word_from_port(port);
> +        sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
> +}
> +
> +static bool fifo_is_pending(int port)
> +{
> +        event_word_t *word = event_word_from_port(port);
> +        return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
> +}
> +
> +static bool fifo_test_and_set_mask(int port)
> +{
> +        event_word_t *word = event_word_from_port(port);
> +        return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
> +}
> +
> +static void fifo_mask(int port)
> +{
> +        event_word_t *word = event_word_from_port(port);
> +        if (word)
> +                sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
> +}
> +
> +static void fifo_unmask(int port)
> +{
> +        unsigned int cpu = get_cpu();
> +        bool do_hypercall = false;
> +        bool evtchn_pending = false;
> +
> +        BUG_ON(!irqs_disabled());
> +
> +        if (unlikely(cpu != cpu_from_evtchn(port)))
> +                do_hypercall = true;
> +        else {
> +                event_word_t *word = event_word_from_port(port);
> +
> +                sync_clear_bit(EVTCHN_FIFO_MASKED, BM(word));
> +                evtchn_pending = sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
> +                if (evtchn_pending)
> +                        do_hypercall = true;
> +        }
> +
> +        if (do_hypercall) {
> +                struct evtchn_unmask unmask = { .port = port };
> +                (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
> +        }
> +
> +        put_cpu();
> +}
> +
> +static uint32_t clear_linked(volatile event_word_t *word)
> +{
> +        event_word_t n, o, w;
> +
> +        w = *word;
> +
> +        do {
> +                o = w;
> +                n = (w & ~((1 << EVTCHN_FIFO_LINKED) | EVTCHN_FIFO_LINK_MASK));
> +        } while ((w = sync_cmpxchg(word, o, n)) != o);
> +
> +        return w & EVTCHN_FIFO_LINK_MASK;
> +}
> +
> +static void handle_irq_for_port(int port)
> +{
> +        int irq;
> +        struct irq_desc *desc;
> +
> +        irq = get_evtchn_to_irq(port);
> +        if (irq != -1) {
> +                desc = irq_to_desc(irq);
> +                if (desc)
> +                        generic_handle_irq_desc(irq, desc);
> +        }
> +}
> +
> +static void consume_one_event(int cpu,
> +                              struct evtchn_fifo_control_block *control_block,
> +                              int priority, uint32_t *ready)
> +{
> +        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
> +        uint32_t head;
> +        int port;
> +        event_word_t *word;
> +
> +        head = q->head[priority];
> +
> +        /*
> +         * Reached the tail last time?  Read the new HEAD from the
> +         * control block.
> +         */
> +        if (head == 0) {
> +                rmb(); /* Ensure word is up-to-date before reading head. */
> +                head = control_block->head[priority];
> +        }
> +
> +        port = head;
> +        word = event_word_from_port(port);
> +        head = clear_linked(word);
> +
> +        /*
> +         * If the link is non-zero, there are more events in the
> +         * queue, otherwise the queue is empty.
> +         *
> +         * If the queue is empty, clear this priority from our local
> +         * copy of the ready word.
> +         */
> +        if (head == 0)
> +                clear_bit(priority, BM(ready));
> +
> +        if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
> +            && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
> +                handle_irq_for_port(port);
> +
> +        q->head[priority] = head;
> +}
> +
> +#define EVTCHN_FIFO_READY_MASK ((1 << EVTCHN_FIFO_MAX_QUEUES) - 1)
> +
> +static void fifo_handle_events(int cpu)
> +{
> +        struct evtchn_fifo_control_block *control_block;
> +        uint32_t ready;
> +        int q;
> +
> +        control_block = per_cpu(cpu_control_block, cpu);
> +
> +        ready = xchg(&control_block->ready, 0);
> +
> +        while (ready & EVTCHN_FIFO_READY_MASK) {
> +                q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
> +                consume_one_event(cpu, control_block, q, &ready);
> +                ready |= xchg(&control_block->ready, 0);
> +        }
> +}
> +
> +static const struct evtchn_ops evtchn_ops_fifo = {
> +        .max_channels      = fifo_max_channels,
> +        .nr_channels       = fifo_nr_channels,
> +        .setup             = fifo_setup,
> +        .bind_to_cpu       = fifo_bind_to_cpu,
> +        .clear_pending     = fifo_clear_pending,
> +        .set_pending       = fifo_set_pending,
> +        .is_pending        = fifo_is_pending,
> +        .test_and_set_mask = fifo_test_and_set_mask,
> +        .mask              = fifo_mask,
> +        .unmask            = fifo_unmask,
> +        .handle_events     = fifo_handle_events,
> +};
> +
> +static int __cpuinit fifo_init_control_block(int cpu)
> +{
> +        struct page *control_block = NULL;
> +        struct evtchn_init_control init_control;
> +        int ret = -ENOMEM;
> +
> +        control_block = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +        if (control_block == NULL)
> +                goto error;
> +
> +        init_control.control_mfn = virt_to_mfn(page_address(control_block));
> +        init_control.offset = 0;
> +        init_control.vcpu = cpu;
> +
> +        ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
> +        if (ret < 0)
> +                goto error;
> +
> +        per_cpu(cpu_control_block, cpu) = page_address(control_block);
> +
> +        return 0;
> +
> + error:
> +        if (control_block)
> +                __free_page(control_block);
> +        return ret;
> +}
> +
> +static int __cpuinit fifo_cpu_notification(struct notifier_block *self,
> +                                           unsigned long action, void *hcpu)
> +{
> +        int cpu = (long)hcpu;
> +        int ret = 0;
> +
> +        switch (action) {
> +        case CPU_UP_PREPARE:
> +                ret = fifo_init_control_block(cpu);
> +                break;
On the hypervisor side, the EVTCHNOP_init_control call made by
fifo_init_control_block would return -EINVAL if this CPU already has a
mapped control block.
Do you need to tear down the control block when you offline a CPU?
(That would mean another sub-op in the interface.)
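If a teardown sub-op isn't worth adding, the notifier could at least
avoid the -EINVAL by only initializing a block the first time the CPU
comes up, e.g. (sketch only):

        case CPU_UP_PREPARE:
                /* The block allocated on first bring-up survives
                   offline/online, so initialize it only once. */
                if (!per_cpu(cpu_control_block, cpu))
                        ret = fifo_init_control_block(cpu);
                break;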
> +        default:
> +                break;
> +        }
> +        return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
> +}
> +
> +static struct notifier_block fifo_cpu_notifier __cpuinitdata = {
> +        .notifier_call = fifo_cpu_notification,
> +};
> +
> +
> +int __init xen_evtchn_init_fifo(void)
> +{
> +        int cpu = get_cpu();
> +        int ret;
> +
> +        ret = fifo_init_control_block(cpu);
> +        if (ret < 0)
> +                goto error;
> +
> +        printk(KERN_INFO "xen: switching to FIFO-based event channels\n");
> +
> +        evtchn_ops = &evtchn_ops_fifo;
> +
> +        register_cpu_notifier(&fifo_cpu_notifier);
> +
> +        put_cpu();
> +        return 0;
> +
> + error:
> +        put_cpu();
> +        return ret;
Could be written as:

        if (ret < 0)
                goto out;
        ...
        register_cpu_notifier();
        ret = 0;
out:
        put_cpu();
        return ret;

It is only a matter of personal taste though.
Wei.
> +}
> --
> 1.7.2.5
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel