diff -r 1e3977e029fd -r 19c77906a845 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon May  8 18:21:41 2006
+++ b/xen/arch/x86/domain.c     Wed May 10 14:30:39 2006
@@ -20,7 +20,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -210,11 +209,6 @@
 
     if ( !is_idle_domain(d) )
     {
-        d->arch.ioport_caps =
-            rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
-        if ( d->arch.ioport_caps == NULL )
-            goto fail_nomem;
-
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
             goto fail_nomem;
diff -r 1e3977e029fd -r 19c77906a845 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon May  8 18:21:41 2006
+++ b/xen/arch/x86/domain_build.c       Wed May 10 14:30:39 2006
@@ -62,9 +62,6 @@
 static unsigned int opt_dom0_shadow;
 boolean_param("dom0_shadow", opt_dom0_shadow);
 
-static char opt_dom0_ioports_disable[200] = "";
-string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
-
 #if defined(__i386__)
 /* No ring-3 access in initial leaf page tables. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
@@ -98,43 +95,6 @@
         if ( order-- == 0 )
             break;
     return page;
-}
-
-static void process_dom0_ioports_disable(void)
-{
-    unsigned long io_from, io_to;
-    char *t, *u, *s = opt_dom0_ioports_disable;
-
-    if ( *s == '\0' )
-        return;
-
-    while ( (t = strsep(&s, ",")) != NULL )
-    {
-        io_from = simple_strtoul(t, &u, 16);
-        if ( u == t )
-        {
-        parse_error:
-            printk("Invalid ioport range <%s> "
-                   "in dom0_ioports_disable, skipping\n", t);
-            continue;
-        }
-
-        if ( *u == '\0' )
-            io_to = io_from;
-        else if ( *u == '-' )
-            io_to = simple_strtoul(u + 1, &u, 16);
-        else
-            goto parse_error;
-
-        if ( (*u != '\0') || (io_to < io_from) || (io_to >= 65536) )
-            goto parse_error;
-
-        printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
-               io_from, io_to);
-
-        if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
-            BUG();
-    }
 }
 
 static const char *feature_names[XENFEAT_NR_SUBMAPS*32] = {
@@ -804,45 +764,8 @@
         panic("Dom0 requires supervisor-mode execution\n");
     }
 
-    rc = 0;
-
-    /* DOM0 is permitted full I/O capabilities. */
-    rc |= ioports_permit_access(dom0, 0, 0xFFFF);
-    rc |= iomem_permit_access(dom0, 0UL, ~0UL);
-    rc |= irqs_permit_access(dom0, 0, NR_IRQS-1);
-
-    /*
-     * Modify I/O port access permissions.
-     */
-    /* Master Interrupt Controller (PIC). */
-    rc |= ioports_deny_access(dom0, 0x20, 0x21);
-    /* Slave Interrupt Controller (PIC). */
-    rc |= ioports_deny_access(dom0, 0xA0, 0xA1);
-    /* Interval Timer (PIT). */
-    rc |= ioports_deny_access(dom0, 0x40, 0x43);
-    /* PIT Channel 2 / PC Speaker Control. */
-    rc |= ioports_deny_access(dom0, 0x61, 0x61);
-    /* Command-line I/O ranges. */
-    process_dom0_ioports_disable();
-
-    /*
-     * Modify I/O memory access permissions.
-     */
-    /* Local APIC. */
-    if ( mp_lapic_addr != 0 )
-    {
-        mfn = paddr_to_pfn(mp_lapic_addr);
-        rc |= iomem_deny_access(dom0, mfn, mfn);
-    }
-    /* I/O APICs. */
-    for ( i = 0; i < nr_ioapics; i++ )
-    {
-        mfn = paddr_to_pfn(mp_ioapics[i].mpc_apicaddr);
-        if ( smp_found_config )
-            rc |= iomem_deny_access(dom0, mfn, mfn);
-    }
-
-    BUG_ON(rc != 0);
+    BUG_ON(iocap_set_default_io_domain(dom0) != 0);
 
     return 0;
 }
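One way the deleted PIC/PIT port denials could be expressed under the new API
is a Xen-side reservation, which also blocks the default I/O domain. A usage
sketch only, not part of this patch (the function name is invented):

    /* Hypothetical sketch: Xen claims the legacy interrupt-controller and
     * timer ports for itself.  A reserved range can no longer be granted
     * to any domain, and the default I/O domain loses use of it too. */
    static int __init reserve_legacy_ioports(void)
    {
        int rc = 0;

        rc |= iocap_reserve(IOCAP_IO_IOPORT, 0x20, 0x21); /* master PIC */
        rc |= iocap_reserve(IOCAP_IO_IOPORT, 0xA0, 0xA1); /* slave PIC  */
        rc |= iocap_reserve(IOCAP_IO_IOPORT, 0x40, 0x43); /* PIT        */
        rc |= iocap_reserve(IOCAP_IO_IOPORT, 0x61, 0x61); /* PC speaker */

        return rc;
    }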
diff -r 1e3977e029fd -r 19c77906a845 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Mon May  8 18:21:41 2006
+++ b/xen/arch/x86/irq.c        Wed May 10 14:30:39 2006
@@ -112,6 +112,8 @@
 
     /* Wait to make sure it's not being used on another CPU */
     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
+
+    irq_unreserve(irq);
 }
 
 int setup_irq(unsigned int irq, struct irqaction *new)
@@ -119,12 +121,18 @@
     unsigned int  vector = irq_to_vector(irq);
     irq_desc_t   *desc = &irq_desc[vector];
     unsigned long flags;
+    int rc;
+
+    rc = irq_reserve(irq);
+    if (rc)
+        return rc;
 
     spin_lock_irqsave(&desc->lock,flags);
 
     if ( desc->action != NULL )
     {
         spin_unlock_irqrestore(&desc->lock,flags);
+        irq_unreserve(irq);
         return -EBUSY;
     }
 
@@ -402,7 +410,7 @@
     return 0;
 }
 
-int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
+int arch_pirq_guest_bind(struct vcpu *v, int irq, int will_share)
 {
     unsigned int        vector;
     irq_desc_t         *desc;
@@ -490,7 +498,7 @@
     return rc;
 }
 
-int pirq_guest_unbind(struct domain *d, int irq)
+int arch_pirq_guest_unbind(struct domain *d, int irq)
 {
     unsigned int  vector = irq_to_vector(irq);
     irq_desc_t   *desc   = &irq_desc[vector];
@@ -569,6 +577,38 @@
  out:
     spin_unlock_irqrestore(&desc->lock, flags);
     return 0;
+}
+
+int arch_pirq_in_use_by_domain(int irq, struct domain *d)
+{
+    unsigned int vector;
+    irq_desc_t *desc;
+    irq_guest_action_t *action;
+    unsigned long flags;
+    int in_use = 0, i;
+
+    /* Validate irq before using it to index irq_desc. */
+    if ( (irq < 0) || (irq >= NR_IRQS) )
+        return 0;
+
+    vector = irq_to_vector(irq);
+    if ( vector == 0 )
+        return 0;
+
+    desc = &irq_desc[vector];
+
+    spin_lock_irqsave(&desc->lock, flags);
+
+    if (desc->status & IRQ_GUEST)
+    {
+        action = (irq_guest_action_t *)desc->action;
+        for (i = 0; i < action->nr_guests; i++)
+        {
+            if (action->guest[i] == d)
+            {
+                in_use = 1;
+                break;
+            }
+        }
+    }
+
+    spin_unlock_irqrestore(&desc->lock, flags);
+
+    return in_use;
 }
 
 extern void dump_ioapic_irq_info(void);
diff -r 1e3977e029fd -r 19c77906a845 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon May  8 18:21:41 2006
+++ b/xen/arch/x86/mm.c Wed May 10 14:30:39 2006
@@ -463,7 +463,7 @@
 {
     unsigned long mfn = l1e_get_pfn(l1e);
     struct page_info *page = mfn_to_page(mfn);
-    int okay;
+    int okay, rc;
 
     if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
         return 1;
@@ -481,17 +481,17 @@
         if ( d == dom_io )
             d = current->domain;
 
-        if ( !iomem_access_permitted(d, mfn, mfn) )
-        {
-            MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
+        rc = iomem_get(d, mfn);
+        if ( unlikely(rc) )
+        {
+            if ( rc == -EACCES )
+                MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
+            else
+                MEM_LOG("Error mapping I/O space %08lx err=%d", mfn, rc);
             return 0;
         }
-
-        /* No reference counting for out-of-range I/O pages. */
-        if ( !mfn_valid(mfn) )
+        else
             return 1;
-
-        d = dom_io;
     }
 
     okay = ((l1e_get_flags(l1e) & _PAGE_RW) ?
@@ -612,8 +612,14 @@
     struct domain *e;
     struct vcpu   *v;
 
-    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) )
+    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
         return;
+
+    if (!mfn_valid(pfn) || page_get_owner(page) == dom_io)
+    {
+        iomem_put(d, pfn);
+        return;
+    }
 
     e = page_get_owner(page);
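With get_page_from_l1e() taking a reference on I/O memory via iomem_get() and
put_page_from_l1e() dropping it via iomem_put(), every mapping of an I/O frame
is balanced. A minimal sketch of the discipline, illustrative only (the helper
name is made up):

    /* Illustrative: the pattern the pagetable code now follows for I/O
     * frames.  Each successful iomem_get() is paired with exactly one
     * iomem_put(), so the accountant can tell when an I/O frame is no
     * longer mapped by the domain. */
    static int map_io_frame(struct domain *d, unsigned long mfn)
    {
        int rc = iomem_get(d, mfn);  /* access check + refcount, one step */

        if ( rc )
            return rc;               /* -EACCES if d lacks the capability */

        /* ... install the l1e pointing at mfn; if that fails, undo the
         * reference with iomem_put(d, mfn) ... */

        return 0;
    }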
diff -r 1e3977e029fd -r 19c77906a845 xen/common/domain.c
--- a/xen/common/domain.c       Mon May  8 18:21:41 2006
+++ b/xen/common/domain.c       Wed May 10 14:30:39 2006
@@ -66,9 +66,7 @@
     if ( (v = alloc_vcpu(d, 0, cpu)) == NULL )
         goto fail4;
 
-    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
-    d->irq_caps   = rangeset_new(d, "Interrupts", 0);
-    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
+    if ( iocap_domain_init(d) )
        goto fail4; /* NB. alloc_vcpu() is undone in free_domain() */
 
     if ( !is_idle_domain(d) )
@@ -135,6 +133,8 @@
         sched_rem_domain(v);
 
     gnttab_release_mappings(d);
     domain_relinquish_resources(d);
+    iocap_domain_release_resources(d);
+
     put_domain(d);
 
     send_guest_global_virq(dom0, VIRQ_DOM_EXC);
diff -r 1e3977e029fd -r 19c77906a845 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Mon May  8 18:21:41 2006
+++ b/xen/common/event_channel.c        Wed May 10 14:30:39 2006
@@ -278,9 +278,6 @@
     if ( (pirq < 0) || (pirq >= ARRAY_SIZE(d->pirq_to_evtchn)) )
         return -EINVAL;
 
-    if ( !irq_access_permitted(d, pirq) )
-        return -EPERM;
-
     spin_lock(&d->evtchn_lock);
 
     if ( d->pirq_to_evtchn[pirq] != 0 )
@@ -341,8 +338,8 @@
         break;
 
     case ECS_PIRQ:
-        if ( (rc = pirq_guest_unbind(d1, chn1->u.pirq)) == 0 )
-            d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq);
+        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
         break;
 
     case ECS_VIRQ:
diff -r 1e3977e029fd -r 19c77906a845 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon May  8 18:21:41 2006
+++ b/xen/include/asm-x86/domain.h      Wed May 10 14:30:39 2006
@@ -72,9 +72,6 @@
 
     /* Writable pagetables. */
     struct ptwr_info ptwr[2];
-
-    /* I/O-port admin-specified access capabilities. */
-    struct rangeset *ioport_caps;
 
     /* Shadow mode status and controls. */
     struct shadow_ops *ops;
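With the -EPERM check gone from the bind-pirq path, the authoritative check now
happens inside pirq_guest_bind() (see the new common/iocap.c at the end of this
patch), which fails with -EACCES unless the IRQ is in the domain's access list
or the domain is the default I/O domain. A sketch of how a control path would
set this up (the helper name is hypothetical):

    /* Hypothetical sketch: grant an IRQ to a domain, then bind it.  The
     * permission check formerly rejected at evtchn level is now done by
     * pirq_guest_bind() -> iocap_get_resource(). */
    static int grant_and_bind_pirq(struct domain *d, struct vcpu *v, int pirq)
    {
        int rc = irq_permit_access(d, pirq); /* add pirq to d's access list */

        if ( rc )
            return rc;  /* e.g. -EBUSY if another domain holds it exclusively */

        return pirq_guest_bind(v, pirq, 0 /* !will_share */);
    }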
diff -r 1e3977e029fd -r 19c77906a845 xen/include/asm-x86/iocap.h
--- a/xen/include/asm-x86/iocap.h       Mon May  8 18:21:41 2006
+++ b/xen/include/asm-x86/iocap.h       Wed May 10 14:30:39 2006
@@ -7,14 +7,18 @@
 #ifndef __X86_IOCAP_H__
 #define __X86_IOCAP_H__
 
+#define IOCAP_IO_IOPORT   (_IOCAP_NUM_COMMON_IO_RESOURCES)
+
+#define _IOCAP_MAX_IO_RES IOCAP_IO_IOPORT
+
 #define ioports_permit_access(d, s, e)                  \
-    rangeset_add_range((d)->arch.ioport_caps, s, e)
+    iocap_permit_access(IOCAP_IO_IOPORT, d, s, e)
 #define ioports_deny_access(d, s, e)                    \
-    rangeset_remove_range((d)->arch.ioport_caps, s, e)
+    iocap_remove_access(IOCAP_IO_IOPORT, d, s, e)
 #define ioports_access_permitted(d, s, e)               \
-    rangeset_contains_range((d)->arch.ioport_caps, s, e)
+    iocap_access_permitted(IOCAP_IO_IOPORT, d, s, e)
 
-#define cache_flush_permitted(d)                        \
-    (!rangeset_is_empty((d)->iomem_caps))
+#define cache_flush_permitted(d)                        \
+    iocap_domain_io_capable(d)
 
 #endif /* __X86_IOCAP_H__ */
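The wrappers keep their old names, so existing callers (for example the
ioport-permission dom0 op path) do not change. As a usage sketch, hedged on
the usual COM1 port range (the helper name is made up):

    /* give_com1 is an invented helper: the grant succeeds only while
     * 0x3f8-0x3ff is still in the port resource's available set (or has
     * been marked shareable). */
    static int give_com1(struct domain *d)
    {
        return ioports_permit_access(d, 0x3f8, 0x3ff);
    }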
diff -r 1e3977e029fd -r 19c77906a845 xen/include/xen/iocap.h
--- a/xen/include/xen/iocap.h   Mon May  8 18:21:41 2006
+++ b/xen/include/xen/iocap.h   Wed May 10 14:30:39 2006
@@ -8,27 +8,123 @@
 #define __XEN_IOCAP_H__
 
 #include <xen/rangeset.h>
+#include <xen/spinlock.h>
+
+struct domain;
+
+/* When Xen wants a resource, it's not shareable. */
+#define _IORESF_unshare_on_reserve (0)
+#define IORESF_unshare_on_reserve  (1<<_IORESF_unshare_on_reserve)
+
+struct io_resource {
+    spinlock_t lock;
+    struct rangeset *available;
+    struct rangeset *shareable;
+    struct rangeset *reserved;
+
+    int type;   /* doubles as an array index */
+    char *name;
+
+    /* Reference counting for I/O resources: (get|put)_resource handle
+     * reference counting for a given resource and are called with the
+     * lock held. */
+    int  (*get_resource)(unsigned long res, struct domain *d, void *data);
+    void (*put_resource)(unsigned long res, struct domain *d, void *data);
+    int  (*resource_in_use)(unsigned long res, struct domain *d);
+
+    /* Flags given to all rangesets created for this I/O resource. */
+    unsigned int rangeset_flags;
+
+    /* See the IORESF_* flags above. */
+    unsigned int flags;
+
+    unsigned long min, max;
+};
+
+/* Give/remove the specified domain access to I/O resources.  Permit will
+ * only succeed if the specified resources are available and not in use by
+ * other domains (unless marked shareable).  Remove will only succeed if
+ * the resources are not presently in use by the specified domain. */
+int iocap_permit_access(unsigned int io_res_idx, struct domain *d,
+                        unsigned long s, unsigned long e);
+int iocap_remove_access(unsigned int io_res_idx, struct domain *d,
+                        unsigned long s, unsigned long e);
+
+/* Check whether a given domain has access to the specified resources. */
+int iocap_access_permitted(unsigned int io_res_idx, struct domain *d,
+                           unsigned long s, unsigned long e);
+
+/* iocap_(get|put)_resource perform the access check and the reference
+ * counting in a single atomic step. */
+int iocap_get_resource(unsigned int io_res_idx, struct domain *d,
+                       unsigned long s, unsigned long e, void *data);
+void iocap_put_resource(unsigned int io_res_idx, struct domain *d,
+                        unsigned long s, unsigned long e, void *data);
+
+/* iocap_(un)?reserve is for Xen to mark that it is using a resource. */
+int iocap_reserve(unsigned int io_res_idx, unsigned long s, unsigned long e);
+int iocap_unreserve(unsigned int io_res_idx, unsigned long s, unsigned long e);
+
+/* iocap_(un)?share marks resources as shareable (or not) by multiple
+ * domains. */
+int iocap_share(unsigned int io_res_idx, unsigned long s, unsigned long e);
+int iocap_unshare(unsigned int io_res_idx, unsigned long s, unsigned long e);
+
+int iocap_domain_init(struct domain *d);
+int iocap_domain_release_resources(struct domain *d);
+
+int iocap_domain_io_capable(struct domain *d);
+
+/* The default_io_domain is the all-powerful I/O domain that gets access to
+ * all resources that are not assigned to Xen or to other domains.  There
+ * can only be one at a time, and once the default_io_domain is set, it
+ * stays set until that domain is destroyed.  (This feature is intended for
+ * dom0 until it can be restricted to specified resources.) */
+int iocap_set_default_io_domain(struct domain *d);
+
+int iocap_ioresource_register(struct io_resource *res);
+
+int iocap_init(void);
+
+#define IOCAP_IO_MEMORY (0)
+#define IOCAP_IO_IRQ    (1)
+#define _IOCAP_NUM_COMMON_IO_RESOURCES (IOCAP_IO_IRQ+1)
+
+/* The arch-specific iocap implementation is responsible for defining
+ * _IOCAP_MAX_IO_RES. */
 #include <asm/iocap.h>
 
-#define iomem_permit_access(d, s, e)                    \
-    rangeset_add_range((d)->iomem_caps, s, e)
+#define IOCAP_NUM_IO_RESOURCES (_IOCAP_MAX_IO_RES+1)
+
+#define iomem_permit_access(d, s, e)                    \
+    iocap_permit_access(IOCAP_IO_MEMORY, d, s, e)
 #define iomem_deny_access(d, s, e)                      \
-    rangeset_remove_range((d)->iomem_caps, s, e)
-#define iomem_access_permitted(d, s, e)                 \
-    rangeset_contains_range((d)->iomem_caps, s, e)
+    iocap_remove_access(IOCAP_IO_MEMORY, d, s, e)
+#define iomem_get(d, mfn)                               \
+    iocap_get_resource(IOCAP_IO_MEMORY, d, mfn, mfn, NULL)
+#define iomem_put(d, mfn)                               \
+    iocap_put_resource(IOCAP_IO_MEMORY, d, mfn, mfn, NULL)
 
+struct iocap_irq_info
+{
+    struct vcpu *v;
+    int will_share;
+};
+
+/* Permission to use an IRQ is checked in pirq_guest_bind. */
 #define irq_permit_access(d, i)                         \
-    rangeset_add_singleton((d)->irq_caps, i)
+    iocap_permit_access(IOCAP_IO_IRQ, d, i, i)
 #define irq_deny_access(d, i)                           \
-    rangeset_remove_singleton((d)->irq_caps, i)
+    iocap_remove_access(IOCAP_IO_IRQ, d, i, i)
 #define irqs_permit_access(d, s, e)                     \
-    rangeset_add_range((d)->irq_caps, s, e)
+    iocap_permit_access(IOCAP_IO_IRQ, d, s, e)
 #define irqs_deny_access(d, s, e)                       \
-    rangeset_remove_range((d)->irq_caps, s, e)
-#define irq_access_permitted(d, i)                      \
-    rangeset_contains_singleton((d)->irq_caps, i)
+    iocap_remove_access(IOCAP_IO_IRQ, d, s, e)
+#define irq_reserve(i)                                  \
+    iocap_reserve(IOCAP_IO_IRQ, i, i)
+#define irq_unreserve(i)                                \
+    iocap_unreserve(IOCAP_IO_IRQ, i, i)
 
 #define multipage_allocation_permitted(d)               \
-    (!rangeset_is_empty((d)->iomem_caps))
+    iocap_domain_io_capable(d)
 
 #endif /* __XEN_IOCAP_H__ */
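For resource types with get/put callbacks, the data cookie is passed straight
through to the callback. A minimal sketch of the pairing for an IRQ, mirroring
the pirq_guest_bind()/pirq_guest_unbind() wrappers added at the end of this
patch (illustrative only):

    /* Take a counted reference on 'irq' for v's domain, use it, then
     * release it.  The cookie carries the bind parameters. */
    struct iocap_irq_info info = { .v = v, .will_share = 1 };

    if ( iocap_get_resource(IOCAP_IO_IRQ, v->domain, irq, irq, &info) == 0 )
    {
        /* ... the domain now holds a reference on the bound IRQ ... */
        iocap_put_resource(IOCAP_IO_IRQ, v->domain, irq, irq, NULL);
    }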
diff -r 1e3977e029fd -r 19c77906a845 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon May  8 18:21:41 2006
+++ b/xen/include/xen/sched.h   Wed May 10 14:30:39 2006
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include <xen/iocap.h>
 #include 
 #include 
 #include 
@@ -137,8 +138,7 @@
     DECLARE_BITMAP(pirq_mask, NR_IRQS);
 
     /* I/O capabilities (access to IRQs and memory-mapped I/O). */
-    struct rangeset *iomem_caps;
-    struct rangeset *irq_caps;
+    struct rangeset *iocaps[IOCAP_NUM_IO_RESOURCES];
 
     unsigned long    domain_flags;
     unsigned long    vm_assist;
diff -r 1e3977e029fd -r 19c77906a845 xen/arch/x86/iocap.c
--- /dev/null   Mon May  8 18:21:41 2006
+++ b/xen/arch/x86/iocap.c      Wed May 10 14:30:39 2006
@@ -0,0 +1,107 @@
+/******************************************************************************
+ * iocap.c
+ *
+ * I/O Resource Accountant - handles the architecture-specific parts of the
+ *                           I/O Resource Accountant.  All of the resources
+ *                           are defined here so that architecture-specific
+ *                           reference counting can be done on them.
+ */
+#include 
+#include 
+#include 
+#include 
+
+static int dont_share_irqs = 0;
+boolean_param("noshare_irqs", dont_share_irqs);
+
+/* Reference counting for pages of I/O memory - xen/arch/x86/iomm.c */
+extern int get_io_page(unsigned long mfn, struct domain *d, void *data);
+extern void put_io_page(unsigned long mfn, struct domain *d, void *data);
+extern u32 get_io_page_count(unsigned long mfn, struct domain *d);
+
+static int iomem_in_use(unsigned long mfn, struct domain *d)
+{
+    return (get_io_page_count(mfn, d) > 0);
+}
+
+/* Reference counting for physical interrupts - xen/arch/x86/irq.c */
+extern int arch_pirq_guest_bind(struct vcpu *v, int irq, int will_share);
+extern int arch_pirq_guest_unbind(struct domain *d, int irq);
+extern int arch_pirq_in_use_by_domain(int irq, struct domain *d);
+
+static int irq_in_use(unsigned long irq, struct domain *d)
+{
+    ASSERT(irq < NR_IRQS);
+
+    return arch_pirq_in_use_by_domain(irq, d);
+}
+
+static int irq_get_resource(unsigned long irq, struct domain *d, void *data)
+{
+    struct iocap_irq_info *irq_info = data;
+
+    return arch_pirq_guest_bind(irq_info->v, (int)irq, irq_info->will_share);
+}
+
+static void irq_put_resource(unsigned long irq, struct domain *d, void *data)
+{
+    arch_pirq_guest_unbind(d, (int)irq);
+}
+
+static struct io_resource iomem_resource = {
+    .type            = IOCAP_IO_MEMORY,
+    .name            = "I/O Memory",
+    .rangeset_flags  = RANGESETF_prettyprint_hex,
+    .resource_in_use = iomem_in_use,
+    .get_resource    = get_io_page,
+    .put_resource    = put_io_page,
+    .min             = 0UL,
+    .max             = ~0UL,
+};
+
+static struct io_resource irq_resource = {
+    .type            = IOCAP_IO_IRQ,
+    .name            = "Interrupts",
+    .resource_in_use = irq_in_use,
+    .get_resource    = irq_get_resource,
+    .put_resource    = irq_put_resource,
+    .min             = 0UL,
+    .max             = NR_IRQS-1,
+    .flags           = IORESF_unshare_on_reserve,
+};
+
+/* No reference counting is needed on I/O ports because the access list is
+ * consulted on every I/O port instruction. */
+static struct io_resource ioport_resource = {
+    .type            = IOCAP_IO_IOPORT,
+    .name            = "I/O Ports",
+    .rangeset_flags  = RANGESETF_prettyprint_hex,
+    .min             = 0UL,
+    .max             = 0xffffUL,
+};
+
+int arch_iocap_init(void)
+{
+    int rc;
+
+    rc = iocap_ioresource_register(&ioport_resource);
+    if (rc)
+        goto out;
+
+    rc = iocap_ioresource_register(&iomem_resource);
+    if (rc)
+        goto out;
+
+    rc = iocap_ioresource_register(&irq_resource);
+    if (rc)
+        goto out;
+
+    /* For now, IRQs are shareable among domains by default.  This doesn't
+     * mean any domain can just bind to a physical IRQ: the domain must
+     * also have the IRQ in its access list. */
+    if (!dont_share_irqs)
+        rc = iocap_share(IOCAP_IO_IRQ, irq_resource.min, irq_resource.max);
+
+ out:
+    return rc;
+}
diff -r 1e3977e029fd -r 19c77906a845 xen/common/iocap.c
--- /dev/null   Mon May  8 18:21:41 2006
+++ b/xen/common/iocap.c        Wed May 10 14:30:39 2006
@@ -0,0 +1,666 @@
+/******************************************************************************
+ * iocap.c
+ *
+ * I/O Resource Accountant - tracks usage and handles access control of
+ *                           I/O resources.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+extern int arch_iocap_init(void);
+
+static struct io_resource *io_res[IOCAP_NUM_IO_RESOURCES];
+
+static struct domain *default_io_domain = NULL;
+
+static DEFINE_SPINLOCK(iocap_lock);
+
+/* Once the default_io_domain is set, it can't be changed until that domain
+ * relinquishes its I/O resources (otherwise, we wouldn't know where to
+ * check in permit_access). */
+int iocap_set_default_io_domain(struct domain *d)
+{
+    int rc = 0;
+
+    DPRINTK("iocap_set_default_io_domain: domain %d\n", d->domain_id);
+
+    spin_lock(&iocap_lock);
+    if (default_io_domain)
+        rc = -EEXIST;
+    else
+    {
+        if (get_domain(d))
+            default_io_domain = d;
+        else
+            rc = -EINVAL;
+    }
+    spin_unlock(&iocap_lock);
+
+    return rc;
+}
+
+static inline struct domain *get_default_io_domain(void)
+{
+    struct domain *d = NULL;
+
+    spin_lock(&iocap_lock);
+    /* default_io_domain may not be set yet; don't dereference NULL. */
+    if (default_io_domain)
+    {
+        get_knownalive_domain(default_io_domain);
+        d = default_io_domain;
+    }
+    spin_unlock(&iocap_lock);
+
+    return d;
+}
+
+/* Doesn't call get_domain on the default_io_domain.  Be careful what you
+ * do with the returned pointer! */
+static inline struct domain *_get_default_io_domain(void)
+{
+    struct domain *d = NULL;
+
+    spin_lock(&iocap_lock);
+    d = default_io_domain;
+    spin_unlock(&iocap_lock);
+
+    return d;
+}
+
+static inline int resource_in_use(struct io_resource *res, struct domain *d,
+                                  unsigned long s, unsigned long e)
+{
+    if (res->resource_in_use)
+    {
+        do
+        {
+            if (res->resource_in_use(s, d))
+                return 1;
+        }
+        while ((s++) != e);
+    }
+
+    return 0;
+}
+
+/* Call with res->lock held.  d == NULL when Xen is reserving a resource. */
+static int permit_access(struct io_resource *res,
+                         struct domain *d,
+                         struct rangeset *access_list,
+                         unsigned long s, unsigned long e)
+{
+    struct domain *d_io = NULL;
+    int rc;
+
+    ASSERT(s <= e);
+
+    DPRINTK("iocap_permit_access: %s [%lx, %lx]\n", res->name, s, e);
+
+    if (!rangeset_contains_range(res->available, s, e))
+    {
+        rc = -EBUSY;
+        goto out;
+    }
+
+    if (!rangeset_contains_range(res->shareable, s, e))
+    {
+        /* If part of the range is shareable, but not the entire range,
+         * fail.  In the interest of simplicity, we don't split the
+         * request into its shareable and non-shareable parts. */
+        if (rangeset_contains_partial_range(res->shareable, s, e))
+        {
+            rc = -EXDEV;
+            goto out;
+        }
+
+        /* If the resource is available, the default_io_domain could be
+         * using it.  Check before giving this resource away. */
+        d_io = get_default_io_domain();
+        if (d_io && d != d_io && resource_in_use(res, d_io, s, e))
+        {
+            rc = -EBUSY;
+            goto out;
+        }
+
+        rc = rangeset_move_range(access_list, res->available, s, e);
+    }
+    else
+        rc = rangeset_add_range(access_list, s, e);
+
+ out:
+    DPRINTK("permit_access: rc = %d\n", rc);
+    if (d_io)
+        put_domain(d_io);
+    return rc;
+}
+
+int iocap_permit_access(unsigned int io_res_idx, struct domain *d,
+                        unsigned long s, unsigned long e)
+{
+    int rc = 0;
+    struct io_resource *res;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+    rc = permit_access(res, d, d->iocaps[io_res_idx], s, e);
+    spin_unlock(&res->lock);
+
+    return rc;
+}
+
+/* Call with res->lock held.  d == NULL when Xen is unreserving a resource. */
+static int remove_access(struct io_resource *res,
+                         struct domain *d,
+                         struct rangeset *access_list,
+                         unsigned long s, unsigned long e)
+{
+    int rc;
+
+    ASSERT(s <= e);
+
+    DPRINTK("iocap_remove_access: %s [%lx, %lx]\n", res->name, s, e);
+
+    if (!rangeset_contains_range(access_list, s, e))
+    {
+        rc = -ENOENT;
+        goto out;
+    }
+
+    if (resource_in_use(res, d, s, e))
+    {
+        rc = -EBUSY;
+        goto out;
+    }
+
+    rc = rangeset_move_range(res->available, access_list, s, e);
+
+ out:
+    return rc;
+}
+
+int iocap_remove_access(unsigned int io_res_idx, struct domain *d,
+                        unsigned long s, unsigned long e)
+{
+    int rc = 0;
+    struct io_resource *res;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+    rc = remove_access(res, d, d->iocaps[io_res_idx], s, e);
+    spin_unlock(&res->lock);
+
+    return rc;
+}
+
+static int cleanup_resource(struct io_resource *res,
+                            struct rangeset *user)
+{
+    int rc;
+
+    spin_lock(&res->lock);
+    rc = rangeset_move(res->available, user);
+    spin_unlock(&res->lock);
+
+    return rc;
+}
+
+int iocap_domain_release_resources(struct domain *d)
+{
+    int i;
+    struct domain *d_io_cleanup = NULL;
+
+    spin_lock(&iocap_lock);
+    if (d == default_io_domain)
+    {
+        d_io_cleanup = default_io_domain;
+        default_io_domain = NULL;
+    }
+    spin_unlock(&iocap_lock);
+
+    for (i = 0; i < IOCAP_NUM_IO_RESOURCES; i++)
+    {
+        if (d->iocaps[i])
+            cleanup_resource(io_res[i], d->iocaps[i]);
+    }
+
+    if (d_io_cleanup)
+        put_domain(d_io_cleanup);
+
+    return 0;
+}
+
+int iocap_domain_init(struct domain *d)
+{
+    int i;
+    int rc = 0;
+
+    for (i = 0; i < IOCAP_NUM_IO_RESOURCES; i++)
+        d->iocaps[i] = NULL;
+
+    for (i = 0; i < IOCAP_NUM_IO_RESOURCES; i++)
+    {
+        d->iocaps[i] = rangeset_new(d, io_res[i]->name,
+                                    io_res[i]->rangeset_flags);
+        if (!d->iocaps[i]) {
+            rc = -ENOMEM;
+            break;
+        }
+    }
+
+    if (rc)
+        iocap_domain_release_resources(d);
+    return rc;
+}
+
+int iocap_domain_io_capable(struct domain *d)
+{
+    int i;
+
+    if (d == default_io_domain)
+        return 1;
+
+    for (i = 0; i < IOCAP_NUM_IO_RESOURCES; i++)
+        if (d->iocaps[i] && !rangeset_is_empty(d->iocaps[i]))
+            return 1;
+
+    return 0;
+}
+
+/* This should be called with res->lock held, but it doesn't have to be.
+ * There's an ever-so-slight chance that this may return false for the
+ * default_io_domain if a resource is moved from available to another
+ * domain's access list while we are in this function without the lock
+ * held, but failing safe is an acceptable risk if it avoids holding a
+ * global lock for long. */
+static int access_permitted(struct io_resource *res, struct domain *d,
+                            unsigned long s, unsigned long e)
+{
+    int permitted = 0;
+    struct domain *d_io = NULL;
+
+    ASSERT(d != NULL);
+
+    if (rangeset_contains_range(d->iocaps[res->type], s, e))
+        permitted = 1;
+    else
+    {
+        d_io = _get_default_io_domain();
+        if (d_io && d == d_io && rangeset_contains_range(res->available, s, e))
+            permitted = 1;
+    }
+
+    return permitted;
+}
+
+int iocap_access_permitted(unsigned int io_res_idx, struct domain *d,
+                           unsigned long s, unsigned long e)
+{
+    /* If get_resource is specified for a resource type, you shouldn't be
+     * calling iocap_access_permitted: reference counting won't be done
+     * appropriately!  A race exists between checking access with
+     * iocap_access_permitted, increasing the reference count, and the
+     * checks in resource_in_use.  Please use iocap_(get|put)_resource
+     * when possible to avoid this problem! */
+
+    struct io_resource *res;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+    ASSERT(s <= e);
+
+    res = io_res[io_res_idx];
+
+    return access_permitted(res, d, s, e);
+}
+
+int iocap_get_resource(unsigned int io_res_idx, struct domain *d,
+                       unsigned long s, unsigned long e, void *data)
+{
+    struct io_resource *res;
+    int rc = 0;
+    unsigned long r;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+    ASSERT(s <= e);
+
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+
+    if (!access_permitted(res, d, s, e))
+    {
+        rc = -EACCES;
+        goto out;
+    }
+
+    if (res->get_resource)
+    {
+        r = s;
+        do
+        {
+            rc = res->get_resource(r, d, data);
+        }
+        while ((r++) != e && !rc);
+
+        if (rc && res->put_resource)
+        {
+            /* r is one past the resource that failed; roll back only the
+             * references that were successfully taken, i.e. [s, r-2]. */
+            unsigned long fail = r - 1;
+
+            for (r = s; r != fail; r++)
+                res->put_resource(r, d, data);
+        }
+    }
+
+ out:
+    spin_unlock(&res->lock);
+    return rc;
+}
+
+void iocap_put_resource(unsigned int io_res_idx, struct domain *d,
+                        unsigned long s, unsigned long e, void *data)
+{
+    struct io_resource *res;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+    ASSERT(s <= e);
+
+    res = io_res[io_res_idx];
+
+    if (res->put_resource)
+    {
+        spin_lock(&res->lock);
+
+        do
+        {
+            res->put_resource(e, d, data);
+        }
+        while ((e--) != s);
+
+        spin_unlock(&res->lock);
+    }
+}
+
+int iocap_share(unsigned int io_res_idx, unsigned long s, unsigned long e)
+{
+    struct io_resource *res;
+    int rc;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+    if (s <= e && res->min <= s && e <= res->max)
+        rc = rangeset_add_range(res->shareable, s, e);
+    else
+        rc = -ENXIO;
+    spin_unlock(&res->lock);
+
+    return rc;
+}
+
+/* Call with res->lock held. */
+static int test_unshare(struct io_resource *res, unsigned long r)
+{
+    int rc = 0;
+    struct domain *d;
+
+    if (!rangeset_contains_singleton(res->shareable, r))
+        goto out;
+
+    /* If anybody is using a resource in the range, we can't unshare it.
+     * Well, if there were just one user, we could unshare it and remove
+     * the resource from the available list, but the logic gets tricky
+     * when trying to do that atomically for a range of resources (we'd
+     * have to test them all, store the counts so we know whether or not
+     * to remove each resource from the available list, and then mark
+     * them all as unshareable).  Note that the default_io_domain could
+     * be a user of a shareable resource.  This is simpler. */
+    read_lock(&domlist_lock);
+    for_each_domain(d)
+    {
+        if (rangeset_contains_singleton(d->iocaps[res->type], r))
+        {
+            rc = -EBUSY;
+            break;
+        }
+    }
+    read_unlock(&domlist_lock);
+
+    if (rc)
+        goto out;
+
+    if (rangeset_contains_singleton(res->reserved, r))
+        rc = -EBUSY;
+
+ out:
+    return rc;
+}
+
+/* Call with res->lock held. */
+static inline int test_unshare_range(struct io_resource *res,
+                                     unsigned long s, unsigned long e)
+{
+    int rc = 0;
+
+    do
+    {
+        rc = test_unshare(res, s);
+    }
+    while ((s++) != e && !rc);
+
+    return rc;
+}
+
+/* Call with res->lock held. */
+static inline int unshare(struct io_resource *res,
+                          unsigned long s, unsigned long e)
+{
+    return rangeset_remove_range(res->shareable, s, e);
+}
+
+int iocap_unshare(unsigned int io_res_idx, unsigned long s, unsigned long e)
+{
+    int rc = 0;
+    struct io_resource *res;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+    ASSERT(s <= e);
+
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+
+    rc = test_unshare_range(res, s, e);
+    if (rc)
+        goto out;
+
+    rc = unshare(res, s, e);
+
+ out:
+    spin_unlock(&res->lock);
+
+    return rc;
+}
+
+int iocap_reserve(unsigned int io_res_idx, unsigned long s, unsigned long e)
+{
+    struct io_resource *res;
+    int rc;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+
+    if (res->flags & IORESF_unshare_on_reserve)
+    {
+        rc = test_unshare_range(res, s, e);
+        if (rc)
+            goto out;
+
+        rc = unshare(res, s, e);
+        if (rc)
+            goto out;
+    }
+
+    rc = permit_access(res, NULL, res->reserved, s, e);
+
+ out:
+    spin_unlock(&res->lock);
+    return rc;
+}
+
+int iocap_unreserve(unsigned int io_res_idx, unsigned long s, unsigned long e)
+{
+    struct io_resource *res;
+    int rc;
+
+    ASSERT(io_res_idx < IOCAP_NUM_IO_RESOURCES);
+    res = io_res[io_res_idx];
+
+    spin_lock(&res->lock);
+    rc = remove_access(res, NULL, res->reserved, s, e);
+    spin_unlock(&res->lock);
+
+    return rc;
+}
+
+int iocap_ioresource_register(struct io_resource *res)
+{
+    int rc = 0;
+
+    ASSERT(res != NULL);
+
+    DPRINTK("iocap_ioresource_register: name = %s\n", res->name);
+
+    spin_lock_init(&res->lock);
+
+    res->available = rangeset_new(NULL, "Available Resources",
+                                  res->rangeset_flags);
+    if (!res->available)
+    {
+        rc = -ENOMEM;
+        goto fail;
+    }
+
+    rc = rangeset_add_range(res->available, res->min, res->max);
+    if (rc)
+        goto fail;
+
+    res->shareable = rangeset_new(NULL, "Shareable Resources",
+                                  res->rangeset_flags);
+    if (!res->shareable)
+    {
+        rc = -ENOMEM;
+        goto fail;
+    }
+
+    res->reserved = rangeset_new(NULL, "Reserved Resources",
+                                 res->rangeset_flags);
+    if (!res->reserved)
+    {
+        rc = -ENOMEM;
+        goto fail;
+    }
+
+    spin_lock(&iocap_lock);
+    if (io_res[res->type])
+        rc = -EEXIST;
+    else
+        io_res[res->type] = res;
+    spin_unlock(&iocap_lock);
+
+    if (!rc)
+        return 0;
+
+ fail:
+    /* The io_resource itself is static; destroy only the rangesets.
+     * rangeset_destroy(NULL) is a no-op, so this is safe on every path. */
+    rangeset_destroy(res->reserved);
+    rangeset_destroy(res->shareable);
+    rangeset_destroy(res->available);
+    res->reserved = res->shareable = res->available = NULL;
+    return rc;
+}
+
+static void dump_iocap(unsigned char key)
+{
+    int i;
+
+    printk("I/O Resources:\n");
+
+    for (i = 0; i < IOCAP_NUM_IO_RESOURCES; i++)
+    {
+        if (io_res[i])
+        {
+            spin_lock(&io_res[i]->lock);
+
+            printk("%s {\n", io_res[i]->name);
+
+            printk("    ");
+            rangeset_printk(io_res[i]->available);
+            printk("\n");
+
+            printk("    ");
+            rangeset_printk(io_res[i]->shareable);
+            printk("\n");
+
+            printk("    ");
+            rangeset_printk(io_res[i]->reserved);
+            printk("\n");
+
+            printk("}\n");
+
+            spin_unlock(&io_res[i]->lock);
+        }
+    }
+}
+
+int iocap_init(void)
+{
+    int rc;
+    int i;
+
+    DPRINTK("iocap_init: registering I/O resources\n");
+
+    for (i = 0; i < IOCAP_NUM_IO_RESOURCES; i++)
+        io_res[i] = NULL;
+
+    rc = arch_iocap_init();
+
+    if (rc)
+        panic("Failed to initialize I/O resource lists!\n");
+    else
+        register_keyhandler('I', dump_iocap, "dump available I/O resources");
+
+    return rc;
+}
+
+/* Interfaces specific to certain resources. */
+
+int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
+{
+    struct iocap_irq_info irq_info;
+
+    irq_info.v = v;
+    irq_info.will_share = will_share;
+
+    return iocap_get_resource(IOCAP_IO_IRQ, v->domain, irq, irq, &irq_info);
+}
+
+int pirq_guest_unbind(struct domain *d, int irq)
+{
+    iocap_put_resource(IOCAP_IO_IRQ, d, irq, irq, NULL);
+    return 0;
+}
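To summarize the sharing semantics implemented above, an illustrative
walkthrough (dom_a and dom_b are made-up domain pointers; the return values
follow permit_access() and test_unshare()):

    iocap_share(IOCAP_IO_IRQ, 9, 9);    /* mark IRQ 9 shareable              */
    irq_permit_access(dom_a, 9);        /* ok: 9 stays in the available set  */
    irq_permit_access(dom_b, 9);        /* ok: shareable ranges multi-grant  */
    iocap_unshare(IOCAP_IO_IRQ, 9, 9);  /* -EBUSY: grants are outstanding    */

    irq_permit_access(dom_a, 10);       /* exclusive: moved out of available */
    irq_permit_access(dom_b, 10);       /* -EBUSY: 10 is no longer available */

One consequence worth noting: because setup_irq() now calls irq_reserve(),
and the IRQ resource sets IORESF_unshare_on_reserve, any IRQ that Xen itself
drives is automatically pulled out of the shareable set before being reserved.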