# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1189784449 -3600
# Node ID f4bbd3f327e4308aa2aebf5484fc32d1d1ff4b41
# Parent acfa9290746f9c00e30dca7a62e9f7a96702b3b5
Intel VT-d specific changes in arch/x86/hvm/vmx/vtd.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
Signed-off-by: Guy Zana <guy@xxxxxxxxxxxx>
---
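Note (not part of the diff below): dmar.c parses the ACPI DMAR table into the
acpi_drhd_units / acpi_rmrr_units / acpi_atsr_units lists, and dmar.h adds
iteration helpers over them. A minimal usage sketch follows, assuming a
hypothetical caller inside intel-iommu.c; the macros come from dmar.h and
iommu_prepare_rmrr_dev from intel-iommu.c in this patch, while dom0, gdprintk
and VTDPREFIX are existing Xen symbols. Note that for_each_rmrr_device() opens
two scopes which end_for_each_rmrr_device() must close:

    static void setup_dom0_rmrr_sketch(void)
    {
        struct acpi_rmrr_unit *rmrr;
        struct pci_dev *pdev;
        int ret;

        /* walk every RMRR and every device covered by it */
        for_each_rmrr_device(rmrr, pdev)
            ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
            if (ret)
                gdprintk(XENLOG_ERR VTDPREFIX,
                    "IOMMU: mapping reserved region failed\n");
        end_for_each_rmrr_device(rmrr, pdev)
    }
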
xen/arch/x86/hvm/vmx/vtd/Makefile | 4
xen/arch/x86/hvm/vmx/vtd/dmar.c | 494 ++++++++
xen/arch/x86/hvm/vmx/vtd/dmar.h | 90 +
xen/arch/x86/hvm/vmx/vtd/intel-iommu.c | 1927 +++++++++++++++++++++++++++++++++
xen/arch/x86/hvm/vmx/vtd/io.c | 120 ++
xen/arch/x86/hvm/vmx/vtd/msi.h | 128 ++
xen/arch/x86/hvm/vmx/vtd/pci-direct.h | 48
xen/arch/x86/hvm/vmx/vtd/pci_regs.h | 449 +++++++
xen/arch/x86/hvm/vmx/vtd/utils.c | 302 +++++
9 files changed, 3562 insertions(+)
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/Makefile Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,4 @@
+obj-y += intel-iommu.o
+obj-y += dmar.o
+obj-y += utils.o
+obj-y += io.o
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/dmar.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/dmar.c Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx>
+ * Copyright (C) Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - adapted to xen
+ */
+
+#include <xen/init.h>
+#include <xen/bitmap.h>
+#include <xen/kernel.h>
+#include <xen/acpi.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <asm/string.h>
+#include "dmar.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+
+#undef PREFIX
+#define PREFIX VTDPREFIX "ACPI DMAR:"
+#define DEBUG
+
+#define MIN_SCOPE_LEN (sizeof(struct acpi_pci_path) + sizeof(struct acpi_dev_scope))
+
+LIST_HEAD(acpi_drhd_units);
+LIST_HEAD(acpi_rmrr_units);
+LIST_HEAD(acpi_atsr_units);
+LIST_HEAD(acpi_ioapic_units);
+
+u8 dmar_host_address_width;
+
+static int __init acpi_register_drhd_unit(struct acpi_drhd_unit *drhd)
+{
+ /*
+     * add INCLUDE_ALL at the tail, so a scan of the list will find it at
+ * the very end.
+ */
+ if (drhd->include_all)
+ list_add_tail(&drhd->list, &acpi_drhd_units);
+ else
+ list_add(&drhd->list, &acpi_drhd_units);
+ return 0;
+}
+
+static int __init acpi_register_rmrr_unit(struct acpi_rmrr_unit *rmrr)
+{
+ list_add(&rmrr->list, &acpi_rmrr_units);
+ return 0;
+}
+
+static int acpi_pci_device_match(struct pci_dev *devices, int cnt,
+ struct pci_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ if ((dev->bus == devices->bus) &&
+ (dev->devfn == devices->devfn))
+ return 1;
+ devices++;
+ }
+ return 0;
+}
+
+static int __init acpi_register_atsr_unit(struct acpi_atsr_unit *atsr)
+{
+ /*
+     * add ALL_PORTS at the tail, so a scan of the list will find it at
+ * the very end.
+ */
+ if (atsr->all_ports)
+ list_add_tail(&atsr->list, &acpi_atsr_units);
+ else
+ list_add(&atsr->list, &acpi_atsr_units);
+ return 0;
+}
+
+struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev)
+{
+ struct acpi_drhd_unit *drhd;
+ struct acpi_drhd_unit *include_all_drhd;
+
+ include_all_drhd = NULL;
+ list_for_each_entry(drhd, &acpi_drhd_units, list) {
+ if (drhd->include_all)
+ include_all_drhd = drhd;
+ if (acpi_pci_device_match(drhd->devices,
+ drhd->devices_cnt, dev))
+ {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "acpi_find_matched_drhd_unit: drhd->address = %lx\n",
+ drhd->address);
+ return drhd;
+ }
+ }
+
+ if (include_all_drhd) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n",
+ include_all_drhd->address);
+        return include_all_drhd;
+ }
+
+ return(NULL);
+}
+
+struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev)
+{
+ struct acpi_rmrr_unit *rmrr;
+
+ list_for_each_entry(rmrr, &acpi_rmrr_units, list) {
+ if (acpi_pci_device_match(rmrr->devices,
+ rmrr->devices_cnt, dev))
+ goto out;
+ }
+ rmrr = NULL;
+out:
+ return rmrr;
+}
+
+struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
+{
+ struct acpi_atsr_unit *atsru;
+ struct acpi_atsr_unit *all_ports_atsru;
+
+ all_ports_atsru = NULL;
+ list_for_each_entry(atsru, &acpi_atsr_units, list) {
+ if (atsru->all_ports)
+ all_ports_atsru = atsru;
+ if (acpi_pci_device_match(atsru->devices, atsru->devices_cnt, dev))
+ return atsru;
+ }
+ if (all_ports_atsru) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "acpi_find_matched_atsr_unit: all_ports_atsru\n");
+        return all_ports_atsru;
+ }
+ return(NULL);
+}
+
+static int __init acpi_parse_dev_scope(void *start, void *end, int *cnt,
+ struct pci_dev **devices)
+{
+ struct acpi_dev_scope *scope;
+ u8 bus, sub_bus, sec_bus;
+ struct acpi_pci_path *path;
+ struct acpi_ioapic_unit *acpi_ioapic_unit = NULL;
+ int count, dev_count=0;
+ struct pci_dev *pdev;
+ u8 dev, func;
+ u32 l;
+ void *tmp;
+
+ *cnt = 0;
+ tmp = start;
+ while (start < end) {
+ scope = start;
+ if (scope->length < MIN_SCOPE_LEN ||
+ (scope->dev_type != ACPI_DEV_ENDPOINT &&
+ scope->dev_type != ACPI_DEV_P2PBRIDGE)) {
+ printk(KERN_WARNING PREFIX "Invalid device scope\n");
+ return -EINVAL;
+ }
+ (*cnt)++;
+ start += scope->length;
+ }
+
+ start = tmp;
+ while (start < end) {
+ scope = start;
+ path = (struct acpi_pci_path *)(scope + 1);
+ count = (scope->length - sizeof(struct acpi_dev_scope))
+ /sizeof(struct acpi_pci_path);
+ bus = scope->start_bus;
+
+ while (--count) {
+ bus = read_pci_config_byte(bus, path->dev,
+ path->fn, PCI_SECONDARY_BUS);
+ path++;
+ }
+
+ if (scope->dev_type == ACPI_DEV_ENDPOINT) {
+ printk(KERN_WARNING PREFIX
+ "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
+ dev_count++;
+ } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) {
+ printk(KERN_WARNING PREFIX
+ "found bridge: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
+
+ sec_bus = read_pci_config_byte(bus, path->dev,
+ path->fn, PCI_SECONDARY_BUS);
+ sub_bus = read_pci_config_byte(bus, path->dev,
+ path->fn, PCI_SUBORDINATE_BUS);
+ while (sec_bus <= sub_bus) {
+ for (dev = 0; dev < 32; dev++) {
+ for (func = 0; func < 8; func++) {
+ l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID);
+
+                    /* some broken boards return 0 or ~0 if a slot is empty: */
+ if (l == 0xffffffff || l == 0x00000000 ||
+ l == 0x0000ffff || l == 0xffff0000)
+ break;
+ dev_count++;
+ }
+ }
+ sec_bus++;
+ }
+ } else if (scope->dev_type == ACPI_DEV_IOAPIC) {
+ printk(KERN_WARNING PREFIX
+ "found IOAPIC: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
+ dev_count++;
+ } else {
+ printk(KERN_WARNING PREFIX
+ "found MSI HPET: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
+ dev_count++;
+ }
+
+ start += scope->length;
+ }
+
+ *cnt = dev_count;
+ *devices = xmalloc_array(struct pci_dev, *cnt);
+ if (!*devices)
+ return -ENOMEM;
+ memset(*devices, 0, sizeof(struct pci_dev) * (*cnt));
+
+ pdev = *devices;
+ start = tmp;
+ while (start < end) {
+ scope = start;
+ path = (struct acpi_pci_path *)(scope + 1);
+ count = (scope->length - sizeof(struct acpi_dev_scope))
+ /sizeof(struct acpi_pci_path);
+ bus = scope->start_bus;
+
+ while (--count) {
+            bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
+ path++;
+ }
+
+ if (scope->dev_type == ACPI_DEV_ENDPOINT) {
+ printk(KERN_WARNING PREFIX
+ "found endpoint: bdf = %x:%x:%x\n", bus, path->dev, path->fn);
+
+ pdev->bus = bus;
+ pdev->devfn = PCI_DEVFN(path->dev, path->fn);
+ pdev++;
+ } else if (scope->dev_type == ACPI_DEV_P2PBRIDGE) {
+ printk(KERN_WARNING PREFIX
+                "found bridge: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
+
+            sec_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
+            sub_bus = read_pci_config_byte(bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
+
+ while (sec_bus <= sub_bus) {
+ for (dev = 0; dev < 32; dev++) {
+ for (func = 0; func < 8; func++) {
+ l = read_pci_config(sec_bus, dev, func, PCI_VENDOR_ID);
+
+                    /* some broken boards return 0 or ~0 if a slot is empty: */
+ if (l == 0xffffffff || l == 0x00000000 ||
+ l == 0x0000ffff || l == 0xffff0000)
+ break;
+
+ pdev->bus = sec_bus;
+ pdev->devfn = PCI_DEVFN(dev, func);
+ pdev++;
+ }
+ }
+ sec_bus++;
+ }
+ } else if (scope->dev_type == ACPI_DEV_IOAPIC) {
+ acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit);
+ acpi_ioapic_unit->apic_id = scope->enum_id;
+ acpi_ioapic_unit->ioapic.bdf.bus = bus;
+ acpi_ioapic_unit->ioapic.bdf.dev = path->dev;
+ acpi_ioapic_unit->ioapic.bdf.func = path->fn;
+ list_add(&acpi_ioapic_unit->list, &acpi_ioapic_units);
+ printk(KERN_WARNING PREFIX
+                "found IOAPIC: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
+ } else {
+ printk(KERN_WARNING PREFIX
+                "found MSI HPET: bus = %x dev = %x func = %x\n", bus, path->dev, path->fn);
+ }
+
+ start += scope->length;
+ }
+
+ return 0;
+}
+
+static int __init
+acpi_parse_one_drhd(struct acpi_dmar_entry_header *header)
+{
+ struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header;
+ struct acpi_drhd_unit *dmaru;
+ int ret = 0;
+ static int include_all;
+
+ dmaru = xmalloc(struct acpi_drhd_unit);
+ if (!dmaru)
+ return -ENOMEM;
+ memset(dmaru, 0, sizeof(struct acpi_drhd_unit));
+
+ dmaru->address = drhd->address;
+ dmaru->include_all = drhd->flags & 1; /* BIT0: INCLUDE_ALL */
+ printk(KERN_WARNING PREFIX "dmaru->address = %lx\n", dmaru->address);
+
+ if (!dmaru->include_all) {
+ ret = acpi_parse_dev_scope((void *)(drhd + 1),
+ ((void *)drhd) + header->length,
+ &dmaru->devices_cnt, &dmaru->devices);
+ }
+ else {
+ printk(KERN_WARNING PREFIX "found INCLUDE_ALL\n");
+ /* Only allow one INCLUDE_ALL */
+ if (include_all) {
+ printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
+ "device scope is allowed\n");
+ ret = -EINVAL;
+ }
+ include_all = 1;
+ }
+
+ if (ret)
+ xfree(dmaru);
+ else
+ acpi_register_drhd_unit(dmaru);
+ return ret;
+}
+
+static int __init
+acpi_parse_one_rmrr(struct acpi_dmar_entry_header *header)
+{
+ struct acpi_table_rmrr *rmrr = (struct acpi_table_rmrr *)header;
+ struct acpi_rmrr_unit *rmrru;
+ int ret = 0;
+
+ rmrru = xmalloc(struct acpi_rmrr_unit);
+ if (!rmrru)
+ return -ENOMEM;
+ memset(rmrru, 0, sizeof(struct acpi_rmrr_unit));
+
+#ifdef VTD_DEBUG
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "acpi_parse_one_rmrr: base = %lx end = %lx\n",
+ rmrr->base_address, rmrr->end_address);
+#endif
+
+ rmrru->base_address = rmrr->base_address;
+ rmrru->end_address = rmrr->end_address;
+ ret = acpi_parse_dev_scope((void *)(rmrr + 1),
+ ((void*)rmrr) + header->length,
+ &rmrru->devices_cnt, &rmrru->devices);
+
+ if (ret || (rmrru->devices_cnt == 0))
+ xfree(rmrru);
+ else
+ acpi_register_rmrr_unit(rmrru);
+ return ret;
+}
+
+static int __init
+acpi_parse_one_atsr(struct acpi_dmar_entry_header *header)
+{
+ struct acpi_table_atsr *atsr = (struct acpi_table_atsr *)header;
+ struct acpi_atsr_unit *atsru;
+ int ret = 0;
+ static int all_ports;
+
+ atsru = xmalloc(struct acpi_atsr_unit);
+ if (!atsru)
+ return -ENOMEM;
+ memset(atsru, 0, sizeof(struct acpi_atsr_unit));
+
+ atsru->all_ports = atsr->flags & 1; /* BIT0: ALL_PORTS */
+ if (!atsru->all_ports) {
+ ret = acpi_parse_dev_scope((void *)(atsr + 1),
+ ((void *)atsr) + header->length,
+ &atsru->devices_cnt, &atsru->devices);
+ }
+ else {
+ printk(KERN_WARNING PREFIX "found ALL_PORTS\n");
+ /* Only allow one ALL_PORTS */
+ if (all_ports) {
+ printk(KERN_WARNING PREFIX "Only one ALL_PORTS "
+ "device scope is allowed\n");
+ ret = -EINVAL;
+ }
+ all_ports = 1;
+ }
+
+ if (ret)
+        xfree(atsru);
+ else
+ acpi_register_atsr_unit(atsru);
+ return ret;
+}
+
+static void __init
+acpi_table_print_dmar_entry(struct acpi_dmar_entry_header *header)
+{
+ struct acpi_table_drhd *drhd;
+ struct acpi_table_rmrr *rmrr;
+
+ switch (header->type) {
+ case ACPI_DMAR_DRHD:
+ drhd = (struct acpi_table_drhd *)header;
+ break;
+ case ACPI_DMAR_RMRR:
+ rmrr = (struct acpi_table_rmrr *)header;
+ break;
+ }
+}
+
+static int __init
+acpi_parse_dmar(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_dmar *dmar = NULL;
+ struct acpi_dmar_entry_header *entry_header;
+ int ret = 0;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ dmar = (struct acpi_table_dmar *)__acpi_map_table(phys_addr, size);
+ if (!dmar) {
+ printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+ return -ENODEV;
+ }
+
+ if (!dmar->haw) {
+ printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n");
+ return -EINVAL;
+ }
+
+ dmar_host_address_width = dmar->haw;
+ printk (KERN_INFO PREFIX "Host address width %d\n",
+ dmar_host_address_width);
+
+ entry_header = (struct acpi_dmar_entry_header *)(dmar + 1);
+ while (((unsigned long)entry_header) < (((unsigned long)dmar) + size)) {
+ acpi_table_print_dmar_entry(entry_header);
+
+ switch (entry_header->type) {
+ case ACPI_DMAR_DRHD:
+ printk (KERN_INFO PREFIX "found ACPI_DMAR_DRHD\n");
+ ret = acpi_parse_one_drhd(entry_header);
+ break;
+ case ACPI_DMAR_RMRR:
+ printk (KERN_INFO PREFIX "found ACPI_DMAR_RMRR\n");
+ ret = acpi_parse_one_rmrr(entry_header);
+ break;
+ case ACPI_DMAR_ATSR:
+            printk (KERN_INFO PREFIX "found ACPI_DMAR_ATSR\n");
+ ret = acpi_parse_one_atsr(entry_header);
+ break;
+ default:
+ printk(KERN_WARNING PREFIX "Unknown DMAR structure type\n");
+ ret = -EINVAL;
+ break;
+ }
+ if (ret)
+ break;
+
+ entry_header = ((void *)entry_header + entry_header->length);
+ }
+ return ret;
+}
+
+int acpi_dmar_init(void)
+{
+ acpi_table_parse(ACPI_DMAR, acpi_parse_dmar);
+ if (list_empty(&acpi_drhd_units)) {
+ printk(KERN_ERR PREFIX "No DMAR devices found\n");
+ return -ENODEV;
+ } else
+ vtd_enabled = 1;
+ return 0;
+}
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/dmar.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/dmar.h Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx>
+ * Copyright (C) Shaohua Li <shaohua.li@xxxxxxxxx>
+ */
+
+#ifndef _DMAR_H_
+#define _DMAR_H_
+
+#include <xen/list.h>
+#include <asm/iommu.h>
+
+extern u8 dmar_host_address_width;
+
+struct acpi_drhd_unit {
+ struct list_head list;
+ unsigned long address; /* register base address of the unit */
+ struct pci_dev *devices; /* target devices */
+ int devices_cnt;
+ u8 include_all:1;
+ struct iommu *iommu;
+};
+
+struct acpi_rmrr_unit {
+ struct list_head list;
+ unsigned long base_address;
+ unsigned long end_address;
+ struct pci_dev *devices; /* target devices */
+ int devices_cnt;
+ u8 allow_all:1;
+};
+
+struct acpi_atsr_unit {
+ struct list_head list;
+ struct pci_dev *devices; /* target devices */
+ int devices_cnt;
+ u8 all_ports:1;
+};
+
+#define for_each_iommu(domain, iommu) \
+ list_for_each_entry(iommu, \
+ &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list)
+
+#define for_each_pdev(domain, pdev) \
+ list_for_each_entry(pdev, \
+ &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list)
+
+#define for_each_drhd_unit(drhd) \
+ list_for_each_entry(drhd, &acpi_drhd_units, list)
+#define for_each_rmrr_device(rmrr, pdev) \
+ list_for_each_entry(rmrr, &acpi_rmrr_units, list) { \
+ int _i; \
+ for (_i = 0; _i < rmrr->devices_cnt; _i++) { \
+ pdev = &(rmrr->devices[_i]);
+#define end_for_each_rmrr_device(rmrr, pdev) \
+ } \
+ }
+
+struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev);
+struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
+
+/* This one is for interrupt remapping */
+struct acpi_ioapic_unit {
+ struct list_head list;
+ int apic_id;
+ union {
+ u16 info;
+ struct {
+ u16 bus: 8,
+ dev: 5,
+ func: 3;
+ }bdf;
+ }ioapic;
+};
+
+#endif // _DMAR_H_
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,1927 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Ashok Raj <ashok.raj@xxxxxxxxx>
+ * Copyright (C) Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - adapted to xen
+ */
+
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/domain_page.h>
+#include <asm/delay.h>
+#include <asm/string.h>
+#include <asm/iommu.h>
+#include <asm/hvm/vmx/intel-iommu.h>
+#include "dmar.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+#include "msi.h"
+
+extern void print_iommu_regs(struct acpi_drhd_unit *drhd);
+extern void print_vtd_entries(struct domain *d, int bus, int devfn,
+ unsigned long gmfn);
+extern void (*interrupt[])(void);
+
+#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
+
+#define time_after(a,b) \
+ (typecheck(unsigned long, a) && \
+ typecheck(unsigned long, b) && \
+ ((long)(b) - (long)(a) < 0))
+
+unsigned int x86_clflush_size;
+void clflush_cache_range(void *adr, int size)
+{
+ int i;
+ for (i = 0; i < size; i += x86_clflush_size)
+ clflush(adr + i);
+}
+
+static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+}
+
+#define iommu_flush_cache_entry(iommu, addr) \
+ __iommu_flush_cache(iommu, addr, 8)
+#define iommu_flush_cache_page(iommu, addr) \
+ __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
+
+int nr_iommus;
+/* context entry handling */
+static struct context_entry * device_to_context_entry(struct iommu *iommu,
+ u8 bus, u8 devfn)
+{
+ struct root_entry *root;
+ struct context_entry *context;
+ unsigned long phy_addr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ root = &iommu->root_entry[bus];
+ if (!root_present(*root)) {
+ phy_addr = (unsigned long) alloc_xenheap_page();
+ if (!phy_addr) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return NULL;
+ }
+ memset((void *) phy_addr, 0, PAGE_SIZE);
+ iommu_flush_cache_page(iommu, (void *)phy_addr);
+ phy_addr = virt_to_maddr((void *)phy_addr);
+ set_root_value(*root, phy_addr);
+ set_root_present(*root);
+ iommu_flush_cache_entry(iommu, root);
+ }
+ phy_addr = (unsigned long) get_context_addr(*root);
+ context = (struct context_entry *)maddr_to_virt(phy_addr);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return &context[devfn];
+}
+
+static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
+{
+ struct root_entry *root;
+ struct context_entry *context;
+ unsigned long phy_addr;
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ root = &iommu->root_entry[bus];
+ if (!root_present(*root)) {
+ ret = 0;
+ goto out;
+ }
+ phy_addr = get_context_addr(*root);
+ context = (struct context_entry *)maddr_to_virt(phy_addr);
+ ret = context_present(context[devfn]);
+out:
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return ret;
+}
+
+/* page table handling */
+#define LEVEL_STRIDE (9)
+#define LEVEL_MASK ((1 << LEVEL_STRIDE) - 1)
+#define agaw_to_level(val) ((val) + 2)
+#define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
+#define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE)
+#define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
+#define address_level_offset(addr, level) \
+ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
+#define level_size(l) (1 << level_to_offset_bits(l))
+#define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
+static struct dma_pte * addr_to_dma_pte(struct domain *domain, u64 addr)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int addr_width = agaw_to_width(hd->agaw);
+ struct dma_pte *parent, *pte = NULL, *pgd;
+ int level = agaw_to_level(hd->agaw);
+ int offset;
+ unsigned long flags;
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
+ addr &= (((u64)1) << addr_width) - 1;
+ spin_lock_irqsave(&hd->mapping_lock, flags);
+ if (!hd->pgd) {
+ pgd = (struct dma_pte *)alloc_xenheap_page();
+ if (!pgd && !hd->pgd) {
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return NULL;
+ }
+ memset((u8*)pgd, 0, PAGE_SIZE);
+ if (!hd->pgd)
+ hd->pgd = pgd;
+ else /* somebody is fast */
+ free_xenheap_page((void *) pgd);
+ }
+ parent = hd->pgd;
+ while (level > 0) {
+ u8 *tmp;
+ offset = address_level_offset(addr, level);
+ pte = &parent[offset];
+ if (level == 1)
+ break;
+ if (dma_pte_addr(*pte) == 0) {
+ tmp = alloc_xenheap_page();
+ if (tmp == NULL)
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "addr_to_dma_pte: tmp == NULL\n");
+
+ memset(tmp, 0, PAGE_SIZE);
+ iommu_flush_cache_page(iommu, tmp);
+
+ if (!tmp && dma_pte_addr(*pte) == 0) {
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return NULL;
+ }
+ if (dma_pte_addr(*pte) == 0) {
+ dma_set_pte_addr(*pte,
+ virt_to_maddr(tmp));
+ /*
+ * high level table always sets r/w, last level
+ * page table control read/write
+ */
+ dma_set_pte_readable(*pte);
+ dma_set_pte_writable(*pte);
+ iommu_flush_cache_entry(iommu, pte);
+ } else /* somebody is fast */
+ free_xenheap_page(tmp);
+ }
+ parent = maddr_to_virt(dma_pte_addr(*pte));
+ level--;
+ }
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return pte;
+}
+
+/* return address's pte at specific level */
+static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr,
+ int level)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+ struct dma_pte *parent, *pte = NULL;
+ int total = agaw_to_level(hd->agaw);
+ int offset;
+
+ parent = hd->pgd;
+ while (level <= total) {
+ offset = address_level_offset(addr, total);
+ pte = &parent[offset];
+ if (level == total)
+ return pte;
+
+ if (dma_pte_addr(*pte) == 0)
+ break;
+ parent = maddr_to_virt(dma_pte_addr(*pte));
+ total--;
+ }
+ return NULL;
+}
+
+static void iommu_flush_write_buffer(struct iommu *iommu)
+{
+ u32 val;
+ unsigned long flag;
+ unsigned long start_time;
+
+ if (!cap_rwbf(iommu->cap))
+ return;
+ val = iommu->gcmd | DMA_GCMD_WBF;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
+
+ /* Make sure hardware complete it */
+ start_time = jiffies;
+ while (1) {
+ val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if (!(val & DMA_GSTS_WBFS))
+ break;
+ if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
+ cpu_relax();
+ }
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+/* return value determines if we need a write buffer flush */
+static int __iommu_flush_context(struct iommu *iommu,
+ u16 did, u16 source_id, u8 function_mask, u64 type,
+ int non_present_entry_flush)
+{
+ u64 val = 0;
+ unsigned long flag;
+ unsigned long start_time;
+
+ /*
+     * In the non-present entry flush case, if hardware doesn't cache
+     * non-present entries we do nothing; if hardware does cache non-present
+     * entries, we flush the entries of domain 0 (the domain id used to
+     * cache any non-present entries).
+ */
+ if (non_present_entry_flush) {
+ if (!cap_caching_mode(iommu->cap))
+ return 1;
+ else
+ did = 0;
+ }
+
+ /* use register invalidation */
+ switch (type)
+ {
+ case DMA_CCMD_GLOBAL_INVL:
+ val = DMA_CCMD_GLOBAL_INVL;
+ break;
+ case DMA_CCMD_DOMAIN_INVL:
+ val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
+ break;
+ case DMA_CCMD_DEVICE_INVL:
+ val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
+ |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
+ break;
+ default:
+ BUG();
+ }
+ val |= DMA_CCMD_ICC;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
+
+ /* Make sure hardware complete it */
+ start_time = jiffies;
+ while (1) {
+ val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
+ if (!(val & DMA_CCMD_ICC))
+ break;
+ if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
+ cpu_relax();
+ }
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+    /* flushing the context entry will implicitly flush the write buffer */
+ return 0;
+}
+
+static int inline iommu_flush_context_global(struct iommu *iommu,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
+ non_present_entry_flush);
+}
+
+static int inline iommu_flush_context_domain(struct iommu *iommu, u16 did,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
+ non_present_entry_flush);
+}
+
+static int inline iommu_flush_context_device(struct iommu *iommu,
+ u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
+{
+ return __iommu_flush_context(iommu, did, source_id, function_mask,
+ DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
+}
+
+/* return value determines if we need a write buffer flush */
+static int __iommu_flush_iotlb(struct iommu *iommu, u16 did,
+ u64 addr, unsigned int size_order, u64 type,
+ int non_present_entry_flush)
+{
+ int tlb_offset = ecap_iotlb_offset(iommu->ecap);
+ u64 val = 0, val_iva = 0;
+ unsigned long flag;
+ unsigned long start_time;
+
+ /*
+     * In the non-present entry flush case, if hardware doesn't cache
+     * non-present entries we do nothing; if hardware does cache non-present
+     * entries, we flush the entries of domain 0 (the domain id used to
+     * cache any non-present entries).
+ */
+ if (non_present_entry_flush) {
+ if (!cap_caching_mode(iommu->cap))
+ return 1;
+ else
+ did = 0;
+ }
+
+ /* use register invalidation */
+ switch (type) {
+ case DMA_TLB_GLOBAL_FLUSH:
+        /* global flush doesn't need to set IVA_REG */
+ val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
+ break;
+ case DMA_TLB_DSI_FLUSH:
+ val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
+ break;
+ case DMA_TLB_PSI_FLUSH:
+ val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
+ /* Note: always flush non-leaf currently */
+ val_iva = size_order | addr;
+ break;
+ default:
+ BUG();
+ }
+ /* Note: set drain read/write */
+#if 0
+ /*
+     * This is probably just to be extra safe. Looks like we can
+ * ignore it without any impact.
+ */
+ if (cap_read_drain(iommu->cap))
+ val |= DMA_TLB_READ_DRAIN;
+#endif
+ if (cap_write_drain(iommu->cap))
+ val |= DMA_TLB_WRITE_DRAIN;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ /* Note: Only uses first TLB reg currently */
+ if (val_iva)
+ dmar_writeq(iommu->reg, tlb_offset, val_iva);
+ dmar_writeq(iommu->reg, tlb_offset + 8, val);
+
+ /* Make sure hardware complete it */
+ start_time = jiffies;
+ while (1) {
+ val = dmar_readq(iommu->reg, tlb_offset + 8);
+ if (!(val & DMA_TLB_IVT))
+ break;
+ if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+            panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
+ cpu_relax();
+ }
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ /* check IOTLB invalidation granularity */
+ if (DMA_TLB_IAIG(val) == 0)
+ printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
+ if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
+        printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
+ (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
+    /* flushing the iotlb will implicitly flush the write buffer */
+ return 0;
+}
+
+static int inline iommu_flush_iotlb_global(struct iommu *iommu,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
+ non_present_entry_flush);
+}
+
+static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
+ int non_present_entry_flush)
+{
+ return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
+ non_present_entry_flush);
+}
+
+static int inline get_alignment(u64 base, unsigned int size)
+{
+ int t = 0;
+ u64 end;
+
+ end = base + size - 1;
+ while (base != end) {
+ t++;
+ base >>= 1;
+ end >>= 1;
+ }
+ return t;
+}
+
+static int inline iommu_flush_iotlb_psi(struct iommu *iommu, u16 did,
+ u64 addr, unsigned int pages, int non_present_entry_flush)
+{
+ unsigned int align;
+
+ BUG_ON(addr & (~PAGE_MASK_4K));
+ BUG_ON(pages == 0);
+
+ /* Fallback to domain selective flush if no PSI support */
+ if (!cap_pgsel_inv(iommu->cap))
+ return iommu_flush_iotlb_dsi(iommu, did,
+ non_present_entry_flush);
+
+ /*
+     * PSI requires the number of pages to be 2 ^ x, and the base address
+     * to be naturally aligned to that size
+ */
+ align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
+ /* Fallback to domain selective flush if size is too big */
+ if (align > cap_max_amask_val(iommu->cap))
+ return iommu_flush_iotlb_dsi(iommu, did,
+ non_present_entry_flush);
+
+ addr >>= PAGE_SHIFT_4K + align;
+ addr <<= PAGE_SHIFT_4K + align;
+
+ return __iommu_flush_iotlb(iommu, did, addr, align,
+ DMA_TLB_PSI_FLUSH, non_present_entry_flush);
+}
+
+void flush_all(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int i = 0;
+
+ wbinvd();
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ iommu_flush_context_global(iommu, 0);
+ iommu_flush_iotlb_global(iommu, 0);
+ i++;
+ }
+}
+
+/* clear one page's page table */
+static void dma_pte_clear_one(struct domain *domain, u64 addr)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ struct dma_pte *pte = NULL;
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+
+ /* get last level pte */
+ pte = dma_addr_level_pte(domain, addr, 1);
+
+ if (pte) {
+ dma_clear_pte(*pte);
+ iommu_flush_cache_entry(drhd->iommu, pte);
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ if (cap_caching_mode(iommu->cap))
+ {
+ iommu_flush_iotlb_psi(iommu, domain->domain_id, addr, 1, 0);
+ }
+ else if (cap_rwbf(iommu->cap))
+ iommu_flush_write_buffer(iommu);
+ }
+ }
+}
+
+/* clear last level ptes; a tlb flush should follow */
+static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+ int addr_width = agaw_to_width(hd->agaw);
+
+ start &= (((u64)1) << addr_width) - 1;
+ end &= (((u64)1) << addr_width) - 1;
+    /* in case it's a partial page */
+ start = PAGE_ALIGN_4K(start);
+ end &= PAGE_MASK_4K;
+
+ /* we don't need lock here, nobody else touches the iova range */
+ while (start < end) {
+ dma_pte_clear_one(domain, start);
+ start += PAGE_SIZE_4K;
+ }
+}
+
+/* free page table pages; the last level ptes should already be cleared */
+// static void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
+void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
+{
+ struct acpi_drhd_unit *drhd;
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+ struct iommu *iommu;
+ int addr_width = agaw_to_width(hd->agaw);
+ struct dma_pte *pte;
+ int total = agaw_to_level(hd->agaw);
+ int level;
+ u32 tmp;
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
+ start &= (((u64)1) << addr_width) - 1;
+ end &= (((u64)1) << addr_width) - 1;
+
+ /* we don't need lock here, nobody else touches the iova range */
+ level = 2;
+ while (level <= total) {
+ tmp = align_to_level(start, level);
+ if (tmp >= end || (tmp + level_size(level) > end))
+ return;
+
+ while (tmp < end) {
+ pte = dma_addr_level_pte(domain, tmp, level);
+ if (pte) {
+ free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte)));
+ dma_clear_pte(*pte);
+ iommu_flush_cache_entry(iommu, pte);
+ }
+ tmp += level_size(level);
+ }
+ level++;
+ }
+ /* free pgd */
+ if (start == 0 && end == ((((u64)1) << addr_width) - 1)) {
+ free_xenheap_page((void *)hd->pgd);
+ hd->pgd = NULL;
+ }
+}
+
+/* iommu handling */
+static int iommu_set_root_entry(struct iommu *iommu)
+{
+ void *addr;
+ u32 cmd, sts;
+ struct root_entry *root;
+ unsigned long flags;
+
+ if (iommu == NULL)
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_root_entry: iommu == NULL\n");
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (!iommu->root_entry) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ root = (struct root_entry *)alloc_xenheap_page();
+ memset((u8*)root, 0, PAGE_SIZE);
+ iommu_flush_cache_page(iommu, root);
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ if (!root && !iommu->root_entry) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
+
+ if (!iommu->root_entry)
+ iommu->root_entry = root;
+ else /* somebody is fast */
+ free_xenheap_page((void *)root);
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ addr = iommu->root_entry;
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
+ cmd = iommu->gcmd | DMA_GCMD_SRTP;
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
+
+ /* Make sure hardware complete it */
+ while (1) {
+ sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if (sts & DMA_GSTS_RTPS)
+ break;
+ cpu_relax();
+ }
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ return 0;
+}
+
+static int iommu_enable_translation(struct iommu *iommu)
+{
+ u32 sts;
+ unsigned long flags;
+
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "iommu_enable_translation: enabling vt-d translation\n");
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ iommu->gcmd |= DMA_GCMD_TE;
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+ /* Make sure hardware complete it */
+ while (1) {
+ sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if (sts & DMA_GSTS_TES) {
+ break;
+ }
+ cpu_relax();
+ }
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ return 0;
+}
+
+int iommu_disable_translation(struct iommu *iommu)
+{
+ u32 sts;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ iommu->gcmd &= ~ DMA_GCMD_TE;
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+ /* Make sure hardware complete it */
+ while(1) {
+ sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES))
+ break;
+ cpu_relax();
+ }
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ return 0;
+}
+
+static struct iommu *vector_to_iommu[NR_VECTORS];
+static int iommu_page_fault_do_one(struct iommu *iommu, int type,
+ u8 fault_reason, u16 source_id, u32 addr)
+{
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "iommu_page_fault:%s: DEVICE %x:%x.%x addr %x REASON %x\n",
+ (type ? "DMA Read" : "DMA Write"),
+ (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr, fault_reason);
+
+ print_vtd_entries(current->domain, (source_id >> 8),(source_id & 0xff),
+ (addr >> PAGE_SHIFT));
+ return 0;
+}
+
+#define PRIMARY_FAULT_REG_LEN (16)
+static void iommu_page_fault(int vector, void *dev_id,
+ struct cpu_user_regs *regs)
+{
+ struct iommu *iommu = dev_id;
+ int reg, fault_index;
+ u32 fault_status;
+ unsigned long flags;
+
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ /* FIXME: ignore advanced fault log */
+ if (!(fault_status & DMA_FSTS_PPF))
+ return;
+ fault_index = dma_fsts_fault_record_index(fault_status);
+ reg = cap_fault_reg_offset(iommu->cap);
+ while (1) {
+ u8 fault_reason;
+ u16 source_id;
+ u32 guest_addr;
+ int type;
+ u32 data;
+
+ /* highest 32 bits */
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ data = dmar_readl(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12);
+ if (!(data & DMA_FRCD_F)) {
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ break;
+ }
+
+ fault_reason = dma_frcd_fault_reason(data);
+ type = dma_frcd_type(data);
+
+ data = dmar_readl(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 8);
+ source_id = dma_frcd_source_id(data);
+
+ guest_addr = dmar_readq(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN);
+ guest_addr = dma_frcd_page_addr(guest_addr);
+ /* clear the fault */
+ dmar_writel(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ iommu_page_fault_do_one(iommu, type, fault_reason,
+ source_id, guest_addr);
+
+ fault_index++;
+ if (fault_index > cap_num_fault_regs(iommu->cap))
+ fault_index = 0;
+ }
+ /* clear primary fault overflow */
+ if (fault_status & DMA_FSTS_PFO) {
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ }
+ return;
+}
+
+static void dma_msi_unmask(unsigned int vector)
+{
+ struct iommu *iommu = vector_to_iommu[vector];
+ unsigned long flags;
+
+ /* unmask it */
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+static void dma_msi_mask(unsigned int vector)
+{
+ unsigned long flags;
+ struct iommu *iommu = vector_to_iommu[vector];
+
+ /* mask it */
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+static unsigned int dma_msi_startup(unsigned int vector)
+{
+ dma_msi_unmask(vector);
+ return 0;
+}
+
+static void dma_msi_end(unsigned int vector)
+{
+ dma_msi_unmask(vector);
+ ack_APIC_irq();
+}
+
+static void dma_msi_data_init(struct iommu *iommu, int vector)
+{
+ u32 msi_data = 0;
+ unsigned long flags;
+
+ /* Fixed, edge, assert mode. Follow MSI setting */
+ msi_data |= vector & 0xff;
+ msi_data |= 1 << 14;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
+{
+ u64 msi_address;
+ unsigned long flags;
+
+ /* Physical, dedicated cpu. Follow MSI setting */
+ msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
+ msi_address |= MSI_PHYSICAL_MODE << 2;
+ msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
+ msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
+
+ spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
+ dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
+{
+ struct iommu *iommu = vector_to_iommu[vector];
+ dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
+}
+
+static struct hw_interrupt_type dma_msi_type = {
+ .typename = "DMA_MSI",
+ .startup = dma_msi_startup,
+ .shutdown = dma_msi_mask,
+ .enable = dma_msi_unmask,
+ .disable = dma_msi_mask,
+ .ack = dma_msi_mask,
+ .end = dma_msi_end,
+ .set_affinity = dma_msi_set_affinity,
+};
+
+int iommu_set_interrupt(struct iommu *iommu)
+{
+ int vector, ret;
+ unsigned long flags;
+
+ vector = assign_irq_vector(AUTO_ASSIGN);
+ vector_to_iommu[vector] = iommu;
+
+    /* VT-d fault is an MSI, make irq == vector */
+ irq_vector[vector] = vector;
+ vector_irq[vector] = vector;
+
+ if (!vector) {
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&irq_desc[vector].lock, flags);
+ irq_desc[vector].handler = &dma_msi_type;
+ spin_unlock_irqrestore(&irq_desc[vector].lock, flags);
+ set_intr_gate(vector, interrupt[vector]);
+ ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
+ if (ret)
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
+ return vector;
+}
+
+struct iommu *iommu_alloc(void *hw_data)
+{
+ struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
+ struct iommu *iommu;
+
+ if (nr_iommus > MAX_IOMMUS) {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
+ return NULL;
+ }
+
+ iommu = xmalloc(struct iommu);
+ if (!iommu)
+ return NULL;
+ memset(iommu, 0, sizeof(struct iommu));
+
+ set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
+ iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
+ iommu->reg, drhd->address);
+ nr_iommus++;
+
+ if (!iommu->reg) {
+        printk(KERN_ERR VTDPREFIX "IOMMU: can't map the region\n");
+ goto error;
+ }
+
+ iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
+ iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+ spin_lock_init(&iommu->lock);
+ spin_lock_init(&iommu->register_lock);
+
+ drhd->iommu = iommu;
+ return iommu;
+error:
+ xfree(iommu);
+ return NULL;
+}
+
+static void free_iommu(struct iommu *iommu)
+{
+ if (!iommu)
+ return;
+ if (iommu->root_entry)
+ free_xenheap_page((void *)iommu->root_entry);
+ if (iommu->reg)
+ iounmap(iommu->reg);
+ free_irq(iommu->vector);
+ xfree(iommu);
+}
+
+#define guestwidth_to_adjustwidth(gaw) ({ \
+ int agaw; \
+ int r = (gaw - 12) % 9; \
+ if (r == 0) \
+ agaw = gaw; \
+ else \
+ agaw = gaw + 9 - r; \
+ if (agaw > 64) \
+ agaw = 64; \
+ agaw; })
+int iommu_domain_init(struct domain *domain)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+ struct iommu *iommu = NULL;
+ int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
+ int adjust_width, agaw;
+ unsigned long sagaw;
+ struct acpi_drhd_unit *drhd;
+
+ if (list_empty(&acpi_drhd_units))
+ return 0;
+ spin_lock_init(&hd->mapping_lock);
+ spin_lock_init(&hd->iommu_list_lock);
+ INIT_LIST_HEAD(&hd->pdev_list);
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->iommu)
+ iommu = drhd->iommu;
+ else
+ iommu = iommu_alloc(drhd);
+ }
+
+ /* calculate AGAW */
+ if (guest_width > cap_mgaw(iommu->cap))
+ guest_width = cap_mgaw(iommu->cap);
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ agaw = width_to_agaw(adjust_width);
+ /* FIXME: hardware doesn't support it, choose a bigger one? */
+ sagaw = cap_sagaw(iommu->cap);
+ if (!test_bit(agaw, &sagaw)) {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: hardware doesn't support the agaw\n");
+ agaw = find_next_bit(&sagaw, 5, agaw);
+ if (agaw >= 5)
+ return -ENODEV;
+ }
+ hd->agaw = agaw;
+ return 0;
+}
+
+static int domain_context_mapping_one(
+ struct domain *domain,
+ struct iommu *iommu,
+ u8 bus, u8 devfn)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+ struct context_entry *context;
+ unsigned long flags;
+ int ret = 0;
+
+ context = device_to_context_entry(iommu, bus, devfn);
+ if (!context) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_mapping_one:context == NULL:bdf = %x:%x:%x \n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return -ENOMEM;
+ }
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (context_present(*context)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return 0;
+ }
+
+#ifdef VTD_DEBUG
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "context_mapping_one_1-%x:%x:%x-*context = %lx %lx\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn), context->hi, context->lo);
+#endif
+
+ /*
+ * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
+ * be 1 based as required by intel's iommu hw.
+ */
+ context_set_domain_id(*context, domain->domain_id);
+ context_set_address_width(*context, hd->agaw);
+
+ if (ecap_pass_thru(iommu->ecap))
+ context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+ else {
+ context_set_address_root(*context, virt_to_maddr(hd->pgd));
+ context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+ }
+
+ context_set_fault_enable(*context);
+ context_set_present(*context);
+ iommu_flush_cache_entry(iommu, context);
+
+#ifdef VTD_DEBUG
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "context_mapping_one_2-%x:%x:%x-*context=%lx %lx hd->pgd = %p\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ context->hi, context->lo, hd->pgd);
+#endif
+
+ if (iommu_flush_context_device(iommu, domain->domain_id,
+ (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
+ iommu_flush_write_buffer(iommu);
+ else
+ iommu_flush_iotlb_dsi(iommu, domain->domain_id, 0);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return ret;
+}
+
+static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
+{
+ u8 id;
+ int ttl = 48;
+
+ while (ttl--) {
+ pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
+ if (pos < 0x40)
+ break;
+ pos &= ~3;
+ id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pos + PCI_CAP_LIST_ID);
+
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pos += PCI_CAP_LIST_NEXT;
+ }
+ return 0;
+}
+
+#define PCI_BASE_CLASS_BRIDGE 0x06
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+
+#define DEV_TYPE_PCIe_ENDPOINT 1
+#define DEV_TYPE_PCI_BRIDGE 2
+#define DEV_TYPE_PCI 3
+
+int pdev_type(struct pci_dev *dev)
+{
+ u16 class_device;
+ u16 status;
+
+ class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
+ if (class_device == PCI_CLASS_BRIDGE_PCI)
+ return DEV_TYPE_PCI_BRIDGE;
+
+ status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), PCI_STATUS);
+
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return DEV_TYPE_PCI;
+
+    if (__pci_find_next_cap(dev->bus, dev->devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP))
+ return DEV_TYPE_PCIe_ENDPOINT;
+
+ return DEV_TYPE_PCI;
+}
+
+#define MAX_BUSES 256
+struct pci_dev bus2bridge[MAX_BUSES];
+
+static int domain_context_mapping(
+ struct domain *domain,
+ struct iommu *iommu,
+ struct pci_dev *pdev)
+{
+ int ret = 0;
+ int dev, func, sec_bus, sub_bus;
+ u32 type;
+
+ type = pdev_type(pdev);
+ if (type == DEV_TYPE_PCI_BRIDGE) {
+ sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
+
+ if (bus2bridge[sec_bus].bus == 0) {
+ bus2bridge[sec_bus].bus = pdev->bus;
+ bus2bridge[sec_bus].devfn = pdev->devfn;
+ }
+
+ sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
+
+ if (sec_bus != sub_bus) {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "context_mapping: nested PCI bridge not supported\n");
+ dprintk(XENLOG_INFO VTDPREFIX,
+ " bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ sec_bus, sub_bus);
+ }
+ }
+
+ if (type == DEV_TYPE_PCIe_ENDPOINT) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ ret = domain_context_mapping_one(domain, iommu,
+ (u8)(pdev->bus), (u8) (pdev->devfn));
+ }
+
+ /* PCI devices */
+ if (type == DEV_TYPE_PCI) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+ if (pdev->bus == 0)
+ ret = domain_context_mapping_one(domain, iommu,
+ (u8)(pdev->bus), (u8) (pdev->devfn));
+ else {
+ if (bus2bridge[pdev->bus].bus != 0)
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "domain_context_mapping:bus2bridge[pdev->bus].bus==0\n");
+
+ ret = domain_context_mapping_one(domain, iommu,
+ (u8)(bus2bridge[pdev->bus].bus),
+ (u8)(bus2bridge[pdev->bus].devfn));
+
+ /* now map everything behind the PCI bridge */
+ for (dev = 0; dev < 32; dev++) {
+ for (func = 0; func < 8; func++) {
+ ret = domain_context_mapping_one(domain, iommu,
+ pdev->bus, (u8)PCI_DEVFN(dev, func));
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ }
+ return ret;
+}
+
+static int domain_context_unmap_one(
+ struct domain *domain,
+ struct iommu *iommu,
+ u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+ unsigned long flags;
+
+ context = device_to_context_entry(iommu, bus, devfn);
+ if (!context) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return -ENOMEM;
+ }
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (!context_present(*context)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap_one-%x:%x:%x- context NOT present:return\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return 0;
+ }
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap_one_1:bdf = %x:%x:%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ context_clear_present(*context);
+ context_clear_entry(*context);
+ iommu_flush_cache_entry(iommu, context);
+ iommu_flush_context_global(iommu, 0);
+ iommu_flush_iotlb_global(iommu, 0);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap_one_2:bdf = %x:%x:%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ return 0;
+}
+
+static int domain_context_unmap(
+ struct domain *domain,
+ struct iommu *iommu,
+ struct pci_dev *pdev)
+{
+ int ret = 0;
+ int dev, func, sec_bus, sub_bus;
+ u32 type;
+
+ type = pdev_type(pdev);
+ if (type == DEV_TYPE_PCI_BRIDGE) {
+ sec_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
+ sub_bus = read_pci_config_byte(pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
+
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:BRIDGE:%x:%x:%x sec_bus=%x sub_bus=%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
+ }
+
+ if (type == DEV_TYPE_PCIe_ENDPOINT) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ ret = domain_context_unmap_one(domain, iommu,
+ (u8)(pdev->bus), (u8) (pdev->devfn));
+ }
+
+ /* PCI devices */
+ if (type == DEV_TYPE_PCI) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ if (pdev->bus == 0)
+ ret = domain_context_unmap_one(domain, iommu,
+ (u8)(pdev->bus), (u8) (pdev->devfn));
+ else {
+ if (bus2bridge[pdev->bus].bus != 0)
+ gdprintk(XENLOG_INFO VTDPREFIX,
+                    "domain_context_unmap:bus2bridge[pdev->bus].bus==0\n");
+
+ ret = domain_context_unmap_one(domain, iommu,
+ (u8)(bus2bridge[pdev->bus].bus),
+ (u8)(bus2bridge[pdev->bus].devfn));
+
+            /* now unmap everything behind the PCI bridge */
+ for (dev = 0; dev < 32; dev++) {
+ for (func = 0; func < 8; func++) {
+ ret = domain_context_unmap_one(domain, iommu,
+ pdev->bus, (u8)PCI_DEVFN(dev, func));
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ }
+ return ret;
+}
+
+void reassign_device_ownership(
+ struct domain *source,
+ struct domain *target,
+ u8 bus, u8 devfn)
+{
+ struct hvm_iommu *source_hd = domain_hvm_iommu(source);
+ struct hvm_iommu *target_hd = domain_hvm_iommu(target);
+ struct pci_dev *pdev;
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int status;
+ unsigned long flags;
+
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "reassign_device-%x:%x:%x- source = %d target = %d\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ source->domain_id, target->domain_id);
+
+ for_each_pdev(source, pdev) {
+ if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
+ continue;
+
+ pdev->bus = bus;
+ pdev->devfn = devfn;
+ drhd = acpi_find_matched_drhd_unit(pdev);
+ iommu = drhd->iommu;
+ domain_context_unmap(source, iommu, pdev);
+
+ /*
+ * move pci device from the source domain to target domain.
+ */
+ spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
+ spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
+ list_move(&pdev->list, &target_hd->pdev_list);
+ spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
+ spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
+
+ status = domain_context_mapping(target, iommu, pdev);
+ if (status != 0)
+ gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
+
+ /*
+ * We are done.
+ */
+ break;
+ }
+}
+
+void return_devices_to_dom0(struct domain *d)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev;
+
+ while (!list_empty(&hd->pdev_list)) {
+ pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "return_devices_to_dom0: bdf = %x:%x:%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
+ }
+
+#ifdef VTD_DEBUG
+ for_each_pdev(dom0, pdev) {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
+ dom0->domain_id, pdev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ }
+#endif
+}
+
+void iommu_domain_teardown(struct domain *d)
+{
+ if (list_empty(&acpi_drhd_units))
+ return;
+
+#if CONFIG_PAGING_LEVELS == 3
+ {
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ int level = agaw_to_level(hd->agaw);
+ struct dma_pte *pgd = NULL;
+
+ switch (level)
+ {
+ case VTD_PAGE_TABLE_LEVEL_3:
+ if ( hd->pgd )
+ free_xenheap_page((void *)hd->pgd);
+ break;
+ case VTD_PAGE_TABLE_LEVEL_4:
+ if ( hd->pgd )
+ {
+ pgd = hd->pgd;
+ if ( pgd[0].val != 0 )
+ free_xenheap_page((void*)maddr_to_virt(
+ dma_pte_addr(pgd[0])));
+ }
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "Unsupported p2m table sharing level!\n");
+ break;
+ }
+ }
+#endif
+ return_devices_to_dom0(d);
+}
+
+static int domain_context_mapped(struct domain *domain, struct pci_dev *pdev)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int ret;
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ struct dma_pte *pte = NULL;
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
+ /* do nothing if dom0 and iommu supports pass thru */
+ if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0))
+ return 0;
+
+ pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K);
+ if (!pte)
+ return -ENOMEM;
+ dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
+ dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
+ iommu_flush_cache_entry(iommu, pte);
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ if (cap_caching_mode(iommu->cap))
+ iommu_flush_iotlb_psi(iommu, d->domain_id,
+ gfn << PAGE_SHIFT_4K, 1, 0);
+ else if (cap_rwbf(iommu->cap))
+ iommu_flush_write_buffer(iommu);
+ }
+ return 0;
+}
+
+int iommu_unmap_page(struct domain *d, dma_addr_t gfn)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ struct dma_pte *pte = NULL;
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
+ /* do nothing if dom0 and iommu supports pass thru */
+ if (ecap_pass_thru(iommu->ecap) && (d->domain_id == 0))
+ return 0;
+
+ /* get last level pte */
+ pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1);
+ dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
+
+ return 0;
+}
+
+int iommu_page_mapping(struct domain *domain, dma_addr_t iova,
+ void *hpa, size_t size, int prot)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ unsigned long start_pfn, end_pfn;
+ struct dma_pte *pte = NULL;
+ int index;
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+ if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
+ return -EINVAL;
+ iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
+ start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
+ end_pfn = (unsigned long)
+ ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
+ index = 0;
+ while (start_pfn < end_pfn) {
+ pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
+ if (!pte)
+ return -ENOMEM;
+ dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
+ dma_set_pte_prot(*pte, prot);
+ iommu_flush_cache_entry(iommu, pte);
+ start_pfn++;
+ index++;
+ }
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ if (cap_caching_mode(iommu->cap))
+ iommu_flush_iotlb_psi(iommu, domain->domain_id, iova, size, 0);
+ else if (cap_rwbf(iommu->cap))
+ iommu_flush_write_buffer(iommu);
+ }
+ return 0;
+}
+
+int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
+{
+ struct dma_pte *pte = NULL;
+
+ /* get last level pte */
+ pte = dma_addr_level_pte(domain, addr, 1);
+ dma_pte_clear_range(domain, addr, addr + size);
+
+ return 0;
+}
+
+void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu = NULL;
+ struct dma_pte *pte = (struct dma_pte *) p2m_entry;
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ if (cap_caching_mode(iommu->cap))
+ iommu_flush_iotlb_psi(iommu, d->domain_id,
+ gfn << PAGE_SHIFT_4K, 1, 0);
+ else if (cap_rwbf(iommu->cap))
+ iommu_flush_write_buffer(iommu);
+ }
+ iommu_flush_cache_entry(iommu, pte);
+}
+
+int
+prepare_device(struct domain *domain, struct pci_dev dev)
+{
+ return 0;
+}
+
+static int iommu_prepare_rmrr_dev(
+ struct domain *d,
+ struct acpi_rmrr_unit *rmrr,
+ struct pci_dev *pdev)
+{
+ struct acpi_drhd_unit *drhd;
+ unsigned long size;
+ int ret;
+
+ /* page table init */
+ size = rmrr->end_address - rmrr->base_address + 1;
+ ret = iommu_page_mapping(d, rmrr->base_address,
+ (void *)rmrr->base_address, size,
+ DMA_PTE_READ|DMA_PTE_WRITE);
+ if (ret)
+ return ret;
+
+ if (domain_context_mapped(d, pdev) == 0) {
+ drhd = acpi_find_matched_drhd_unit(pdev);
+ ret = domain_context_mapping(d, drhd->iommu, pdev);
+ if (!ret)
+ return 0;
+ }
+ return ret;
+}
+
+void __init setup_dom0_devices(void)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(dom0);
+ struct acpi_drhd_unit *drhd;
+ struct pci_dev *pdev;
+ int bus, dev, func;
+ u32 l;
+ u8 hdr_type;
+ int ret;
+
+#ifdef DEBUG_VTD_CONTEXT_ENTRY
+ for (bus = 0; bus < 256; bus++) {
+ for (dev = 0; dev < 32; dev++) {
+ for (func = 0; func < 8; func++) {
+ struct context_entry *context;
+ struct pci_dev device;
+
+ device.bus = bus;
+ device.devfn = PCI_DEVFN(dev, func);
+ drhd = acpi_find_matched_drhd_unit(&device);
+ context = device_to_context_entry(drhd->iommu,
+ bus, PCI_DEVFN(dev, func));
+ if ((context->lo != 0) || (context->hi != 0))
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "setup_dom0_devices-%x:%x:%x- context not 0\n",
+ bus, dev, func);
+ }
+ }
+ }
+#endif
+
+ for (bus = 0; bus < 256; bus++) {
+ for (dev = 0; dev < 32; dev++) {
+ for (func = 0; func < 8; func++) {
+ l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
+ /* some broken boards return 0 or ~0 if a slot is empty: */
+ if (l == 0xffffffff || l == 0x00000000 ||
+ l == 0x0000ffff || l == 0xffff0000)
+ continue;
+ pdev = xmalloc(struct pci_dev);
+ pdev->bus = bus;
+ pdev->devfn = PCI_DEVFN(dev, func);
+ list_add_tail(&pdev->list, &hd->pdev_list);
+
+ drhd = acpi_find_matched_drhd_unit(pdev);
+ ret = domain_context_mapping(dom0, drhd->iommu, pdev);
+ if (ret != 0)
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "domain_context_mapping failed\n");
+
+ hdr_type = read_pci_config(bus, dev, func, PCI_HEADER_TYPE);
+ // if ((hdr_type & 0x8) == 0)
+ // break;
+ }
+ }
+ }
+ for_each_pdev(dom0, pdev) {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "setup_dom0_devices: bdf = %x:%x:%x\n",
+ pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ }
+}
+
+void clear_fault_bit(struct iommu *iommu)
+{
+ u64 val;
+
+ val = dmar_readq(
+ iommu->reg,
+ cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
+ dmar_writeq(
+ iommu->reg,
+ cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
+ val);
+ dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
+}
+
+/*
+ * Called from the ACPI discovery code. Once all DMARs and RMRRs have been
+ * scanned, run through and initialize as much of the hardware as necessary.
+ */
+int vtd_enable = 1;
+static void setup_vtd_enable(char *s)
+{
+ if ( !strcmp(s, "0") )
+ vtd_enable = 0;
+ else if ( !strcmp(s, "1") )
+ vtd_enable = 1;
+ else
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "Unknown vtd_enable value specified: '%s'\n", s);
+ dprintk(XENLOG_INFO VTDPREFIX, "vtd_enable = %x\n", vtd_enable);
+}
+custom_param("vtd", setup_vtd_enable);
+
+static int init_vtd_hw(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int ret;
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ ret = iommu_set_root_entry(iommu);
+ if (ret) {
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+static int enable_vtd_translation(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int vector = 0;
+
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ vector = iommu_set_interrupt(iommu);
+ dma_msi_data_init(iommu, vector);
+ dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
+ iommu->vector = vector;
+ clear_fault_bit(iommu);
+ if (vtd_enable && iommu_enable_translation(iommu))
+ return -EIO;
+ }
+ return 0;
+}
+
+static void setup_dom0_rmrr(void)
+{
+ struct acpi_rmrr_unit *rmrr;
+ struct pci_dev *pdev;
+ int ret;
+
+ for_each_rmrr_device(rmrr, pdev)
+ ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
+ if (ret)
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: mapping reserved region failed\n");
+ end_for_each_rmrr_device(rmrr, pdev)
+}
+
+int iommu_setup(void)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(dom0);
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+
+ if (list_empty(&acpi_drhd_units))
+ return 0;
+
+ INIT_LIST_HEAD(&hd->pdev_list);
+
+ /* start from scratch */
+ flush_all();
+
+ /* setup clflush size */
+ x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
+
+ /*
+ * allocate IO page directory page for the domain.
+ */
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
+ hd->pgd = (struct dma_pte *)alloc_xenheap_page();
+ memset((u8*)hd->pgd, 0, PAGE_SIZE);
+
+ if (init_vtd_hw())
+ goto error;
+ setup_dom0_devices();
+ setup_dom0_rmrr();
+ if (enable_vtd_translation())
+ goto error;
+
+ return 0;
+
+error:
+ printk("iommu_setup() failed\n");
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ free_iommu(iommu);
+ }
+ return -EIO;
+}
+
+int assign_device(struct domain *d, u8 bus, u8 devfn)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct acpi_rmrr_unit *rmrr;
+ struct pci_dev *pdev;
+ int ret = 0;
+
+ if (list_empty(&acpi_drhd_units))
+ return ret;
+
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "assign_device: bus = %x dev = %x func = %x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ reassign_device_ownership(dom0, d, bus, devfn);
+
+ /* set up RMRR identity mapping just once per domain */
+ if (list_empty(&hd->pdev_list))
+ for_each_rmrr_device(rmrr, pdev)
+ ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
+ if (ret)
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: mapping reserved region failed\n");
+ end_for_each_rmrr_device(rmrr, pdev)
+ return ret;
+}
+
+void iommu_set_pgd(struct domain *d)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ unsigned long p2m_table;
+
+ if (hd->pgd) {
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
+ hd->pgd = NULL;
+ }
+ p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
+
+#if CONFIG_PAGING_LEVELS == 3
+ if ( !hd->pgd )
+ {
+ int level = agaw_to_level(hd->agaw);
+ struct dma_pte *pmd = NULL;
+ struct dma_pte *pgd = NULL;
+ struct dma_pte *pte = NULL;
+ l3_pgentry_t *l3e;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&hd->mapping_lock, flags);
+ if (!hd->pgd) {
+ pgd = (struct dma_pte *)alloc_xenheap_page();
+ memset((u8*)pgd, 0, PAGE_SIZE);
+ if (!hd->pgd)
+ hd->pgd = pgd;
+ else /* somebody is fast */
+ free_xenheap_page((void *) pgd);
+ }
+
+ l3e = map_domain_page(p2m_table);
+ switch(level)
+ {
+ case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
+ /* We only support 8 entries for the PAE L3 p2m table */
+ for ( i = 0; i < 8 ; i++ )
+ {
+ /* Don't create new L2 entry, use ones from p2m table */
+ pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+ }
+ break;
+
+ case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
+ /* We allocate one more page for the top vtd page table. */
+ pmd = (struct dma_pte *)alloc_xenheap_page();
+ memset((u8*)pmd, 0, PAGE_SIZE);
+ pte = &pgd[0];
+ dma_set_pte_addr(*pte, virt_to_maddr(pmd));
+ dma_set_pte_readable(*pte);
+ dma_set_pte_writable(*pte);
+
+ for ( i = 0; i < 8; i++ )
+ {
+ /* Don't create new L2 entry, use ones from p2m table */
+ pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+ }
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+ break;
+ }
+ unmap_domain_page(l3e);
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ }
+#elif CONFIG_PAGING_LEVELS == 4
+ if ( !hd->pgd )
+ {
+ int level = agaw_to_level(hd->agaw);
+ l3_pgentry_t *l3e;
+ mfn_t pgd_mfn;
+
+ switch (level)
+ {
+ case VTD_PAGE_TABLE_LEVEL_3:
+ l3e = map_domain_page(p2m_table);
+ if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd: second level wasn't there\n");
+ unmap_domain_page(l3e);
+ return;
+ }
+ pgd_mfn = _mfn(l3e_get_pfn(*l3e));
+ unmap_domain_page(l3e);
+ hd->pgd = maddr_to_virt(pagetable_get_paddr(
+ pagetable_from_mfn(pgd_mfn)));
+ break;
+
+ case VTD_PAGE_TABLE_LEVEL_4:
+ pgd_mfn = _mfn(p2m_table);
+ hd->pgd = maddr_to_virt(pagetable_get_paddr(
+ pagetable_from_mfn(pgd_mfn)));
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+ break;
+ }
+ }
+#endif
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
+}
+
+
+u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
+int iommu_suspend(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int i = 0;
+
+ if (!vtd_enable)
+ return 0;
+
+ flush_all();
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ iommu_state[DMAR_RTADDR_REG * i] =
+ (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
+ iommu_state[DMAR_FECTL_REG * i] =
+ (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
+ iommu_state[DMAR_FEDATA_REG * i] =
+ (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
+ iommu_state[DMAR_FEADDR_REG * i] =
+ (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
+ iommu_state[DMAR_FEUADDR_REG * i] =
+ (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
+ iommu_state[DMAR_PLMBASE_REG * i] =
+ (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
+ iommu_state[DMAR_PLMLIMIT_REG * i] =
+ (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
+ iommu_state[DMAR_PHMBASE_REG * i] =
+ (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
+ iommu_state[DMAR_PHMLIMIT_REG * i] =
+ (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
+ i++;
+ }
+
+ return 0;
+}
+
+int iommu_resume(void)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int i = 0;
+
+ if (!vtd_enable)
+ return 0;
+
+ flush_all();
+
+ init_vtd_hw();
+ for_each_drhd_unit(drhd) {
+ iommu = drhd->iommu;
+ dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
+ (u64) iommu_state[DMAR_RTADDR_REG * i]);
+ dmar_writel(iommu->reg, DMAR_FECTL_REG,
+ (u32) iommu_state[DMAR_FECTL_REG * i]);
+ dmar_writel(iommu->reg, DMAR_FEDATA_REG,
+ (u32) iommu_state[DMAR_FEDATA_REG * i]);
+ dmar_writel(iommu->reg, DMAR_FEADDR_REG,
+ (u32) iommu_state[DMAR_FEADDR_REG * i]);
+ dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
+ (u32) iommu_state[DMAR_FEUADDR_REG * i]);
+ dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
+ (u32) iommu_state[DMAR_PLMBASE_REG * i]);
+ dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
+ (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
+ dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
+ (u64) iommu_state[DMAR_PHMBASE_REG * i]);
+ dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
+ (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
+
+ if (iommu_enable_translation(iommu))
+ return -EIO;
+ i++;
+ }
+ return 0;
+}
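
(For reference, and not part of the patch: the gfn -> IOVA -> per-level index arithmetic that iommu_map_page() and print_vtd_entries() rely on can be exercised with a small standalone C sketch. PAGE_SHIFT_4K and the 9-bit-per-level stride are taken from the code above; everything else is illustrative.)

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT_4K 12      /* 4KB pages, as used by iommu_map_page() */
#define LEVEL_STRIDE  9       /* 512 entries per VT-d page-table level */

static unsigned int level_index(uint64_t gfn, int level)
{
    /* level 1 is the leaf table; each level above consumes 9 more bits */
    return (gfn >> ((level - 1) * LEVEL_STRIDE)) & 0x1ff;
}

int main(void)
{
    uint64_t gfn = 0x12345;                   /* arbitrary example frame */
    uint64_t iova = gfn << PAGE_SHIFT_4K;     /* address handed to addr_to_dma_pte() */

    printf("iova = %#llx\n", (unsigned long long)iova);
    printf("l3/l2/l1 index = %#x/%#x/%#x\n",
           level_index(gfn, 3), level_index(gfn, 2), level_index(gfn, 1));
    return 0;
}
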
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/io.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/io.c Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/trace.h>
+#include <xen/event.h>
+#include <xen/hypercall.h>
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/paging.h>
+#include <asm/shadow.h>
+#include <asm/p2m.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vpt.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vlapic.h>
+#include <public/sched.h>
+#include <xen/iocap.h>
+#include <public/hvm/ioreq.h>
+
+int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
+{
+ uint32_t device, intx;
+ uint32_t link, isa_irq;
+ struct hvm_irq *hvm_irq;
+
+ if (!vtd_enabled || (d == dom0))
+ return 0;
+
+ if (d->arch.hvm_domain.irq.mirq[mirq].valid)
+ {
+ device = d->arch.hvm_domain.irq.mirq[mirq].device;
+ intx = d->arch.hvm_domain.irq.mirq[mirq].intx;
+ link = hvm_pci_intx_link(device, intx);
+ hvm_irq = &d->arch.hvm_domain.irq;
+ isa_irq = hvm_irq->pci_link.route[link];
+
+ if ( !d->arch.hvm_domain.irq.girq[isa_irq].valid )
+ {
+ d->arch.hvm_domain.irq.girq[isa_irq].valid = 1;
+ d->arch.hvm_domain.irq.girq[isa_irq].device = device;
+ d->arch.hvm_domain.irq.girq[isa_irq].intx = intx;
+ d->arch.hvm_domain.irq.girq[isa_irq].machine_gsi = mirq;
+ }
+
+ if ( !test_and_set_bit(mirq, d->arch.hvm_domain.irq.dirq_mask) )
+ {
+ vcpu_kick(d->vcpu[0]);
+ return 1;
+ }
+ else
+ dprintk(XENLOG_INFO, "Want to pending mirq, but failed\n");
+ }
+ return 0;
+}
+
+void hvm_dpci_eoi(unsigned int guest_gsi, union vioapic_redir_entry *ent)
+{
+ struct domain *d = current->domain;
+ uint32_t device, intx, machine_gsi;
+ irq_desc_t *desc;
+
+ if (d->arch.hvm_domain.irq.girq[guest_gsi].valid)
+ {
+ device = d->arch.hvm_domain.irq.girq[guest_gsi].device;
+ intx = d->arch.hvm_domain.irq.girq[guest_gsi].intx;
+ machine_gsi = d->arch.hvm_domain.irq.girq[guest_gsi].machine_gsi;
+ gdprintk(XENLOG_INFO, "hvm_dpci_eoi:: device %x intx %x\n",
+ device, intx);
+ hvm_pci_intx_deassert(d, device, intx);
+ if ( (ent == NULL) || (ent && ent->fields.mask == 0) ) {
+ desc = &irq_desc[irq_to_vector(machine_gsi)];
+ desc->handler->end(irq_to_vector(machine_gsi));
+ }
+ }
+}
+
+int release_devices(struct domain *d)
+{
+ struct hvm_domain *hd = &d->arch.hvm_domain;
+ uint32_t i;
+ int ret = 0;
+
+ if (!vtd_enabled)
+ return ret;
+
+ /* unbind irq */
+ for (i = 0; i < NR_IRQS; i++) {
+ if (hd->irq.mirq[i].valid)
+ ret = pirq_guest_unbind(d, i);
+ }
+ iommu_domain_teardown(d);
+ return ret;
+}
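
(A toy model, not part of the patch, of the mirq-to-guest-GSI bookkeeping in hvm_do_IRQ_dpci() above. hvm_pci_intx_link() is assumed here to fold device and pin as (device + intx) & 3, and the structures are simplified stand-ins for the hvm_irq fields.)

/* Toy model only -- not part of the patch. */
#include <stdio.h>

struct toy_mirq { int valid, device, intx; };
struct toy_girq { int valid, device, intx, machine_gsi; };

static int toy_pci_intx_link(int device, int intx)
{
    return (device + intx) & 3;            /* assumed link folding, see note above */
}

int main(void)
{
    int route[4] = { 5, 10, 11, 5 };       /* example guest pci_link routing */
    struct toy_mirq mirq = { 1, /*device*/ 3, /*intx*/ 0 };
    struct toy_girq girq[16] = { { 0 } };
    int machine_gsi = 20;                  /* example machine IRQ */

    if (mirq.valid) {
        int link = toy_pci_intx_link(mirq.device, mirq.intx);
        int isa_irq = route[link];
        /* record the reverse mapping so the EOI path can find its way back */
        girq[isa_irq] = (struct toy_girq){ 1, mirq.device, mirq.intx, machine_gsi };
        printf("mirq %d -> link %d -> guest irq %d\n", machine_gsi, link, isa_irq);
    }
    return 0;
}
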
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/msi.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/msi.h Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2003-2004 Intel
+ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@xxxxxxxxx)
+ */
+
+#ifndef MSI_H
+#define MSI_H
+
+/*
+ * Assume the maximum number of hot-plug slots supported by the system is
+ * about ten. The worst case is that each of these slots is hot-added with a
+ * device that has two MSI/MSI-X capable functions. To guard against an MSI-X
+ * driver that attempts to request all available vectors, NR_HP_RESERVED_VECTORS
+ * is defined below to ensure at least one message is assigned to each
+ * detected MSI/MSI-X device function.
+ */
+#define NR_HP_RESERVED_VECTORS 20
+
+extern int vector_irq[NR_VECTORS];
+extern void (*interrupt[NR_IRQS])(void);
+extern int pci_vector_resources(int last, int nr_released);
+
+/*
+ * MSI-X Address Register
+ */
+#define PCI_MSIX_FLAGS_QSIZE 0x7FF
+#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
+#define PCI_MSIX_FLAGS_BIRMASK (7 << 0)
+#define PCI_MSIX_FLAGS_BITMASK (1 << 0)
+
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
+#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
+#define PCI_MSIX_ENTRY_DATA_OFFSET 8
+#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
+
+#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
+#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
+#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
+#define msi_data_reg(base, is64bit) \
+ ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
+#define msi_mask_bits_reg(base, is64bit) \
+ ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
+#define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE
+#define multi_msi_capable(control) \
+ (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
+#define multi_msi_enable(control, num) \
+ control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+#define is_64bit_address(control) (control & PCI_MSI_FLAGS_64BIT)
+#define is_mask_bit_support(control) (control & PCI_MSI_FLAGS_MASKBIT)
+#define msi_enable(control, num) multi_msi_enable(control, num); \
+ control |= PCI_MSI_FLAGS_ENABLE
+
+#define msix_table_offset_reg(base) (base + 0x04)
+#define msix_pba_offset_reg(base) (base + 0x08)
+#define msix_enable(control) control |= PCI_MSIX_FLAGS_ENABLE
+#define msix_disable(control) control &= ~PCI_MSIX_FLAGS_ENABLE
+#define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1)
+#define multi_msix_capable msix_table_size
+#define msix_unmask(address) (address & ~PCI_MSIX_FLAGS_BITMASK)
+#define msix_mask(address) (address | PCI_MSIX_FLAGS_BITMASK)
+#define msix_is_pending(address) (address & PCI_MSIX_FLAGS_PENDMASK)
+
+/*
+ * MSI Defined Data Structures
+ */
+#define MSI_ADDRESS_HEADER 0xfee
+#define MSI_ADDRESS_HEADER_SHIFT 12
+#define MSI_ADDRESS_HEADER_MASK 0xfff000
+#define MSI_ADDRESS_DEST_ID_MASK 0xfff0000f
+#define MSI_TARGET_CPU_MASK 0xff
+#define MSI_TARGET_CPU_SHIFT 12
+#define MSI_DELIVERY_MODE 0
+#define MSI_LEVEL_MODE 1 /* Edge always assert */
+#define MSI_TRIGGER_MODE 0 /* MSI is edge sensitive */
+#define MSI_PHYSICAL_MODE 0
+#define MSI_LOGICAL_MODE 1
+#define MSI_REDIRECTION_HINT_MODE 0
+
+#define __LITTLE_ENDIAN_BITFIELD 1
+
+struct msg_data {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u32 vector : 8;
+ __u32 delivery_mode : 3; /* 000b: FIXED | 001b: lowest prior */
+ __u32 reserved_1 : 3;
+ __u32 level : 1; /* 0: deassert | 1: assert */
+ __u32 trigger : 1; /* 0: edge | 1: level */
+ __u32 reserved_2 : 16;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u32 reserved_2 : 16;
+ __u32 trigger : 1; /* 0: edge | 1: level */
+ __u32 level : 1; /* 0: deassert | 1: assert */
+ __u32 reserved_1 : 3;
+ __u32 delivery_mode : 3; /* 000b: FIXED | 001b: lowest prior */
+ __u32 vector : 8;
+#else
+#error "Bitfield endianness not defined! Check your byteorder.h"
+#endif
+} __attribute__ ((packed));
+
+struct msg_address {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u32 reserved_1 : 2;
+ __u32 dest_mode : 1; /* 0: physical | 1: logical */
+ __u32 redirection_hint: 1; /* 0: dedicated CPU, 1: lowest priority */
+ __u32 reserved_2 : 4;
+ __u32 dest_id : 24; /* Destination ID */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u32 dest_id : 24; /* Destination ID */
+ __u32 reserved_2 : 4;
+ __u32 redirection_hint: 1; /* 0: dedicated CPU, 1: lowest priority */
+ __u32 dest_mode : 1; /* 0: physical | 1: logical */
+ __u32 reserved_1 : 2;
+#else
+#error "Bitfield endianness not defined! Check your byteorder.h"
+#endif
+ }u;
+ __u32 value;
+ }lo_address;
+ __u32 hi_address;
+} __attribute__ ((packed));
+
+#endif /* MSI_H */
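
(Minimal sketch, not part of the patch: packing the 32-bit MSI data value according to the little-endian struct msg_data layout above -- vector in bits 0-7, delivery mode in bits 8-10, level in bit 14, trigger in bit 15. The values in main() are arbitrary examples.)

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>
#include <stdint.h>

static uint32_t msi_pack_data(uint8_t vector, uint8_t delivery_mode,
                              uint8_t level, uint8_t trigger)
{
    /* field positions follow the little-endian bitfields of struct msg_data */
    return (uint32_t)vector
         | ((uint32_t)(delivery_mode & 0x7) << 8)
         | ((uint32_t)(level & 1) << 14)
         | ((uint32_t)(trigger & 1) << 15);
}

int main(void)
{
    /* vector 0xd0, fixed delivery, assert, edge-triggered */
    printf("msi data = %#x\n", msi_pack_data(0xd0, 0, 1, 0));
    return 0;
}
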
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/pci-direct.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/pci-direct.h Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,48 @@
+#ifndef ASM_PCI_DIRECT_H
+#define ASM_PCI_DIRECT_H 1
+
+#include <xen/types.h>
+#include <asm/io.h>
+
+/* Direct PCI access. This is used for PCI accesses in early boot before
+ the PCI subsystem works. */
+
+#define PDprintk(x...)
+
+static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
+{
+ u32 v;
+ outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+ v = inl(0xcfc);
+ if (v != 0xffffffff)
+ PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
+ return v;
+}
+
+static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
+{
+ u8 v;
+ outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+ v = inb(0xcfc + (offset&3));
+ PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
+ return v;
+}
+
+static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
+{
+ u16 v;
+ outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+ v = inw(0xcfc + (offset&2));
+ PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
+ return v;
+}
+
+static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
+ u32 val)
+{
+ PDprintk("%x writing to %x: %x\n", slot, offset, val);
+ outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+ outl(val, 0xcfc);
+}
+
+#endif
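
(Standalone sketch, not part of the patch, of the CONFIG_ADDRESS encoding that read_pci_config() and friends write to port 0xcf8: enable bit 31, bus in bits 16-23, slot in bits 11-15, function in bits 8-10, register offset in the low byte. The bus/device/function in main() are arbitrary examples.)

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>
#include <stdint.h>

static uint32_t cf8_address(uint8_t bus, uint8_t slot, uint8_t func, uint8_t offset)
{
    /* same dword that read_pci_config() writes to port 0xcf8 */
    return 0x80000000u | ((uint32_t)bus << 16) | ((uint32_t)slot << 11)
                       | ((uint32_t)func << 8) | offset;
}

int main(void)
{
    /* PCI_VENDOR_ID (offset 0x00) of device 00:1f.0, as the dom0 probe
     * loop in setup_dom0_devices() would read it via ports 0xcf8/0xcfc */
    printf("CONFIG_ADDRESS = %#x\n", cf8_address(0, 0x1f, 0, 0x00));
    return 0;
}
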
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/pci_regs.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/pci_regs.h Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,449 @@
+/*
+ * pci_regs.h
+ *
+ * PCI standard defines
+ * Copyright 1994, Drew Eckhardt
+ * Copyright 1997--1999 Martin Mares <mj@xxxxxx>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ */
+
+#ifndef LINUX_PCI_REGS_H
+#define LINUX_PCI_REGS_H
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+#define PCI_VENDOR_ID 0x00 /* 16 bits */
+#define PCI_DEVICE_ID 0x02 /* 16 bits */
+#define PCI_COMMAND 0x04 /* 16 bits */
+#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
+#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */
+#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */
+#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */
+#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */
+#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */
+#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */
+#define PCI_COMMAND_SERR 0x100 /* Enable SERR */
+#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */
+#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
+
+#define PCI_STATUS 0x06 /* 16 bits */
+#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
+#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */
+#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */
+#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */
+#define PCI_STATUS_PARITY 0x100 /* Detected parity error */
+#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */
+#define PCI_STATUS_DEVSEL_FAST 0x000
+#define PCI_STATUS_DEVSEL_MEDIUM 0x200
+#define PCI_STATUS_DEVSEL_SLOW 0x400
+#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */
+#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */
+#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */
+#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */
+#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */
+
+#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */
+#define PCI_REVISION_ID 0x08 /* Revision ID */
+#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */
+#define PCI_CLASS_DEVICE 0x0a /* Device class */
+
+#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
+#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
+#define PCI_HEADER_TYPE 0x0e /* 8 bits */
+#define PCI_HEADER_TYPE_NORMAL 0
+#define PCI_HEADER_TYPE_BRIDGE 1
+#define PCI_HEADER_TYPE_CARDBUS 2
+
+#define PCI_BIST 0x0f /* 8 bits */
+#define PCI_BIST_CODE_MASK 0x0f /* Return result */
+#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
+
+/*
+ * Base addresses specify locations in memory or I/O space.
+ * Decoded size can be determined by writing a value of
+ * 0xffffffff to the register, and reading it back. Only
+ * 1 bits are decoded.
+ */
+#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */
+#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */
+#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */
+#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */
+#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */
+#define PCI_BASE_ADDRESS_SPACE_IO 0x01
+#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00
+#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
+#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */
+#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */
+#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */
+#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */
+#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL)
+#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL)
+/* bit 1 is reserved if address_space = 1 */
+
+/* Header type 0 (normal devices) */
+#define PCI_CARDBUS_CIS 0x28
+#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
+#define PCI_SUBSYSTEM_ID 0x2e
+#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
+#define PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
+
+#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
+
+/* 0x35-0x3b are reserved */
+#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
+#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
+#define PCI_MIN_GNT 0x3e /* 8 bits */
+#define PCI_MAX_LAT 0x3f /* 8 bits */
+
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */
+#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */
+#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */
+#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */
+#define PCI_IO_LIMIT 0x1d
+#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */
+#define PCI_IO_RANGE_TYPE_16 0x00
+#define PCI_IO_RANGE_TYPE_32 0x01
+#define PCI_IO_RANGE_MASK (~0x0fUL)
+#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */
+#define PCI_MEMORY_BASE 0x20 /* Memory range behind */
+#define PCI_MEMORY_LIMIT 0x22
+#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT 0x26
+#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define PCI_PREF_RANGE_TYPE_32 0x00
+#define PCI_PREF_RANGE_TYPE_64 0x01
+#define PCI_PREF_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32 0x2c
+#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16 0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b is reserved */
+#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL 0x3e
+#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
+#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
+#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
+#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
+#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
+#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+
+/* Header type 2 (CardBus bridges) */
+#define PCI_CB_CAPABILITY_LIST 0x14
+/* 0x15 reserved */
+#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */
+#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */
+#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */
+#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */
+#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */
+#define PCI_CB_MEMORY_BASE_0 0x1c
+#define PCI_CB_MEMORY_LIMIT_0 0x20
+#define PCI_CB_MEMORY_BASE_1 0x24
+#define PCI_CB_MEMORY_LIMIT_1 0x28
+#define PCI_CB_IO_BASE_0 0x2c
+#define PCI_CB_IO_BASE_0_HI 0x2e
+#define PCI_CB_IO_LIMIT_0 0x30
+#define PCI_CB_IO_LIMIT_0_HI 0x32
+#define PCI_CB_IO_BASE_1 0x34
+#define PCI_CB_IO_BASE_1_HI 0x36
+#define PCI_CB_IO_LIMIT_1 0x38
+#define PCI_CB_IO_LIMIT_1_HI 0x3a
+#define PCI_CB_IO_RANGE_MASK (~0x03UL)
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_CB_BRIDGE_CONTROL 0x3e
+#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */
+#define PCI_CB_BRIDGE_CTL_SERR 0x02
+#define PCI_CB_BRIDGE_CTL_ISA 0x04
+#define PCI_CB_BRIDGE_CTL_VGA 0x08
+#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20
+#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */
+#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
+#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400
+#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40
+#define PCI_CB_SUBSYSTEM_ID 0x42
+#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */
+/* 0x48-0x7f reserved */
+
+/* Capability lists */
+
+#define PCI_CAP_LIST_ID 0 /* Capability ID */
+#define PCI_CAP_ID_PM 0x01 /* Power Management */
+#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */
+#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
+#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */
+#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */
+#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
+#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
+#define PCI_CAP_ID_HT_IRQCONF 0x08 /* HyperTransport IRQ Configuration */
+#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */
+#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
+#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
+#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */
+#define PCI_CAP_SIZEOF 4
+
+/* Power Management Registers */
+
+#define PCI_PM_PMC 2 /* PM Capabilities Register */
+#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */
+#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */
+#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */
+#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */
+#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */
+#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
+#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
+#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */
+#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */
+#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
+#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
+#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
+#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
+#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
+#define PCI_PM_CTRL 4 /* PM control and status register */
+#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */
+#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
+#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
+#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
+#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
+#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */
+#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */
+#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */
+#define PCI_PM_DATA_REGISTER 7 /* (??) */
+#define PCI_PM_SIZEOF 8
+
+/* AGP registers */
+
+#define PCI_AGP_VERSION 2 /* BCD version number */
+#define PCI_AGP_RFU 3 /* Rest of capability flags */
+#define PCI_AGP_STATUS 4 /* Status register */
+#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
+#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */
+#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */
+#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */
+#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */
+#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */
+#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */
+#define PCI_AGP_COMMAND 8 /* Control register */
+#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */
+#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */
+#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */
+#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */
+#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */
+#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */
+#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */
+#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */
+#define PCI_AGP_SIZEOF 12
+
+/* Vital Product Data */
+
+#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */
+#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */
+#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */
+#define PCI_VPD_DATA 4 /* 32-bits of data returned here */
+
+/* Slot Identification */
+
+#define PCI_SID_ESR 2 /* Expansion Slot Register */
+#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
+#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
+#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */
+
+/* Message Signalled Interrupts registers */
+
+#define PCI_MSI_FLAGS 2 /* Various flags */
+#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */
+#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */
+#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */
+#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */
+#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */
+#define PCI_MSI_RFU 3 /* Rest of capability flags */
+#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */
+#define PCI_MSI_MASK_BIT 16 /* Mask bits register */
+
+/* CompactPCI Hotswap Register */
+
+#define PCI_CHSWP_CSR 2 /* Control and Status Register */
+#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */
+#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */
+#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */
+#define PCI_CHSWP_LOO 0x08 /* LED On / Off */
+#define PCI_CHSWP_PI 0x30 /* Programming Interface */
+#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */
+#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */
+
+/* PCI-X registers */
+
+#define PCI_X_CMD 2 /* Modes & Features */
+#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */
+#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */
+#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */
+#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */
+#define PCI_X_STATUS 4 /* PCI-X capabilities */
+#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */
+#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */
+#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */
+#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */
+#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */
+#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */
+#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */
+#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */
+#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */
+#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */
+#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */
+#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */
+#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */
+
+/* PCI Express capability registers */
+
+#define PCI_EXP_FLAGS 2 /* Capabilities register */
+#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */
+#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */
+#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */
+#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */
+#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */
+#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
+#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
+#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
+#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
+#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
+#define PCI_EXP_DEVCAP 4 /* Device capabilities */
+#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */
+#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */
+#define PCI_EXP_DEVCAP_EXT_TAG 0x20 /* Extended tags */
+#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */
+#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */
+#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */
+#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */
+#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */
+#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
+#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
+#define PCI_EXP_DEVCTL 8 /* Device Control */
+#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */
+#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
+#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */
+#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */
+#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
+#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */
+#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */
+#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */
+#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
+#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */
+#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */
+#define PCI_EXP_DEVSTA 10 /* Device Status */
+#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */
+#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */
+#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */
+#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */
+#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
+#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */
+#define PCI_EXP_LNKCAP 12 /* Link Capabilities */
+#define PCI_EXP_LNKCTL 16 /* Link Control */
+#define PCI_EXP_LNKSTA 18 /* Link Status */
+#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */
+#define PCI_EXP_SLTCTL 24 /* Slot Control */
+#define PCI_EXP_SLTSTA 26 /* Slot Status */
+#define PCI_EXP_RTCTL 28 /* Root Control */
+#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */
+#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */
+#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */
+#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */
+#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */
+#define PCI_EXP_RTCAP 30 /* Root Capabilities */
+#define PCI_EXP_RTSTA 32 /* Root Status */
+
+/* Extended Capabilities (PCI-X 2.0 and Express) */
+#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff)
+#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf)
+#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc)
+
+#define PCI_EXT_CAP_ID_ERR 1
+#define PCI_EXT_CAP_ID_VC 2
+#define PCI_EXT_CAP_ID_DSN 3
+#define PCI_EXT_CAP_ID_PWR 4
+
+/* Advanced Error Reporting */
+#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
+#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */
+#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */
+#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */
+#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */
+#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */
+#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */
+#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */
+#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */
+#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */
+#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */
+#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */
+#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */
+ /* Same bits as above */
+#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */
+ /* Same bits as above */
+#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */
+#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */
+#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */
+#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */
+#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */
+#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */
+#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */
+ /* Same bits as above */
+#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */
+#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */
+#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */
+#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */
+#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */
+#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */
+#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */
+#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */
+#define PCI_ERR_ROOT_STATUS 48
+#define PCI_ERR_ROOT_COR_SRC 52
+#define PCI_ERR_ROOT_SRC 54
+
+/* Virtual Channel */
+#define PCI_VC_PORT_REG1 4
+#define PCI_VC_PORT_REG2 8
+#define PCI_VC_PORT_CTRL 12
+#define PCI_VC_PORT_STATUS 14
+#define PCI_VC_RES_CAP 16
+#define PCI_VC_RES_CTRL 20
+#define PCI_VC_RES_STATUS 26
+
+/* Power Budgeting */
+#define PCI_PWR_DSR 4 /* Data Select Register */
+#define PCI_PWR_DATA 8 /* Data Register */
+#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */
+#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */
+#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */
+#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
+#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */
+#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */
+#define PCI_PWR_CAP 12 /* Capability */
+#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
+
+#endif /* LINUX_PCI_REGS_H */
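
(Illustrative only, not part of the patch: walking a PCI capability list with the offsets defined above, over a mocked-up 256-byte config space. In the hypervisor the byte reads would go through read_pci_config_byte() from pci-direct.h instead; the capability placed at 0x50 is an arbitrary example.)

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>
#include <stdint.h>

#define PCI_STATUS           0x06
#define PCI_STATUS_CAP_LIST  0x10
#define PCI_CAPABILITY_LIST  0x34
#define PCI_CAP_LIST_ID      0
#define PCI_CAP_LIST_NEXT    1
#define PCI_CAP_ID_MSI       0x05

int main(void)
{
    uint8_t cfg[256] = { 0 };

    cfg[PCI_STATUS] = PCI_STATUS_CAP_LIST;      /* capability list present */
    cfg[PCI_CAPABILITY_LIST] = 0x50;            /* first capability at 0x50 */
    cfg[0x50 + PCI_CAP_LIST_ID] = PCI_CAP_ID_MSI;
    cfg[0x50 + PCI_CAP_LIST_NEXT] = 0;          /* end of list */

    if (cfg[PCI_STATUS] & PCI_STATUS_CAP_LIST) {
        uint8_t pos = cfg[PCI_CAPABILITY_LIST] & ~3;
        while (pos) {
            printf("capability id %#x at %#x\n", cfg[pos + PCI_CAP_LIST_ID], pos);
            pos = cfg[pos + PCI_CAP_LIST_NEXT] & ~3;
        }
    }
    return 0;
}
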
diff -r acfa9290746f -r f4bbd3f327e4 xen/arch/x86/hvm/vmx/vtd/utils.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/utils.c Fri Sep 14 16:40:49 2007 +0100
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ */
+
+#include <xen/init.h>
+#include <xen/bitmap.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <asm/delay.h>
+#include <asm/iommu.h>
+#include <asm/hvm/vmx/intel-iommu.h>
+#include "dmar.h"
+#include "pci-direct.h"
+#include "pci_regs.h"
+#include "msi.h"
+
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+
+#if defined(__x86_64__)
+void print_iommu_regs(struct acpi_drhd_unit *drhd)
+{
+ struct iommu *iommu = drhd->iommu;
+
+ printk("---- print_iommu_regs ----\n");
+ printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
+ printk("print_iommu_regs: DMAR_VER_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_VER_REG));
+ printk("print_iommu_regs: DMAR_CAP_REG = %lx\n",
+ dmar_readq(iommu->reg,DMAR_CAP_REG));
+ printk("print_iommu_regs: n_fault_reg = %lx\n",
+ cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
+ printk("print_iommu_regs: fault_recording_offset_l = %lx\n",
+ cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
+ printk("print_iommu_regs: fault_recording_offset_h = %lx\n",
+ cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
+ printk("print_iommu_regs: fault_recording_reg_l = %lx\n",
+ dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
+ printk("print_iommu_regs: fault_recording_reg_h = %lx\n",
+ dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
+ printk("print_iommu_regs: DMAR_ECAP_REG = %lx\n",
+ dmar_readq(iommu->reg,DMAR_ECAP_REG));
+ printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_GCMD_REG));
+ printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_GSTS_REG));
+ printk("print_iommu_regs: DMAR_RTADDR_REG = %lx\n",
+ dmar_readq(iommu->reg,DMAR_RTADDR_REG));
+ printk("print_iommu_regs: DMAR_CCMD_REG = %lx\n",
+ dmar_readq(iommu->reg,DMAR_CCMD_REG));
+ printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FSTS_REG));
+ printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FECTL_REG));
+ printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FEDATA_REG));
+ printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FEADDR_REG));
+ printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
+}
+
+void print_vtd_entries(struct domain *d, int bus, int devfn,
+ unsigned long gmfn)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ struct context_entry *ctxt_entry;
+ struct root_entry *root_entry;
+ u64 *l3, *l2, *l1;
+ u32 l3_index, l2_index, l1_index;
+ u32 i = 0;
+
+ printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn
= %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn);
+
+ for_each_drhd_unit(drhd) {
+ printk("---- print_vtd_entries %d ----\n", i++);
+
+ if (hd->pgd == NULL) {
+ printk(" hg->pgd == NULL\n");
+ return;
+ }
+
+ iommu = drhd->iommu;
+ root_entry = iommu->root_entry;
+ printk(" hd->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
+ hd->pgd, virt_to_maddr(hd->pgd));
+
+ printk(" root_entry = %p\n", root_entry);
+ if (root_entry == NULL) {
+ printk(" root_entry == NULL\n");
+ return;
+ }
+
+ printk(" root_entry[%x] = %lx\n", bus, root_entry[bus].val);
+ printk(" maddr_to_virt(root_entry[%x]) = %p\n",
+ bus, maddr_to_virt(root_entry[bus].val));
+
+ if (root_entry[bus].val == 0) {
+ printk(" root_entry[%x].lo == 0\n", bus);
+ return;
+ }
+
+ ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
+ if (ctxt_entry == NULL) {
+ printk(" ctxt_entry == NULL\n");
+ return;
+ }
+
+ if (ctxt_entry[devfn].lo == 0) {
+ printk(" ctxt_entry[%x].lo == 0\n", devfn);
+ return;
+ }
+
+ printk(" context = %p\n", ctxt_entry);
+ printk(" context[%x] = %lx %lx\n",
+ devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
+ printk(" maddr_to_virt(context[%x].lo) = %p\n",
+ devfn, maddr_to_virt(ctxt_entry[devfn].lo));
+ printk(" context[%x] = %lx\n", devfn, ctxt_entry[devfn].lo);
+
+ l3 = maddr_to_virt(ctxt_entry[devfn].lo);
+ l3 = (u64*)(((u64) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+ printk(" l3 = %p\n", l3);
+ if (l3 == NULL) return;
+
+ l3_index = (gmfn >> 9 >> 9) & 0x1ff;
+ printk(" l3_index = %x\n", l3_index);
+ printk(" l3[%x] = %lx\n", l3_index, l3[l3_index]);
+
+ l2 = maddr_to_virt(l3[l3_index]);
+ l2 = (u64*)(((u64) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+ printk(" l2 = %p\n", l2);
+ if (l2 == NULL) return;
+
+ l2_index = (gmfn >> 9) & 0x1ff;
+ printk(" gmfn = %lx\n", gmfn);
+ printk(" gmfn >> 9= %lx\n", gmfn >> 9);
+ printk(" l2_index = %x\n", l2_index);
+ printk(" l2[%x] = %lx\n", l2_index, l2[l2_index]);
+
+ l1 = maddr_to_virt(l2[l2_index]);
+ l1 = (u64*)(((u64) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+ if (l1 == NULL) return;
+ l1_index = gmfn & 0x1ff;
+ printk(" l1 = %p\n", l1);
+ printk(" l1_index = %x\n", l1_index);
+ printk(" l1[%x] = %lx\n", l1_index, l1[l1_index]);
+ }
+}
+
+#else // !m64
+
+void print_iommu_regs(struct acpi_drhd_unit *drhd)
+{
+ struct iommu *iommu = drhd->iommu;
+
+ printk("---- print_iommu_regs ----\n");
+ printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
+ printk("print_iommu_regs: DMAR_VER_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_VER_REG));
+ printk("print_iommu_regs: DMAR_CAP_REG = %llx\n",
+ dmar_readq(iommu->reg,DMAR_CAP_REG));
+ printk("print_iommu_regs: n_fault_reg = %llx\n",
+ cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
+ printk("print_iommu_regs: fault_recording_offset_l = %llx\n",
+ cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
+ printk("print_iommu_regs: fault_recording_offset_h = %llx\n",
+ cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
+ printk("print_iommu_regs: fault_recording_reg_l = %llx\n",
+ dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
+ printk("print_iommu_regs: fault_recording_reg_h = %llx\n",
+ dmar_readq(iommu->reg, cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
+ printk("print_iommu_regs: DMAR_ECAP_REG = %llx\n",
+ dmar_readq(iommu->reg,DMAR_ECAP_REG));
+ printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_GCMD_REG));
+ printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_GSTS_REG));
+ printk("print_iommu_regs: DMAR_RTADDR_REG = %llx\n",
+ dmar_readq(iommu->reg,DMAR_RTADDR_REG));
+ printk("print_iommu_regs: DMAR_CCMD_REG = %llx\n",
+ dmar_readq(iommu->reg,DMAR_CCMD_REG));
+ printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FSTS_REG));
+ printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FECTL_REG));
+ printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FEDATA_REG));
+ printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FEADDR_REG));
+ printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
+ dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
+}
+
+void print_vtd_entries(struct domain *d, int bus, int devfn,
+ unsigned long gmfn)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ struct context_entry *ctxt_entry;
+ struct root_entry *root_entry;
+ u64 *l3, *l2, *l1;
+ u32 l3_index, l2_index, l1_index;
+ u32 i = 0;
+
+ printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x devfn = %x, gmfn
= %lx\n", d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn, gmfn);
+
+ for_each_drhd_unit(drhd) {
+ printk("---- print_vtd_entries %d ----\n", i++);
+
+ if (hd->pgd == NULL) {
+ printk(" hg->pgd == NULL\n");
+ return;
+ }
+
+ iommu = drhd->iommu;
+ root_entry = iommu->root_entry;
+ printk(" d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
+ hd->pgd, virt_to_maddr(hd->pgd));
+
+ printk(" root_entry = %p\n", root_entry);
+ if (root_entry == NULL) {
+ printk(" root_entry == NULL\n");
+ return;
+ }
+
+ printk(" root_entry[%x] = %llx\n", bus, root_entry[bus].val);
+ printk(" maddr_to_virt(root_entry[%x]) = %p\n",
+ bus, maddr_to_virt(root_entry[bus].val));
+
+ if (root_entry[bus].val == 0) {
+ printk(" root_entry[%x].lo == 0\n", bus);
+ return;
+ }
+
+ ctxt_entry = maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
+ if (ctxt_entry == NULL) {
+ printk(" ctxt_entry == NULL\n");
+ return;
+ }
+
+ if (ctxt_entry[devfn].lo == 0) {
+ printk(" ctxt_entry[%x].lo == 0\n", devfn);
+ return;
+ }
+
+ printk(" context = %p\n", ctxt_entry);
+ printk(" context[%x] = %llx %llx\n",
+ devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
+ printk(" maddr_to_virt(context[%x].lo) = %p\n",
+ devfn, maddr_to_virt(ctxt_entry[devfn].lo));
+ printk(" context[%x] = %llx\n", devfn, ctxt_entry[devfn].lo);
+
+ l3 = maddr_to_virt(ctxt_entry[devfn].lo);
+ l3 = (u64*)(((u32) l3 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+ printk(" l3 = %p\n", l3);
+ if (l3 == NULL) return;
+
+ l3_index = (gmfn >> 9 >> 9) & 0x1ff;
+ printk(" l3_index = %x\n", l3_index);
+ printk(" l3[%x] = %llx\n", l3_index, l3[l3_index]);
+
+ l2 = maddr_to_virt(l3[l3_index]);
+ l2 = (u64*)(((u32) l2 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+ printk(" l2 = %p\n", l2);
+ if (l2 == NULL) return;
+
+ l2_index = (gmfn >> 9) & 0x1ff;
+ printk(" gmfn = %lx\n", gmfn);
+ printk(" gmfn >> 9= %lx\n", gmfn >> 9);
+ printk(" l2_index = %x\n", l2_index);
+ printk(" l2[%x] = %llx\n", l2_index, l2[l2_index]);
+
+ l1 = maddr_to_virt(l2[l2_index]);
+ l1 = (u64*)(((u32) l1 >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+ if (l1 == NULL) return;
+ l1_index = gmfn & 0x1ff;
+ printk(" l1 = %p\n", l1);
+ printk(" l1_index = %x\n", l1_index);
+ printk(" l1[%x] = %llx\n", l1_index, l1[l1_index]);
+ }
+}
+#endif // !m64
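
(Small standalone sketch, not part of the patch, of the devfn encoding behind the "bdf = %x:%x:%x" messages above; PCI_DEVFN/PCI_SLOT/PCI_FUNC are assumed to follow the usual Linux/Xen definitions, and device 1f.2 is an arbitrary example.)

/* Illustrative sketch only -- not part of the patch. */
#include <stdio.h>

#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)       (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)       ((devfn) & 0x07)

int main(void)
{
    int bus = 0, devfn = PCI_DEVFN(0x1f, 2);

    /* prints the same bus:slot:func triple the patch logs everywhere */
    printf("bdf = %x:%x:%x (devfn = %#x)\n",
           bus, PCI_SLOT(devfn), PCI_FUNC(devfn), devfn);
    return 0;
}
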
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog