[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/1] vl.c: Add pci_hole_min_size machine option.



On 03/04/14 11:58, Michael S. Tsirkin wrote:
On Tue, Mar 04, 2014 at 11:36:44AM -0500, Don Slutz wrote:
On 03/04/14 02:34, Michael S. Tsirkin wrote:
On Thu, Feb 27, 2014 at 05:32:23PM -0500, Don Slutz wrote:
This allows growing the pci_hole to the size needed.

Signed-off-by: Don Slutz <dslutz@xxxxxxxxxxx>
Could you supply the motivation for this change please?

If you add enough PCI devices then all mmio may not fit below 4G which may not 
be the layout the user wanted. This allows you to increase the below 4G address 
space that PCI devices can use and therefore in more cases not have any mmio 
that is above 4G.

There are real PCI cards that do not support mmio over 4G, so if you want to 
emulate them precisely, you may also need to increase the space below 4G for 
them.  There are drivers for these cards that also do not work if they have 
their mmio space mapped above 4G.

    -Don Slutz

OK limiting low RAM size might work for this, but we need to notify
BIOS about low RAM size, and teahc BIOS to use it, right?


Not that I know of.  BIOS gets this info via "cmos" and e820 map. For example:


dcs-xen-50:~/qemu>out/x86_64-softmmu/qemu-system-x86_64 -serial pty 
~/qemu-img/Lin-Net-disk1.raw -chardev stdio,id=seabios -device 
isa-debugcon,iobase=0x402,chardev=seabios -machine 
pc-i440fx-2.0,pci_hole_min_size=3G -m 4G
...
RamSize: 0x40000000 [cmos]
...
RamBlock: addr 0x0000000000000000 len 0x0000000040000000 [e820]
RamBlock: addr 0x0000000100000000 len 0x00000000c0000000 [e820]
...
e820 map has 7 items:
  0: 0000000000000000 - 000000000009fc00 = 1 RAM
  1: 000000000009fc00 - 00000000000a0000 = 2 RESERVED
  2: 00000000000f0000 - 0000000000100000 = 2 RESERVED
  3: 0000000000100000 - 000000003fffe000 = 1 RAM
  4: 000000003fffe000 - 0000000040000000 = 2 RESERVED
  5: 00000000fffc0000 - 0000000100000000 = 2 RESERVED
  6: 0000000100000000 - 00000001c0000000 = 1 RAM


And

dcs-xen-50:~/qemu>out/x86_64-softmmu/qemu-system-x86_64 -serial pty 
~/qemu-img/Lin-Net-disk1.raw -chardev stdio,id=seabios -device 
isa-debugcon,iobase=0x402,chardev=seabios -machine 
pc-i440fx-2.0,pci_hole_min_size=2G -m 4G
...
RamSize: 0x80000000 [cmos]
...
RamBlock: addr 0x0000000000000000 len 0x0000000080000000 [e820]
RamBlock: addr 0x0000000100000000 len 0x0000000080000000 [e820]
...
e820 map has 7 items:
  0: 0000000000000000 - 000000000009fc00 = 1 RAM
  1: 000000000009fc00 - 00000000000a0000 = 2 RESERVED
  2: 00000000000f0000 - 0000000000100000 = 2 RESERVED
  3: 0000000000100000 - 000000007fffe000 = 1 RAM
  4: 000000007fffe000 - 0000000080000000 = 2 RESERVED
  5: 00000000fffc0000 - 0000000100000000 = 2 RESERVED
  6: 0000000100000000 - 0000000180000000 = 1 RAM


Basically pc and q35 already have different PCI hole sizes and the BIOS already 
handles it.

   -Don Slutz



---
  hw/i386/pc_piix.c    | 14 ++++++++++++--
  hw/i386/pc_q35.c     | 14 ++++++++++++--
  include/hw/i386/pc.h |  3 +++
  vl.c                 | 16 ++++++++++++++++
  xen-all.c            | 28 +++++++++++++++++-----------
  5 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index d5dc1ef..58e273d 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -95,6 +95,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
      DeviceState *icc_bridge;
      FWCfgState *fw_cfg = NULL;
      PcGuestInfo *guest_info;
+    ram_addr_t lowmem = 0xe0000000;
      if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
          fprintf(stderr, "xen hardware virtual machine initialisation 
failed\n");
@@ -111,6 +112,11 @@ static void pc_init1(QEMUMachineInitArgs *args,
          kvmclock_create();
      }
+    /* Adjust for pci_hole_min_size */
+    if (pci_hole_min_size > ((1ULL << 32) - lowmem)) {
+        lowmem = (1ULL << 32) - pci_hole_min_size;
+    }
+
      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
       * If it doesn't, we need to split it in chunks below and above 4G.
       * In any case, try to make sure that guest addresses aligned at
@@ -118,8 +124,12 @@ static void pc_init1(QEMUMachineInitArgs *args,
       * For old machine types, use whatever split we used historically to avoid
       * breaking migration.
       */
-    if (args->ram_size >= 0xe0000000) {
-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
+    if (args->ram_size >= lowmem) {
+        lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
+        /* Adjust for pci_hole_min_size */
+        if (pci_hole_min_size > ((1ULL << 32) - lowmem)) {
+            lowmem = (1ULL << 32) - pci_hole_min_size;
+        }
          above_4g_mem_size = args->ram_size - lowmem;
          below_4g_mem_size = lowmem;
      } else {
In fact, on piix there's no pci hole as such.
Instead, whatever is not claimed by any other device, is PCI.
So the name seems awkward for this case.
Same applies to q35, but I didn't check xen.

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index a7f6260..a491f6a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -82,6 +82,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
      PCIDevice *ahci;
      DeviceState *icc_bridge;
      PcGuestInfo *guest_info;
+    ram_addr_t lowmem = 0xb0000000;
      if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
          fprintf(stderr, "xen hardware virtual machine initialisation 
failed\n");
@@ -97,6 +98,11 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
      kvmclock_create();
+    /* Adjust for pci_hole_min_size */
+    if (pci_hole_min_size > ((1ULL << 32) - lowmem)) {
+        lowmem = (1ULL << 32) - pci_hole_min_size;
+    }
+
      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
       * also known as MMCFG).
@@ -106,8 +112,12 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
       * For old machine types, use whatever split we used historically to avoid
       * breaking migration.
       */
-    if (args->ram_size >= 0xb0000000) {
-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
+    if (args->ram_size >= lowmem) {
+        lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
+        /* Adjust for pci_hole_min_size */
+        if (pci_hole_min_size > ((1ULL << 32) - lowmem)) {
+            lowmem = (1ULL << 32) - pci_hole_min_size;
+        }
          above_4g_mem_size = args->ram_size - lowmem;
          below_4g_mem_size = lowmem;
      } else {
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9010246..43ea04b 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -204,6 +204,9 @@ int isa_vga_mm_init(hwaddr vram_base,
                      hwaddr ctrl_base, int it_shift,
                      MemoryRegion *address_space);
+/* vl.c */
+extern uint64_t pci_hole_min_size;
+
  /* ne2000.c */
  static inline bool isa_ne2000_init(ISABus *bus, int base, int irq, NICInfo 
*nd)
  {
diff --git a/vl.c b/vl.c
index 1d27b34..32266d3 100644
--- a/vl.c
+++ b/vl.c
@@ -123,6 +123,7 @@ int main(int argc, char **argv)
  #include "qom/object_interfaces.h"
  #define DEFAULT_RAM_SIZE 128
+#define MAX_PCI_HOLE_SIZE ((1ULL << 32) - 16 * 1024 * 1024)
  #define MAX_VIRTIO_CONSOLES 1
  #define MAX_SCLP_CONSOLES 1
@@ -213,6 +214,7 @@ static NotifierList machine_init_done_notifiers =
  static bool tcg_allowed = true;
  bool xen_allowed;
+uint64_t pci_hole_min_size;
  uint32_t xen_domid;
  enum xen_mode xen_mode = XEN_EMULATE;
  static int tcg_tb_size;
@@ -378,6 +380,10 @@ static QemuOptsList qemu_machine_opts = {
              .name = "firmware",
              .type = QEMU_OPT_STRING,
              .help = "firmware image",
+        }, {
+            .name = "pci_hole_min_size",
+            .type = QEMU_OPT_SIZE,
+            .help = "minimum size of PCI mmio hole below 4G",
          },
          { /* End of list */ }
      },
Hmm this adds this option to all machine types, doesn't it?
But besides xen and i386 machine types, everyone seems to
ignore it silently, which seems inelegant.


@@ -4050,6 +4056,16 @@ int main(int argc, char **argv, char **envp)
      initrd_filename = qemu_opt_get(machine_opts, "initrd");
      kernel_cmdline = qemu_opt_get(machine_opts, "append");
      bios_name = qemu_opt_get(machine_opts, "firmware");
+    pci_hole_min_size = qemu_opt_get_size(machine_opts,
+                                          "pci_hole_min_size",
+                                          pci_hole_min_size);
+    if (pci_hole_min_size > MAX_PCI_HOLE_SIZE) {
+        fprintf(stderr,
+                "%s: pci_hole_min_size=%llu too big, adjusted to %llu\n",
+                __func__, (unsigned long long) pci_hole_min_size,
+                MAX_PCI_HOLE_SIZE);
+        pci_hole_min_size = MAX_PCI_HOLE_SIZE;
+    }
      boot_order = machine->default_boot_order;
      opts = qemu_opts_find(qemu_find_opts("boot-opts"), NULL);
diff --git a/xen-all.c b/xen-all.c
index 4a594bd..dbe24c7 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -155,6 +155,15 @@ qemu_irq *xen_interrupt_controller_init(void)
  /* Memory Ops */
+static uint64_t mmio_hole_size(void)
+{
+    uint64_t sz = HVM_BELOW_4G_MMIO_LENGTH;
+    if (sz < pci_hole_min_size) {
+        sz = pci_hole_min_size;
+    }
+    return sz;
+}
+
  static void xen_ram_init(ram_addr_t ram_size, MemoryRegion **ram_memory_p)
  {
      MemoryRegion *sysmem = get_system_memory();
@@ -162,23 +171,20 @@ static void xen_ram_init(ram_addr_t ram_size, 
MemoryRegion **ram_memory_p)
      ram_addr_t block_len;
      block_len = ram_size;
-    if (ram_size >= HVM_BELOW_4G_RAM_END) {
-        /* Xen does not allocate the memory continuously, and keep a hole at
-         * HVM_BELOW_4G_MMIO_START of HVM_BELOW_4G_MMIO_LENGTH
+    below_4g_mem_size = (1ULL << 32) - mmio_hole_size();
+    if (ram_size < below_4g_mem_size) {
+        below_4g_mem_size = ram_size;
+    } else {
+        above_4g_mem_size = ram_size - below_4g_mem_size;
+        /* Xen does not allocate the memory continuously, and keep a hole of
+         * size mmio_hole_size().
           */
-        block_len += HVM_BELOW_4G_MMIO_LENGTH;
+        block_len += mmio_hole_size();
      }
      memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len);
      *ram_memory_p = &ram_memory;
      vmstate_register_ram_global(&ram_memory);
-    if (ram_size >= HVM_BELOW_4G_RAM_END) {
-        above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END;
-        below_4g_mem_size = HVM_BELOW_4G_RAM_END;
-    } else {
-        below_4g_mem_size = ram_size;
-    }
-
      memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
                               &ram_memory, 0, 0xa0000);
      memory_region_add_subregion(sysmem, 0, &ram_640k);
--
1.8.4



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.