|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v12 09/15] vpci/header: program p2m with guest BAR view
On 1/12/24 10:06, Roger Pau Monné wrote:
> On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
>>
>> Take into account guest's BAR view and program its p2m accordingly:
>> gfn is guest's view of the BAR and mfn is the physical BAR value.
>> This way hardware domain sees physical BAR values and guest sees
>> emulated ones.
>>
>> Hardware domain continues getting the BARs identity mapped, while for
>> domUs the BARs are mapped at the requested guest address without
>> modifying the BAR address in the device PCI config space.
>>
>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
>> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>
>
> Some nits and a request to add an extra assert. If you agree:
>
> Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Thanks! I agree. I'll reply to this with a v12.1 patch.
>
>> ---
>> In v12:
>> - Update guest_addr in rom_write()
>> - Use unsigned long for start_mfn and map_mfn to reduce mfn_x() calls
>> - Use existing vmsix_table_*() functions
>> - Change vmsix_table_base() to use .guest_addr
>> In v11:
>> - Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions
>> to access guest's view of the VMSIx tables.
>> - Use MFN (not GFN) to check access permissions
>> - Move page offset check to this patch
>> - Call rangeset_remove_range() with correct parameters
>> In v10:
>> - Moved GFN variable definition outside the loop in map_range()
>> - Updated printk error message in map_range()
>> - Now BAR address is always stored in bar->guest_addr, even for
>> HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars()
>> - vmsix_table_base() now uses .guest_addr instead of .addr
>> In v9:
>> - Extended the commit message
>> - Use bar->guest_addr in modify_bars
>> - Extended printk error message in map_range
>> - Moved map_data initialization so .bar can be initialized during declaration
>> Since v5:
>> - remove debug print in map_range callback
>> - remove "identity" from the debug print
>> Since v4:
>> - moved start_{gfn|mfn} calculation into map_range
>> - pass vpci_bar in the map_data instead of start_{gfn|mfn}
>> - s/guest_addr/guest_reg
>> Since v3:
>> - updated comment (Roger)
>> - removed gfn_add(map->start_gfn, rc); which is wrong
>> - use v->domain instead of v->vpci.pdev->domain
>> - removed odd e.g. in comment
>> - s/d%d/%pd in altered code
>> - use gdprintk for map/unmap logs
>> Since v2:
>> - improve readability for data.start_gfn and restructure ?: construct
>> Since v1:
>> - s/MSI/MSI-X in comments
>> ---
>> xen/drivers/vpci/header.c | 81 +++++++++++++++++++++++++++++++--------
>> xen/include/xen/vpci.h | 3 +-
>> 2 files changed, 66 insertions(+), 18 deletions(-)
>>
>> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
>> index feccd070ddd0..f0b0b64b0929 100644
>> --- a/xen/drivers/vpci/header.c
>> +++ b/xen/drivers/vpci/header.c
>> @@ -34,6 +34,7 @@
>>
>> struct map_data {
>> struct domain *d;
>> + const struct vpci_bar *bar;
>> bool map;
>> };
>>
>> @@ -41,13 +42,24 @@ static int cf_check map_range(
>> unsigned long s, unsigned long e, void *data, unsigned long *c)
>> {
>> const struct map_data *map = data;
>> + /* Start address of the BAR as seen by the guest. */
>> + unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr);
>> + /* Physical start address of the BAR. */
>> + unsigned long start_mfn = PFN_DOWN(map->bar->addr);
>> int rc;
>>
>> for ( ; ; )
>> {
>> unsigned long size = e - s + 1;
>> + /*
>> + * Ranges to be mapped don't always start at the BAR start address,
>> as
>> + * there can be holes or partially consumed ranges. Account for the
>> + * offset of the current address from the BAR start.
>> + */
>> + unsigned long map_mfn = start_mfn + s - start_gfn;
>> + unsigned long m_end = map_mfn + size - 1;
>>
>> - if ( !iomem_access_permitted(map->d, s, e) )
>> + if ( !iomem_access_permitted(map->d, map_mfn, m_end) )
>> {
>> printk(XENLOG_G_WARNING
>> "%pd denied access to MMIO range [%#lx, %#lx]\n",
>> @@ -55,7 +67,8 @@ static int cf_check map_range(
>> return -EPERM;
>> }
>>
>> - rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map);
>> + rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end,
>> + map->map);
>> if ( rc )
>> {
>> printk(XENLOG_G_WARNING
>> @@ -73,8 +86,8 @@ static int cf_check map_range(
>> * - {un}map_mmio_regions doesn't support preemption.
>> */
>>
>> - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s))
>> - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s));
>> + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size,
>> _mfn(map_mfn))
>> + : unmap_mmio_regions(map->d, _gfn(s), size,
>> _mfn(map_mfn));
>> if ( rc == 0 )
>> {
>> *c += size;
>> @@ -83,8 +96,9 @@ static int cf_check map_range(
>> if ( rc < 0 )
>> {
>> printk(XENLOG_G_WARNING
>> - "Failed to identity %smap [%lx, %lx] for d%d: %d\n",
>> - map->map ? "" : "un", s, e, map->d->domain_id, rc);
>> + "Failed to %smap [%lx %lx] -> [%lx %lx] for %pd: %d\n",
>> + map->map ? "" : "un", s, e, map_mfn,
>> + map_mfn + size, map->d, rc);
>> break;
>> }
>> ASSERT(rc < size);
>> @@ -163,10 +177,6 @@ static void modify_decoding(const struct pci_dev *pdev,
>> uint16_t cmd,
>> bool vpci_process_pending(struct vcpu *v)
>> {
>> struct pci_dev *pdev = v->vpci.pdev;
>> - struct map_data data = {
>> - .d = v->domain,
>> - .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
>> - };
>> struct vpci_header *header = NULL;
>> unsigned int i;
>>
>> @@ -186,6 +196,11 @@ bool vpci_process_pending(struct vcpu *v)
>> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> {
>> struct vpci_bar *bar = &header->bars[i];
>> + struct map_data data = {
>> + .d = v->domain,
>> + .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
>> + .bar = bar,
>> + };
>> int rc;
>>
>> if ( rangeset_is_empty(bar->mem) )
>> @@ -236,7 +251,6 @@ bool vpci_process_pending(struct vcpu *v)
>> static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
>> uint16_t cmd)
>> {
>> - struct map_data data = { .d = d, .map = true };
>> struct vpci_header *header = &pdev->vpci->header;
>> int rc = 0;
>> unsigned int i;
>> @@ -246,6 +260,7 @@ static int __init apply_map(struct domain *d, const
>> struct pci_dev *pdev,
>> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> {
>> struct vpci_bar *bar = &header->bars[i];
>> + struct map_data data = { .d = d, .map = true, .bar = bar };
>>
>> if ( rangeset_is_empty(bar->mem) )
>> continue;
>> @@ -311,12 +326,16 @@ static int modify_bars(const struct pci_dev *pdev,
>> uint16_t cmd, bool rom_only)
>> * First fill the rangesets with the BAR of this device or with the ROM
>> * BAR only, depending on whether the guest is toggling the memory
>> decode
>> * bit of the command register, or the enable bit of the ROM BAR
>> register.
>> + *
>> + * For non-hardware domain we use guest physical addresses.
>> */
>> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> {
>> struct vpci_bar *bar = &header->bars[i];
>> unsigned long start = PFN_DOWN(bar->addr);
>> unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
>> + unsigned long start_guest = PFN_DOWN(bar->guest_addr);
>> + unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
>>
>> if ( !bar->mem )
>> continue;
>> @@ -336,11 +355,25 @@ static int modify_bars(const struct pci_dev *pdev,
>> uint16_t cmd, bool rom_only)
>> continue;
>> }
>
> Should we assert that the BAR rangeset is empty here? To stay on the
> safe side.
Yes, I'll add an ASSERT.
>
>>
>> - rc = rangeset_add_range(bar->mem, start, end);
>> + /*
>> + * Make sure that the guest set address has the same page offset
>> + * as the physical address on the host or otherwise things won't
>> work as
>> + * expected.
>> + */
>> + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) )
>> + {
>> + gprintk(XENLOG_G_WARNING,
>> + "%pp: Can't map BAR%d because of page offset mismatch:
>> %lx vs %lx\n",
> ^u
>
> Also when using the x modifier it's better to also use # to print the
> 0x prefix. You can also reduce the length of the message using
> s/because of/due to/ IMO:
>
> %pp: Can't map BAR%u due to offset mismatch: %lx vs %lx
Will do
>
>> + &pdev->sbdf, i, PAGE_OFFSET(bar->guest_addr),
>> + PAGE_OFFSET(bar->addr));
>
> Maybe worth printing the whole address?
Agreed, will do
>
>> + return -EINVAL;
>> + }
>> +
>> + rc = rangeset_add_range(bar->mem, start_guest, end_guest);
>> if ( rc )
>> {
>> printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
>> - start, end, rc);
>> + start_guest, end_guest, rc);
>> return rc;
>> }
>>
>> @@ -352,12 +385,12 @@ static int modify_bars(const struct pci_dev *pdev,
>> uint16_t cmd, bool rom_only)
>> if ( rangeset_is_empty(prev_bar->mem) )
>> continue;
>>
>> - rc = rangeset_remove_range(prev_bar->mem, start, end);
>> + rc = rangeset_remove_range(prev_bar->mem, start_guest,
>> end_guest);
>> if ( rc )
>> {
>> gprintk(XENLOG_WARNING,
>> "%pp: failed to remove overlapping range [%lx, %lx]:
>> %d\n",
>> - &pdev->sbdf, start, end, rc);
>> + &pdev->sbdf, start_guest, end_guest, rc);
>> return rc;
>> }
>> }
>> @@ -425,8 +458,8 @@ static int modify_bars(const struct pci_dev *pdev,
>> uint16_t cmd, bool rom_only)
>> for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ )
>> {
>> const struct vpci_bar *remote_bar =
>> &tmp->vpci->header.bars[i];
>> - unsigned long start = PFN_DOWN(remote_bar->addr);
>> - unsigned long end = PFN_DOWN(remote_bar->addr +
>> + unsigned long start = PFN_DOWN(remote_bar->guest_addr);
>> + unsigned long end = PFN_DOWN(remote_bar->guest_addr +
>> remote_bar->size - 1);
>>
>> if ( !remote_bar->enabled )
>> @@ -513,6 +546,8 @@ static void cf_check bar_write(
>> struct vpci_bar *bar = data;
>> bool hi = false;
>>
>> + ASSERT(is_hardware_domain(pdev->domain));
>> +
>> if ( bar->type == VPCI_BAR_MEM64_HI )
>> {
>> ASSERT(reg > PCI_BASE_ADDRESS_0);
>> @@ -543,6 +578,10 @@ static void cf_check bar_write(
>> */
>> bar->addr &= ~(0xffffffffULL << (hi ? 32 : 0));
>> bar->addr |= (uint64_t)val << (hi ? 32 : 0);
>> + /*
>> + * Update guest address as well, so hardware domain sees BAR identity
>> mapped
>> + */
>
> Can you drop the 'as well' and make this a single line comment?
>
> Otherwise maybe reword to:
>
> Update guest address, so hardware domain BAR is identity mapped.
>
> Sorry, I find it wasteful to have the opening and closing comment
> delimiters in separate lines for single line comments.
Yep, I'll make it single line
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |