Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
0002 /*
0003  * VFIO API definition
0004  *
0005  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
0006  *     Author: Alex Williamson <alex.williamson@redhat.com>
0007  *
0008  * This program is free software; you can redistribute it and/or modify
0009  * it under the terms of the GNU General Public License version 2 as
0010  * published by the Free Software Foundation.
0011  */
0012 #ifndef _UAPIVFIO_H
0013 #define _UAPIVFIO_H
0014 
0015 #include <linux/types.h>
0016 #include <linux/ioctl.h>
0017 
0018 #define VFIO_API_VERSION    0
0019 
0020 
0021 /* Kernel & User level defines for VFIO IOCTLs. */
0022 
0023 /* Extensions */
0024 
0025 #define VFIO_TYPE1_IOMMU        1
0026 #define VFIO_SPAPR_TCE_IOMMU        2
0027 #define VFIO_TYPE1v2_IOMMU      3
0028 /*
0029  * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping).  This
0030  * capability is subject to change as groups are added or removed.
0031  */
0032 #define VFIO_DMA_CC_IOMMU       4
0033 
0034 /* Check if EEH is supported */
0035 #define VFIO_EEH            5
0036 
0037 /* Two-stage IOMMU */
0038 #define VFIO_TYPE1_NESTING_IOMMU    6   /* Implies v2 */
0039 
0040 #define VFIO_SPAPR_TCE_v2_IOMMU     7
0041 
0042 /*
0043  * The No-IOMMU IOMMU offers no translation or isolation for devices and
0044  * supports no ioctls outside of VFIO_CHECK_EXTENSION.  Use of VFIO's No-IOMMU
0045  * code will taint the host kernel and should be used with extreme caution.
0046  */
0047 #define VFIO_NOIOMMU_IOMMU      8
0048 
0049 /* Supports VFIO_DMA_UNMAP_FLAG_ALL */
0050 #define VFIO_UNMAP_ALL          9
0051 
0052 /* Supports the vaddr flag for DMA map and unmap */
0053 #define VFIO_UPDATE_VADDR       10
0054 
0055 /*
0056  * The IOCTL interface is designed for extensibility by embedding the
0057  * structure length (argsz) and flags into structures passed between
0058  * kernel and userspace.  We therefore use the _IO() macro for these
0059  * defines to avoid implicitly embedding a size into the ioctl request.
0060  * As structure fields are added, argsz will increase to match and flag
0061  * bits will be defined to indicate additional fields with valid data.
0062  * It's *always* the caller's responsibility to indicate the size of
0063  * the structure passed by setting argsz appropriately.
0064  */
0065 
0066 #define VFIO_TYPE   (';')
0067 #define VFIO_BASE   100
0068 
0069 /*
0070  * For extension of INFO ioctls, VFIO makes use of a capability chain
0071  * designed after PCI/e capabilities.  A flag bit indicates whether
0072  * this capability chain is supported and a field defined in the fixed
0073  * structure defines the offset of the first capability in the chain.
0074  * This field is only valid when the corresponding bit in the flags
0075  * bitmap is set.  This offset field is relative to the start of the
0076  * INFO buffer, as is the next field within each capability header.
0077  * The id within the header is a shared address space per INFO ioctl,
0078  * while the version field is specific to the capability id.  The
0079  * contents following the header are specific to the capability id.
0080  */
/* Common header preceding each entry in an INFO ioctl capability chain. */
0081 struct vfio_info_cap_header {
0082     __u16   id;     /* Identifies capability */
0083     __u16   version;    /* Version specific to the capability ID */
0084     __u32   next;       /* Offset of next capability, relative to start of INFO buffer */
0085 };
0086 
0087 /*
0088  * Callers of INFO ioctls passing insufficiently sized buffers will see
0089  * the capability chain flag bit set, a zero value for the first capability
0090  * offset (if available within the provided argsz), and argsz will be
0091  * updated to report the necessary buffer size.  For compatibility, the
0092  * INFO ioctl will not report error in this case, but the capability chain
0093  * will not be available.
0094  */
0095 
0096 /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
0097 
0098 /**
0099  * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
0100  *
0101  * Report the version of the VFIO API.  This allows us to bump the entire
0102  * API version should we later need to add or change features in incompatible
0103  * ways.
0104  * Return: VFIO_API_VERSION
0105  * Availability: Always
0106  */
0107 #define VFIO_GET_API_VERSION        _IO(VFIO_TYPE, VFIO_BASE + 0)
0108 
0109 /**
0110  * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
0111  *
0112  * Check whether an extension is supported.
0113  * Return: 0 if not supported, 1 (or some other positive integer) if supported.
0114  * Availability: Always
0115  */
0116 #define VFIO_CHECK_EXTENSION        _IO(VFIO_TYPE, VFIO_BASE + 1)
0117 
0118 /**
0119  * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
0120  *
0121  * Set the iommu to the given type.  The type must be supported by an
0122  * iommu driver as verified by calling CHECK_EXTENSION using the same
0123  * type.  A group must be set to this file descriptor before this
0124  * ioctl is available.  The IOMMU interfaces enabled by this call are
0125  * specific to the value set.
0126  * Return: 0 on success, -errno on failure
0127  * Availability: When VFIO group attached
0128  */
0129 #define VFIO_SET_IOMMU          _IO(VFIO_TYPE, VFIO_BASE + 2)
0130 
0131 /* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
0132 
0133 /**
0134  * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
0135  *                      struct vfio_group_status)
0136  *
0137  * Retrieve information about the group.  Fills in provided
0138  * struct vfio_group_status.  Caller sets argsz.
0139  * Return: 0 on success, -errno on failure.
0140  * Availability: Always
0141  */
0142 struct vfio_group_status {
0143     __u32   argsz;      /* In: size of this structure, set by caller */
0144     __u32   flags;      /* Out: VFIO_GROUP_FLAGS_* */
0145 #define VFIO_GROUP_FLAGS_VIABLE     (1 << 0)
0146 #define VFIO_GROUP_FLAGS_CONTAINER_SET  (1 << 1)
0147 };
0148 #define VFIO_GROUP_GET_STATUS       _IO(VFIO_TYPE, VFIO_BASE + 3)
0149 
0150 /**
0151  * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
0152  *
0153  * Set the container for the VFIO group to the open VFIO file
0154  * descriptor provided.  Groups may only belong to a single
0155  * container.  Containers may, at their discretion, support multiple
0156  * groups.  Only when a container is set are all of the interfaces
0157  * of the VFIO file descriptor and the VFIO group file descriptor
0158  * available to the user.
0159  * Return: 0 on success, -errno on failure.
0160  * Availability: Always
0161  */
0162 #define VFIO_GROUP_SET_CONTAINER    _IO(VFIO_TYPE, VFIO_BASE + 4)
0163 
0164 /**
0165  * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
0166  *
0167  * Remove the group from the attached container.  This is the
0168  * opposite of the SET_CONTAINER call and returns the group to
0169  * an initial state.  All device file descriptors must be released
0170  * prior to calling this interface.  When removing the last group
0171  * from a container, the IOMMU will be disabled and all state lost,
0172  * effectively also returning the VFIO file descriptor to an initial
0173  * state.
0174  * Return: 0 on success, -errno on failure.
0175  * Availability: When attached to container
0176  */
0177 #define VFIO_GROUP_UNSET_CONTAINER  _IO(VFIO_TYPE, VFIO_BASE + 5)
0178 
0179 /**
0180  * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
0181  *
0182  * Return a new file descriptor for the device object described by
0183  * the provided string.  The string should match a device listed in
0184  * the devices subdirectory of the IOMMU group sysfs entry.  The
0185  * group containing the device must already be added to this context.
0186  * Return: new file descriptor on success, -errno on failure.
0187  * Availability: When attached to container
0188  */
0189 #define VFIO_GROUP_GET_DEVICE_FD    _IO(VFIO_TYPE, VFIO_BASE + 6)
0190 
0191 /* --------------- IOCTLs for DEVICE file descriptors --------------- */
0192 
0193 /**
0194  * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
0195  *                      struct vfio_device_info)
0196  *
0197  * Retrieve information about the device.  Fills in provided
0198  * struct vfio_device_info.  Caller sets argsz.
0199  * Return: 0 on success, -errno on failure.
0200  */
0201 struct vfio_device_info {
0202     __u32   argsz;      /* In: size of this structure, set by caller */
0203     __u32   flags;      /* Out: VFIO_DEVICE_FLAGS_* */
0204 #define VFIO_DEVICE_FLAGS_RESET (1 << 0)    /* Device supports reset */
0205 #define VFIO_DEVICE_FLAGS_PCI   (1 << 1)    /* vfio-pci device */
0206 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */
0207 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)    /* vfio-amba device */
0208 #define VFIO_DEVICE_FLAGS_CCW   (1 << 4)    /* vfio-ccw device */
0209 #define VFIO_DEVICE_FLAGS_AP    (1 << 5)    /* vfio-ap device */
0210 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)   /* vfio-fsl-mc device */
0211 #define VFIO_DEVICE_FLAGS_CAPS  (1 << 7)    /* Info supports caps */
0212     __u32   num_regions;    /* Max region index + 1 */
0213     __u32   num_irqs;   /* Max IRQ index + 1 */
0214     __u32   cap_offset; /* Offset within info struct of first cap (valid only with FLAGS_CAPS) */
0215 };
0216 #define VFIO_DEVICE_GET_INFO        _IO(VFIO_TYPE, VFIO_BASE + 7)
0217 
0218 /*
0219  * Vendor driver using Mediated device framework should provide device_api
0220  * attribute in supported type attribute groups. Device API string should be one
0221  * of the following corresponding to device flags in vfio_device_info structure.
0222  */
0223 
0224 #define VFIO_DEVICE_API_PCI_STRING      "vfio-pci"
0225 #define VFIO_DEVICE_API_PLATFORM_STRING     "vfio-platform"
0226 #define VFIO_DEVICE_API_AMBA_STRING     "vfio-amba"
0227 #define VFIO_DEVICE_API_CCW_STRING      "vfio-ccw"
0228 #define VFIO_DEVICE_API_AP_STRING       "vfio-ap"
0229 
0230 /*
0231  * The following capabilities are unique to s390 zPCI devices.  Their contents
0232  * are further-defined in vfio_zdev.h
0233  */
0234 #define VFIO_DEVICE_INFO_CAP_ZPCI_BASE      1
0235 #define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP     2
0236 #define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL      3
0237 #define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP      4
0238 
0239 /**
0240  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
0241  *                     struct vfio_region_info)
0242  *
0243  * Retrieve information about a device region.  Caller provides
0244  * struct vfio_region_info with index value set.  Caller sets argsz.
0245  * Implementation of region mapping is bus driver specific.  This is
0246  * intended to describe MMIO, I/O port, as well as bus specific
0247  * regions (ex. PCI config space).  Zero sized regions may be used
0248  * to describe unimplemented regions (ex. unimplemented PCI BARs).
0249  * Return: 0 on success, -errno on failure.
0250  */
0251 struct vfio_region_info {
0252     __u32   argsz;      /* In: size of this structure, set by caller */
0253     __u32   flags;      /* Out: VFIO_REGION_INFO_FLAG_* */
0254 #define VFIO_REGION_INFO_FLAG_READ  (1 << 0) /* Region supports read */
0255 #define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
0256 #define VFIO_REGION_INFO_FLAG_MMAP  (1 << 2) /* Region supports mmap */
0257 #define VFIO_REGION_INFO_FLAG_CAPS  (1 << 3) /* Info supports caps */
0258     __u32   index;      /* Region index */
0259     __u32   cap_offset; /* Offset within info struct of first cap (valid only with FLAG_CAPS) */
0260     __u64   size;       /* Region size (bytes) */
0261     __u64   offset;     /* Region offset from start of device fd */
0262 };
0263 #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
0264 
0265 /*
0266  * The sparse mmap capability allows finer granularity of specifying areas
0267  * within a region with mmap support.  When specified, the user should only
0268  * mmap the offset ranges specified by the areas array.  mmaps outside of the
0269  * areas specified may fail (such as the range covering a PCI MSI-X table) or
0270  * may result in improper device behavior.
0271  *
0272  * The structures below define version 1 of this capability.
0273  */
0274 #define VFIO_REGION_INFO_CAP_SPARSE_MMAP    1
0275 
/* One mmap'able span within a region (version 1 of the sparse mmap capability). */
0276 struct vfio_region_sparse_mmap_area {
0277     __u64   offset; /* Offset of mmap'able area within region */
0278     __u64   size;   /* Size of mmap'able area */
0279 };
0280 
/* Capability listing which sub-ranges of a region may actually be mmap'd. */
0281 struct vfio_region_info_cap_sparse_mmap {
0282     struct vfio_info_cap_header header;
0283     __u32   nr_areas;   /* Number of entries in areas[] */
0284     __u32   reserved;   /* Reserved/padding */
0285     struct vfio_region_sparse_mmap_area areas[];    /* Flexible array of mmap'able areas */
0286 };
0287 
0288 /*
0289  * The device specific type capability allows regions unique to a specific
0290  * device or class of devices to be exposed.  This helps solve the problem for
0291  * vfio bus drivers of defining which region indexes correspond to which region
0292  * on the device, without needing to resort to static indexes, as done by
0293  * vfio-pci.  For instance, if we were to go back in time, we might remove
0294  * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes
0295  * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd
0296  * make a "VGA" device specific type to describe the VGA access space.  This
0297  * means that non-VGA devices wouldn't need to waste this index, and thus the
0298  * address space associated with it due to implementation of device file
0299  * descriptor offsets in vfio-pci.
0300  *
0301  * The current implementation is now part of the user ABI, so we can't use this
0302  * for VGA, but there are other upcoming use cases, such as opregions for Intel
0303  * IGD devices and framebuffers for vGPU devices.  We missed VGA, but we'll
0304  * use this for future additions.
0305  *
0306  * The structure below defines version 1 of this capability.
0307  */
0308 #define VFIO_REGION_INFO_CAP_TYPE   2
0309 
/* Capability tagging a region with a device-specific (type, subtype) pair. */
0310 struct vfio_region_info_cap_type {
0311     struct vfio_info_cap_header header;
0312     __u32 type; /* global per bus driver */
0313     __u32 subtype;  /* type specific */
0314 };
0315 
0316 /*
0317  * List of region types, global per bus driver.
0318  * If you introduce a new type, please add it here.
0319  */
0320 
0321 /* PCI region type containing a PCI vendor part */
0322 #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE    (1 << 31)
0323 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK    (0xffff)
0324 #define VFIO_REGION_TYPE_GFX                    (1)
0325 #define VFIO_REGION_TYPE_CCW            (2)
0326 #define VFIO_REGION_TYPE_MIGRATION_DEPRECATED   (3)
0327 
0328 /* sub-types for VFIO_REGION_TYPE_PCI_* */
0329 
0330 /* 8086 vendor PCI sub-types */
0331 #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION  (1)
0332 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG  (2)
0333 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG   (3)
0334 
0335 /* 10de vendor PCI sub-types */
0336 /*
0337  * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
0338  *
0339  * Deprecated, region no longer provided
0340  */
0341 #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM  (1)
0342 
0343 /* 1014 vendor PCI sub-types */
0344 /*
0345  * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
0346  * to do TLB invalidation on a GPU.
0347  *
0348  * Deprecated, region no longer provided
0349  */
0350 #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD    (1)
0351 
0352 /* sub-types for VFIO_REGION_TYPE_GFX */
0353 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
0354 
0355 /**
0356  * struct vfio_region_gfx_edid - EDID region layout.
0357  *
0358  * Set display link state and EDID blob.
0359  *
0360  * The EDID blob has monitor information such as brand, name, serial
0361  * number, physical size, supported video modes and more.
0362  *
0363  * This special region allows userspace (typically qemu) to set a virtual
0364  * EDID for the virtual monitor, which allows a flexible display
0365  * configuration.
0366  *
0367  * For the edid blob spec look here:
0368  *    https://en.wikipedia.org/wiki/Extended_Display_Identification_Data
0369  *
0370  * On linux systems you can find the EDID blob in sysfs:
0371  *    /sys/class/drm/${card}/${connector}/edid
0372  *
0373  * You can use the edid-decode utility (comes with xorg-x11-utils) to
0374  * decode the EDID blob.
0375  *
0376  * @edid_offset: location of the edid blob, relative to the
0377  *               start of the region (readonly).
0378  * @edid_max_size: max size of the edid blob (readonly).
0379  * @edid_size: actual edid size (read/write).
0380  * @link_state: display link state (read/write).
0381  * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on.
0382  * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off.
0383  * @max_xres: max display width (0 == no limitation, readonly).
0384  * @max_yres: max display height (0 == no limitation, readonly).
0385  *
0386  * EDID update protocol:
0387  *   (1) set link-state to down.
0388  *   (2) update edid blob and size.
0389  *   (3) set link-state to up.
0390  */
0391 struct vfio_region_gfx_edid {
0392     __u32 edid_offset;  /* Read-only: blob location relative to region start */
0393     __u32 edid_max_size;    /* Read-only: max size of the edid blob */
0394     __u32 edid_size;    /* Read/write: actual edid size */
0395     __u32 max_xres; /* Read-only: max display width, 0 == no limitation */
0396     __u32 max_yres; /* Read-only: max display height, 0 == no limitation */
0397     __u32 link_state;   /* Read/write: VFIO_DEVICE_GFX_LINK_STATE_* */
0398 #define VFIO_DEVICE_GFX_LINK_STATE_UP    1
0399 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
0400 };
0401 
0402 /* sub-types for VFIO_REGION_TYPE_CCW */
0403 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD   (1)
0404 #define VFIO_REGION_SUBTYPE_CCW_SCHIB       (2)
0405 #define VFIO_REGION_SUBTYPE_CCW_CRW     (3)
0406 
0407 /* sub-types for VFIO_REGION_TYPE_MIGRATION */
0408 #define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1)
0409 
/*
 * Deprecated migration protocol v1 region layout (see the _DEPRECATED
 * migration region type/sub-type above).
 */
0410 struct vfio_device_migration_info {
0411     __u32 device_state;         /* VFIO device state */
0412 #define VFIO_DEVICE_STATE_V1_STOP      (0)
0413 #define VFIO_DEVICE_STATE_V1_RUNNING   (1 << 0)
0414 #define VFIO_DEVICE_STATE_V1_SAVING    (1 << 1)
0415 #define VFIO_DEVICE_STATE_V1_RESUMING  (1 << 2)
0416 #define VFIO_DEVICE_STATE_MASK      (VFIO_DEVICE_STATE_V1_RUNNING | \
0417                      VFIO_DEVICE_STATE_V1_SAVING |  \
0418                      VFIO_DEVICE_STATE_V1_RESUMING)
0419 
/* RESUMING may not be combined with RUNNING or SAVING; anything else is valid. */
0420 #define VFIO_DEVICE_STATE_VALID(state) \
0421     (state & VFIO_DEVICE_STATE_V1_RESUMING ? \
0422     (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1)
0423 
/* SAVING|RESUMING (an invalid combination) is used as the error sentinel. */
0424 #define VFIO_DEVICE_STATE_IS_ERROR(state) \
0425     ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \
0426                           VFIO_DEVICE_STATE_V1_RESUMING))
0427 
0428 #define VFIO_DEVICE_STATE_SET_ERROR(state) \
0429     ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \
0430                          VFIO_DEVICE_STATE_V1_RESUMING)
0431 
0432     __u32 reserved;     /* Reserved/padding */
0433     __u64 pending_bytes;    /* NOTE(review): presumably device state bytes still pending -- confirm against v1 protocol docs */
0434     __u64 data_offset;  /* NOTE(review): presumably offset of migration data within the region -- confirm */
0435     __u64 data_size;    /* NOTE(review): presumably size of migration data at data_offset -- confirm */
0436 };
0437 
0438 /*
0439  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
0440  * which allows direct access to non-MSIX registers which happen to be within
0441  * the same system page.
0442  *
0443  * Even though the userspace gets direct access to the MSIX data, the existing
0444  * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration.
0445  */
0446 #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE  3
0447 
0448 /*
0449  * Capability with compressed real address (aka SSA - small system address)
0450  * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
0451  * and by the userspace to associate a NVLink bridge with a GPU.
0452  *
0453  * Deprecated, capability no longer provided
0454  */
0455 #define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4
0456 
0457 struct vfio_region_info_cap_nvlink2_ssatgt {
0458     struct vfio_info_cap_header header;
0459     __u64 tgt;  /* Compressed real address (SSA) where GPU RAM is mapped -- see cap description above */
0460 };
0461 
0462 /*
0463  * Capability with an NVLink link speed. The value is read by
0464  * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
0465  * property in the device tree. The value is fixed in the hardware
0466  * and failing to provide the correct value results in the link
0467  * not working with no indication from the driver why.
0468  *
0469  * Deprecated, capability no longer provided
0470  */
0471 #define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5
0472 
0473 struct vfio_region_info_cap_nvlink2_lnkspd {
0474     struct vfio_info_cap_header header;
0475     __u32 link_speed;   /* Fixed link speed, read from the bridge's "ibm,nvlink-speed" DT property */
0476     __u32 __pad;    /* Structure padding */
0477 };
0478 
0479 /**
0480  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
0481  *                  struct vfio_irq_info)
0482  *
0483  * Retrieve information about a device IRQ.  Caller provides
0484  * struct vfio_irq_info with index value set.  Caller sets argsz.
0485  * Implementation of IRQ mapping is bus driver specific.  Indexes
0486  * using multiple IRQs are primarily intended to support MSI-like
0487  * interrupt blocks.  Zero count irq blocks may be used to describe
0488  * unimplemented interrupt types.
0489  *
0490  * The EVENTFD flag indicates the interrupt index supports eventfd based
0491  * signaling.
0492  *
0493  * The MASKABLE flags indicates the index supports MASK and UNMASK
0494  * actions described below.
0495  *
0496  * AUTOMASKED indicates that after signaling, the interrupt line is
0497  * automatically masked by VFIO and the user needs to unmask the line
0498  * to receive new interrupts.  This is primarily intended to distinguish
0499  * level triggered interrupts.
0500  *
0501  * The NORESIZE flag indicates that the interrupt lines within the index
0502  * are setup as a set and new subindexes cannot be enabled without first
0503  * disabling the entire index.  This is used for interrupts like PCI MSI
0504  * and MSI-X where the driver may only use a subset of the available
0505  * indexes, but VFIO needs to enable a specific number of vectors
0506  * upfront.  In the case of MSI-X, where the user can enable MSI-X and
0507  * then add and unmask vectors, it's up to userspace to make the decision
0508  * whether to allocate the maximum supported number of vectors or tear
0509  * down setup and incrementally increase the vectors as each is enabled.
0510  */
0511 struct vfio_irq_info {
0512     __u32   argsz;      /* In: size of this structure, set by caller */
0513     __u32   flags;      /* Out: VFIO_IRQ_INFO_* */
0514 #define VFIO_IRQ_INFO_EVENTFD       (1 << 0)
0515 #define VFIO_IRQ_INFO_MASKABLE      (1 << 1)
0516 #define VFIO_IRQ_INFO_AUTOMASKED    (1 << 2)
0517 #define VFIO_IRQ_INFO_NORESIZE      (1 << 3)
0518     __u32   index;      /* IRQ index */
0519     __u32   count;      /* Number of IRQs within this index */
0520 };
0521 #define VFIO_DEVICE_GET_IRQ_INFO    _IO(VFIO_TYPE, VFIO_BASE + 9)
0522 
0523 /**
0524  * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
0525  *
0526  * Set signaling, masking, and unmasking of interrupts.  Caller provides
0527  * struct vfio_irq_set with all fields set.  'start' and 'count' indicate
0528  * the range of subindexes being specified.
0529  *
0530  * The DATA flags specify the type of data provided.  If DATA_NONE, the
0531  * operation performs the specified action immediately on the specified
0532  * interrupt(s).  For example, to unmask AUTOMASKED interrupt [0,0]:
0533  * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
0534  *
0535  * DATA_BOOL allows sparse support for the same on arrays of interrupts.
0536  * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
0537  * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
0538  * data = {1,0,1}
0539  *
0540  * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
0541  * A value of -1 can be used to either de-assign interrupts if already
0542  * assigned or skip un-assigned interrupts.  For example, to set an eventfd
0543  * to be trigger for interrupts [0,0] and [0,2]:
0544  * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
0545  * data = {fd1, -1, fd2}
0546  * If index [0,1] is previously set, two count = 1 ioctls calls would be
0547  * required to set [0,0] and [0,2] without changing [0,1].
0548  *
0549  * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
0550  * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
0551  * from userspace (ie. simulate hardware triggering).
0552  *
0553  * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
0554  * enables the interrupt index for the device.  Individual subindex interrupts
0555  * can be disabled using the -1 value for DATA_EVENTFD or the index can be
0556  * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
0557  *
0558  * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
0559  * ACTION_TRIGGER specifies kernel->user signaling.
0560  */
0561 struct vfio_irq_set {
0562     __u32   argsz;      /* In: size of this structure, set by caller */
0563     __u32   flags;      /* VFIO_IRQ_SET_DATA_* | VFIO_IRQ_SET_ACTION_* */
0564 #define VFIO_IRQ_SET_DATA_NONE      (1 << 0) /* Data not present */
0565 #define VFIO_IRQ_SET_DATA_BOOL      (1 << 1) /* Data is bool (u8) */
0566 #define VFIO_IRQ_SET_DATA_EVENTFD   (1 << 2) /* Data is eventfd (s32) */
0567 #define VFIO_IRQ_SET_ACTION_MASK    (1 << 3) /* Mask interrupt */
0568 #define VFIO_IRQ_SET_ACTION_UNMASK  (1 << 4) /* Unmask interrupt */
0569 #define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
0570     __u32   index;      /* IRQ index */
0571     __u32   start;      /* First subindex in range */
0572     __u32   count;      /* Number of subindexes in range */
0573     __u8    data[];     /* Per-subindex data: absent, bool (u8), or eventfd (s32), per DATA_* flag */
0574 };
0575 #define VFIO_DEVICE_SET_IRQS        _IO(VFIO_TYPE, VFIO_BASE + 10)
0576 
0577 #define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
0578                      VFIO_IRQ_SET_DATA_BOOL | \
0579                      VFIO_IRQ_SET_DATA_EVENTFD)
0580 #define VFIO_IRQ_SET_ACTION_TYPE_MASK   (VFIO_IRQ_SET_ACTION_MASK | \
0581                      VFIO_IRQ_SET_ACTION_UNMASK | \
0582                      VFIO_IRQ_SET_ACTION_TRIGGER)
0583 /**
0584  * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
0585  *
0586  * Reset a device.
0587  */
0588 #define VFIO_DEVICE_RESET       _IO(VFIO_TYPE, VFIO_BASE + 11)
0589 
0590 /*
0591  * The VFIO-PCI bus driver makes use of the following fixed region and
0592  * IRQ index mapping.  Unimplemented regions return a size of zero.
0593  * Unimplemented IRQ types return a count of zero.
0594  */
0595 
0596 enum {
0597     VFIO_PCI_BAR0_REGION_INDEX,
0598     VFIO_PCI_BAR1_REGION_INDEX,
0599     VFIO_PCI_BAR2_REGION_INDEX,
0600     VFIO_PCI_BAR3_REGION_INDEX,
0601     VFIO_PCI_BAR4_REGION_INDEX,
0602     VFIO_PCI_BAR5_REGION_INDEX,
0603     VFIO_PCI_ROM_REGION_INDEX,  /* PCI expansion ROM */
0604     VFIO_PCI_CONFIG_REGION_INDEX,   /* PCI configuration space */
0605     /*
0606      * Expose VGA regions defined for PCI base class 03, subclass 00.
0607      * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
0608      * as well as the MMIO range 0xa0000 to 0xbffff.  Each implemented
0609      * range is found at its identity mapped offset from the region
0610      * offset, for example 0x3b0 is region_info.offset + 0x3b0.  Areas
0611      * between described ranges are unimplemented.
0612      */
0613     VFIO_PCI_VGA_REGION_INDEX,
0614     VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */
0615                  /* device specific cap to define content. */
0616 };
0617 
/* Fixed IRQ index mapping for the vfio-pci bus driver. */
0618 enum {
0619     VFIO_PCI_INTX_IRQ_INDEX,
0620     VFIO_PCI_MSI_IRQ_INDEX,
0621     VFIO_PCI_MSIX_IRQ_INDEX,
0622     VFIO_PCI_ERR_IRQ_INDEX,
0623     VFIO_PCI_REQ_IRQ_INDEX,
0624     VFIO_PCI_NUM_IRQS
0625 };
0626 
0627 /*
0628  * The vfio-ccw bus driver makes use of the following fixed region and
0629  * IRQ index mapping. Unimplemented regions return a size of zero.
0630  * Unimplemented IRQ types return a count of zero.
0631  */
0632 
/* Fixed region index mapping for the vfio-ccw bus driver. */
0633 enum {
0634     VFIO_CCW_CONFIG_REGION_INDEX,
0635     VFIO_CCW_NUM_REGIONS
0636 };
0637 
/* Fixed IRQ index mapping for the vfio-ccw bus driver. */
0638 enum {
0639     VFIO_CCW_IO_IRQ_INDEX,
0640     VFIO_CCW_CRW_IRQ_INDEX,
0641     VFIO_CCW_REQ_IRQ_INDEX,
0642     VFIO_CCW_NUM_IRQS
0643 };
0644 
0645 /**
0646  * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
0647  *                        struct vfio_pci_hot_reset_info)
0648  *
0649  * Return: 0 on success, -errno on failure:
0650  *  -enospc = insufficient buffer, -enodev = unsupported for device.
0651  */
/* One device affected by a PCI hot reset, identified by IOMMU group + BDF. */
0652 struct vfio_pci_dependent_device {
0653     __u32   group_id;   /* IOMMU group of the dependent device */
0654     __u16   segment;    /* PCI segment/domain number */
0655     __u8    bus;    /* PCI bus number */
0656     __u8    devfn; /* Use PCI_SLOT/PCI_FUNC */
0657 };
0658 
0659 struct vfio_pci_hot_reset_info {
0660     __u32   argsz;      /* In: size of this structure, set by caller */
0661     __u32   flags;
0662     __u32   count;      /* Number of entries in devices[] */
0663     struct vfio_pci_dependent_device    devices[];  /* Devices affected by the reset */
0664 };
0665 
0666 #define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO  _IO(VFIO_TYPE, VFIO_BASE + 12)
0667 
0668 /**
0669  * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
0670  *                  struct vfio_pci_hot_reset)
0671  *
0672  * Return: 0 on success, -errno on failure.
0673  */
0674 struct vfio_pci_hot_reset {
0675     __u32   argsz;      /* In: size of this structure, set by caller */
0676     __u32   flags;
0677     __u32   count;      /* Number of entries in group_fds[] */
0678     __s32   group_fds[];    /* Open group fds -- presumably covering the devices reported by GET_PCI_HOT_RESET_INFO; confirm */
0679 };
0680 
0681 #define VFIO_DEVICE_PCI_HOT_RESET   _IO(VFIO_TYPE, VFIO_BASE + 13)
0682 
0683 /**
0684  * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14,
0685  *                                    struct vfio_device_query_gfx_plane)
0686  *
0687  * Set the drm_plane_type and flags, then retrieve the gfx plane info.
0688  *
0689  * flags supported:
0690  * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set
0691  *   to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no
0692  *   support for dma-buf.
0693  * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set
0694  *   to ask if the mdev supports region. 0 on support, -EINVAL on no
0695  *   support for region.
0696  * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set
0697  *   with each call to query the plane info.
0698  * - Others are invalid and return -EINVAL.
0699  *
0700  * Note:
0701  * 1. Plane could be disabled by guest. In that case, success will be
0702  *    returned with zero-initialized drm_format, size, width and height
0703  *    fields.
0704  * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available
0705  *
0706  * Return: 0 on success, -errno on other failure.
0707  */
0708 struct vfio_device_gfx_plane_info {
0709     __u32 argsz;    /* In: size of this structure, set by caller */
0710     __u32 flags;    /* In: VFIO_GFX_PLANE_TYPE_* */
0711 #define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0)
0712 #define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1)
0713 #define VFIO_GFX_PLANE_TYPE_REGION (1 << 2)
0714     /* in */
0715     __u32 drm_plane_type;   /* type of plane: DRM_PLANE_TYPE_* */
0716     /* out */
0717     __u32 drm_format;   /* drm format of plane */
0718     __u64 drm_format_mod;   /* tiled mode */
0719     __u32 width;    /* width of plane */
0720     __u32 height;   /* height of plane */
0721     __u32 stride;   /* stride of plane */
0722     __u32 size; /* size of plane in bytes, align on page */
0723     __u32 x_pos;    /* horizontal position of cursor plane */
0724     __u32 y_pos;    /* vertical position of cursor plane */
0725     __u32 x_hot;    /* horizontal position of cursor hotspot; 0xFFFFFFFF if unavailable */
0726     __u32 y_hot;    /* vertical position of cursor hotspot; 0xFFFFFFFF if unavailable */
0727     union {
0728         __u32 region_index; /* region index */
0729         __u32 dmabuf_id;    /* dma-buf id */
0730     };
0731 };
0732 
0733 #define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14)
0734 
0735 /**
0736  * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32)
0737  *
0738  * Return a new dma-buf file descriptor for an exposed guest framebuffer
0739  * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_
0740  * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer.
0741  */
0742 
0743 #define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15)
0744 
/**
 * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 16,
 *                              struct vfio_device_ioeventfd)
 *
 * Perform a write to the device at the specified device fd offset, with
 * the specified data and width when the provided eventfd is triggered.
 * vfio bus drivers may not support this for all regions, for all widths,
 * or at all.  vfio-pci currently only enables support for BAR regions,
 * excluding the MSI-X vector table.
 *
 * Return: 0 on success, -errno on failure.
 */
struct vfio_device_ioeventfd {
    __u32   argsz;
    __u32   flags;
#define VFIO_DEVICE_IOEVENTFD_8     (1 << 0) /* 1-byte write */
#define VFIO_DEVICE_IOEVENTFD_16    (1 << 1) /* 2-byte write */
#define VFIO_DEVICE_IOEVENTFD_32    (1 << 2) /* 4-byte write */
#define VFIO_DEVICE_IOEVENTFD_64    (1 << 3) /* 8-byte write */
/* Mask covering the four width flags above.  NOTE(review): presumably
 * exactly one width must be set per call -- confirm with the driver. */
#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf)
    __u64   offset;         /* device fd offset of write */
    __u64   data;           /* data to be written */
    __s32   fd;         /* -1 for de-assignment */
};

#define VFIO_DEVICE_IOEVENTFD       _IO(VFIO_TYPE, VFIO_BASE + 16)
0771 
/**
 * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
 *                 struct vfio_device_feature)
 *
 * Get, set, or probe feature data of the device.  The feature is selected
 * using the FEATURE_MASK portion of the flags field.  Support for a feature
 * can be probed by setting both the FEATURE_MASK and PROBE bits.  A probe
 * may optionally include the GET and/or SET bits to determine read vs write
 * access of the feature respectively.  Probing a feature will return success
 * if the feature is supported and all of the optionally indicated GET/SET
 * methods are supported.  The format of the data portion of the structure is
 * specific to the given feature.  The data portion is not required for
 * probing.  GET and SET are mutually exclusive, except for use with PROBE.
 *
 * Return 0 on success, -errno on failure.
 */
struct vfio_device_feature {
    __u32   argsz;
    __u32   flags;
#define VFIO_DEVICE_FEATURE_MASK    (0xffff) /* 16-bit feature index */
#define VFIO_DEVICE_FEATURE_GET     (1 << 16) /* Get feature into data[] */
#define VFIO_DEVICE_FEATURE_SET     (1 << 17) /* Set feature from data[] */
#define VFIO_DEVICE_FEATURE_PROBE   (1 << 18) /* Probe feature support */
    __u8    data[];     /* feature-specific data; not required for PROBE */
};

#define VFIO_DEVICE_FEATURE     _IO(VFIO_TYPE, VFIO_BASE + 17)

/*
 * Provide support for setting a PCI VF Token, which is used as a shared
 * secret between PF and VF drivers.  This feature may only be set on a
 * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing
 * open VFs.  Data provided when setting this feature is a 16-byte array
 * (__u8 b[16]), representing a UUID.
 */
#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN    (0)
0808 
/*
 * Indicates the device can support the migration API through
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and
 * ERROR states are always supported. Support for additional states is
 * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be
 * set.
 *
 * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and
 * RESUMING are supported.
 *
 * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
 * is supported in addition to the STOP_COPY states.
 *
 * Other combinations of flags have behavior to be defined in the future.
 */
struct vfio_device_feature_migration {
    __aligned_u64 flags;
#define VFIO_MIGRATION_STOP_COPY    (1 << 0)
#define VFIO_MIGRATION_P2P      (1 << 1)
};
#define VFIO_DEVICE_FEATURE_MIGRATION 1 /* feature index for VFIO_DEVICE_FEATURE */
0830 
/*
 * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO
 * device. The new state is supplied in device_state, see enum
 * vfio_device_mig_state for details.
 *
 * The kernel migration driver must fully transition the device to the new state
 * value before the operation returns to the user.
 *
 * The kernel migration driver must not generate asynchronous device state
 * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET
 * ioctl as described above.
 *
 * If this function fails then current device_state may be the original
 * operating state or some other state along the combination transition path.
 * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt
 * to return to the original state, or attempt to return to some other state
 * such as RUNNING or STOP.
 *
 * If the new_state starts a new data transfer session then the FD associated
 * with that session is returned in data_fd. The user is responsible to close
 * this FD when it is finished. The user must consider the migration data stream
 * carried over the FD to be opaque and must preserve the byte order of the
 * stream. The user is not required to preserve buffer segmentation when writing
 * the data stream during the RESUMING operation.
 *
 * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO
 * device, data_fd will be -1.
 */
struct vfio_device_feature_mig_state {
    __u32 device_state; /* From enum vfio_device_mig_state */
    __s32 data_fd;      /* data transfer session FD, or -1 */
};
#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2  /* feature index for VFIO_DEVICE_FEATURE */
0864 
0865 /*
0866  * The device migration Finite State Machine is described by the enum
0867  * vfio_device_mig_state. Some of the FSM arcs will create a migration data
0868  * transfer session by returning a FD, in this case the migration data will
0869  * flow over the FD using read() and write() as discussed below.
0870  *
0871  * There are 5 states to support VFIO_MIGRATION_STOP_COPY:
0872  *  RUNNING - The device is running normally
0873  *  STOP - The device does not change the internal or external state
0874  *  STOP_COPY - The device internal state can be read out
0875  *  RESUMING - The device is stopped and is loading a new internal state
0876  *  ERROR - The device has failed and must be reset
0877  *
0878  * And 1 optional state to support VFIO_MIGRATION_P2P:
0879  *  RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA
0880  *
0881  * The FSM takes actions on the arcs between FSM states. The driver implements
0882  * the following behavior for the FSM arcs:
0883  *
0884  * RUNNING_P2P -> STOP
0885  * STOP_COPY -> STOP
0886  *   While in STOP the device must stop the operation of the device. The device
0887  *   must not generate interrupts, DMA, or any other change to external state.
0888  *   It must not change its internal state. When stopped the device and kernel
0889  *   migration driver must accept and respond to interaction to support external
0890  *   subsystems in the STOP state, for example PCI MSI-X and PCI config space.
0891  *   Failure by the user to restrict device access while in STOP must not result
0892  *   in error conditions outside the user context (ex. host system faults).
0893  *
0894  *   The STOP_COPY arc will terminate a data transfer session.
0895  *
0896  * RESUMING -> STOP
0897  *   Leaving RESUMING terminates a data transfer session and indicates the
0898  *   device should complete processing of the data delivered by write(). The
0899  *   kernel migration driver should complete the incorporation of data written
0900  *   to the data transfer FD into the device internal state and perform
0901  *   final validity and consistency checking of the new device state. If the
0902  *   user provided data is found to be incomplete, inconsistent, or otherwise
0903  *   invalid, the migration driver must fail the SET_STATE ioctl and
0904  *   optionally go to the ERROR state as described below.
0905  *
0906  *   While in STOP the device has the same behavior as other STOP states
0907  *   described above.
0908  *
0909  *   To abort a RESUMING session the device must be reset.
0910  *
0911  * RUNNING_P2P -> RUNNING
0912  *   While in RUNNING the device is fully operational, the device may generate
0913  *   interrupts, DMA, respond to MMIO, all vfio device regions are functional,
0914  *   and the device may advance its internal state.
0915  *
0916  * RUNNING -> RUNNING_P2P
0917  * STOP -> RUNNING_P2P
0918  *   While in RUNNING_P2P the device is partially running in the P2P quiescent
0919  *   state defined below.
0920  *
0921  * STOP -> STOP_COPY
 *   This arc begins the process of saving the device state and will return a
0923  *   new data_fd.
0924  *
0925  *   While in the STOP_COPY state the device has the same behavior as STOP
 *   with the addition that the data transfer session continues to stream the
0927  *   migration state. End of stream on the FD indicates the entire device
0928  *   state has been transferred.
0929  *
0930  *   The user should take steps to restrict access to vfio device regions while
0931  *   the device is in STOP_COPY or risk corruption of the device migration data
0932  *   stream.
0933  *
0934  * STOP -> RESUMING
0935  *   Entering the RESUMING state starts a process of restoring the device state
0936  *   and will return a new data_fd. The data stream fed into the data_fd should
0937  *   be taken from the data transfer output of a single FD during saving from
0938  *   a compatible device. The migration driver may alter/reset the internal
0939  *   device state for this arc if required to prepare the device to receive the
0940  *   migration data.
0941  *
0942  * any -> ERROR
0943  *   ERROR cannot be specified as a device state, however any transition request
0944  *   can be failed with an errno return and may then move the device_state into
0945  *   ERROR. In this case the device was unable to execute the requested arc and
0946  *   was also unable to restore the device to any valid device_state.
0947  *   To recover from ERROR VFIO_DEVICE_RESET must be used to return the
0948  *   device_state back to RUNNING.
0949  *
0950  * The optional peer to peer (P2P) quiescent state is intended to be a quiescent
0951  * state for the device for the purposes of managing multiple devices within a
0952  * user context where peer-to-peer DMA between devices may be active. The
0953  * RUNNING_P2P states must prevent the device from initiating
0954  * any new P2P DMA transactions. If the device can identify P2P transactions
0955  * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration
0956  * driver must complete any such outstanding operations prior to completing the
0957  * FSM arc into a P2P state. For the purpose of specification the states
0958  * behave as though the device was fully running if not supported. Like while in
0959  * STOP or STOP_COPY the user must not touch the device, otherwise the state
0960  * can be exited.
0961  *
0962  * The remaining possible transitions are interpreted as combinations of the
0963  * above FSM arcs. As there are multiple paths through the FSM arcs the path
0964  * should be selected based on the following rules:
0965  *   - Select the shortest path.
0966  * Refer to vfio_mig_get_next_state() for the result of the algorithm.
0967  *
0968  * The automatic transit through the FSM arcs that make up the combination
0969  * transition is invisible to the user. When working with combination arcs the
0970  * user may see any step along the path in the device_state if SET_STATE
0971  * fails. When handling these types of errors users should anticipate future
0972  * revisions of this protocol using new states and those states becoming
0973  * visible in this case.
0974  *
0975  * The optional states cannot be used with SET_STATE if the device does not
0976  * support them. The user can discover if these states are supported by using
0977  * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can
0978  * avoid knowing about these optional states if the kernel driver supports them.
0979  */
enum vfio_device_mig_state {
    VFIO_DEVICE_STATE_ERROR = 0,        /* device failed and must be reset */
    VFIO_DEVICE_STATE_STOP = 1,     /* no internal or external state changes */
    VFIO_DEVICE_STATE_RUNNING = 2,      /* device running normally */
    VFIO_DEVICE_STATE_STOP_COPY = 3,    /* internal state can be read out */
    VFIO_DEVICE_STATE_RESUMING = 4,     /* stopped, loading new internal state */
    VFIO_DEVICE_STATE_RUNNING_P2P = 5,  /* RUNNING, but no peer-to-peer DMA */
};
0988 
/* -------- API for Type1 VFIO IOMMU -------- */

/**
 * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
 *
 * Retrieve information about the IOMMU object. Fills in provided
 * struct vfio_iommu_info. Caller sets argsz.
 *
 * XXX Should we do these by CHECK_EXTENSION too?
 */
struct vfio_iommu_type1_info {
    __u32   argsz;
    __u32   flags;
#define VFIO_IOMMU_INFO_PGSIZES (1 << 0)    /* supported page sizes info */
#define VFIO_IOMMU_INFO_CAPS    (1 << 1)    /* Info supports caps */
    __u64   iova_pgsizes;   /* Bitmap of supported page sizes */
    /* NOTE(review): presumably only valid when VFIO_IOMMU_INFO_CAPS is set
     * in flags -- confirm with the kernel implementation. */
    __u32   cap_offset; /* Offset within info struct of first cap */
};
1007 
/*
 * The IOVA capability allows reporting the valid IOVA range(s)
 * excluding any non-relaxable reserved regions exposed by
 * devices attached to the container. Any DMA map attempt
 * outside the valid iova range will return an error.
 *
 * The structures below define version 1 of this capability.
 */
#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1

struct vfio_iova_range {
    __u64   start;
    __u64   end;
};

struct vfio_iommu_type1_info_cap_iova_range {
    struct  vfio_info_cap_header header;
    __u32   nr_iovas;
    __u32   reserved;
    struct  vfio_iova_range iova_ranges[];  /* nr_iovas entries */
};
1029 
/*
 * The migration capability allows reporting supported features for migration.
 *
 * The structures below define version 1 of this capability.
 *
 * The existence of this capability indicates that IOMMU kernel driver supports
 * dirty page logging.
 *
 * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty
 * page logging.
 * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap
 * size in bytes that can be used by user applications when getting the dirty
 * bitmap.
 */
#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION  2

struct vfio_iommu_type1_info_cap_migration {
    struct  vfio_info_cap_header header;
    __u32   flags;
    __u64   pgsize_bitmap;
    __u64   max_dirty_bitmap_size;      /* in bytes */
};
1052 
/*
 * The DMA available capability allows reporting the current number of
 * simultaneously outstanding DMA mappings that are allowed.
 *
 * The structure below defines version 1 of this capability.
 *
 * avail: specifies the current number of outstanding DMA mappings allowed.
 */
#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3

struct vfio_iommu_type1_info_dma_avail {
    struct  vfio_info_cap_header header;
    __u32   avail;
};
1067 
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)

/**
 * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
 *
 * Map process virtual addresses to IO virtual addresses using the
 * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
 *
 * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
 * unblock translation of host virtual addresses in the iova range.  The vaddr
 * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR.  To
 * maintain memory consistency within the user application, the updated vaddr
 * must address the same memory object as originally mapped.  Failure to do so
 * will result in user memory corruption and/or device misbehavior.  iova and
 * size must match those in the original MAP_DMA call.  Protection is not
 * changed, and the READ & WRITE flags must be 0.
 */
struct vfio_iommu_type1_dma_map {
    __u32   argsz;
    __u32   flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0)     /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)    /* writable from device */
#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)    /* update vaddr of existing mapping */
    __u64   vaddr;              /* Process virtual address */
    __u64   iova;               /* IO virtual address */
    __u64   size;               /* Size of mapping (bytes) */
};

#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
1097 
/* Bitmap descriptor used by the dirty-tracking interfaces below. */
struct vfio_bitmap {
    __u64        pgsize;    /* page size for bitmap in bytes */
    __u64        size;  /* in bytes */
    __u64 __user *data; /* one bit per page */
};
1103 
/**
 * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
 *                          struct vfio_dma_unmap)
 *
 * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
 * Caller sets argsz.  The actual unmapped size is returned in the size
 * field.  No guarantee is made to the user that arbitrary unmaps of iova
 * or size different from those used in the original mapping call will
 * succeed.
 *
 * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
 * before unmapping IO virtual addresses. When this flag is set, the user must
 * provide a struct vfio_bitmap in data[]. User must provide zero-allocated
 * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field.
 * A bit in the bitmap represents one page, of user provided page size in
 * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set
 * indicates that the page at that offset from iova is dirty. A bitmap of the
 * pages in the range of unmapped size is returned in the user-provided
 * vfio_bitmap.data.
 *
 * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses.  iova and size
 * must be 0.  This cannot be combined with the get-dirty-bitmap flag.
 *
 * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
 * virtual addresses in the iova range.  Tasks that attempt to translate an
 * iova's vaddr will block.  DMA to already-mapped pages continues.  This
 * cannot be combined with the get-dirty-bitmap flag.
 */
struct vfio_iommu_type1_dma_unmap {
    __u32   argsz;
    __u32   flags;
#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
#define VFIO_DMA_UNMAP_FLAG_ALL          (1 << 1)
#define VFIO_DMA_UNMAP_FLAG_VADDR        (1 << 2)
    __u64   iova;               /* IO virtual address */
    __u64   size;               /* Size of mapping (bytes) */
    __u8    data[];         /* struct vfio_bitmap when GET_DIRTY_BITMAP set */
};

#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)

/*
 * IOCTLs to enable/disable IOMMU container usage.
 * No parameters are supported.
 */
#define VFIO_IOMMU_ENABLE   _IO(VFIO_TYPE, VFIO_BASE + 15)
#define VFIO_IOMMU_DISABLE  _IO(VFIO_TYPE, VFIO_BASE + 16)
1151 
/**
 * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
 *                                     struct vfio_iommu_type1_dirty_bitmap)
 * IOCTL is used for dirty pages logging.
 * Caller should set flag depending on which operation to perform, details as
 * below:
 *
 * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs
 * the IOMMU driver to log pages that are dirtied or potentially dirtied by
 * the device; designed to be used when a migration is in progress. Dirty pages
 * are logged until logging is disabled by user application by calling the IOCTL
 * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
 *
 * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs
 * the IOMMU driver to stop logging dirtied pages.
 *
 * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set
 * returns the dirty pages bitmap for IOMMU container for a given IOVA range.
 * The user must specify the IOVA range and the pgsize through the structure
 * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface
 * supports getting a bitmap of the smallest supported pgsize only and can be
 * modified in future to get a bitmap of any specified supported pgsize. The
 * user must provide a zeroed memory area for the bitmap memory and specify its
 * size in bitmap.size. One bit is used to represent one page consecutively
 * starting from iova offset. The user should provide page size in bitmap.pgsize
 * field. A bit set in the bitmap indicates that the page at that offset from
 * iova is dirty. The caller must set argsz to a value including the size of
 * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the
 * actual bitmap. If dirty pages logging is not enabled, an error will be
 * returned.
 *
 * Only one of the flags _START, _STOP and _GET may be specified at a time.
 */
struct vfio_iommu_type1_dirty_bitmap {
    __u32        argsz;
    __u32        flags;
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START   (1 << 0)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP    (1 << 1)
#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP  (1 << 2)
    __u8         data[];    /* struct vfio_iommu_type1_dirty_bitmap_get for _GET_BITMAP */
};

struct vfio_iommu_type1_dirty_bitmap_get {
    __u64              iova;    /* IO virtual address */
    __u64              size;    /* Size of iova range */
    struct vfio_bitmap bitmap;  /* caller-provided, zeroed bitmap memory */
};

#define VFIO_IOMMU_DIRTY_PAGES             _IO(VFIO_TYPE, VFIO_BASE + 17)
1202 
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */

/*
 * The SPAPR TCE DDW info struct provides the information about
 * the details of Dynamic DMA window capability, embedded in
 * struct vfio_iommu_spapr_tce_info below.
 *
 * @pgsizes contains a page size bitmask, 4K/64K/16M are supported.
 * @max_dynamic_windows_supported tells the maximum number of windows
 * which the platform can create.
 * @levels tells the maximum number of levels in multi-level IOMMU tables;
 * this allows splitting a table into smaller chunks which reduces
 * the amount of physically contiguous memory required for the table.
 */
struct vfio_iommu_spapr_tce_ddw_info {
    __u64 pgsizes;          /* Bitmap of supported page sizes */
    __u32 max_dynamic_windows_supported;
    __u32 levels;
};
1221 
/*
 * The SPAPR TCE info struct provides the information about the PCI bus
 * address ranges available for DMA, these values are programmed into
 * the hardware so the guest has to know that information.
 *
 * The DMA 32 bit window start is an absolute PCI bus address.
 * The IOVA addresses passed via map/unmap ioctls are absolute PCI bus
 * addresses too so the window works as a filter rather than an offset
 * for IOVA addresses.
 *
 * Flags supported:
 * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows
 *   (DDW) support is present. @ddw is only supported when DDW is present.
 */
struct vfio_iommu_spapr_tce_info {
    __u32 argsz;
    __u32 flags;
#define VFIO_IOMMU_SPAPR_INFO_DDW   (1 << 0)    /* DDW supported */
    __u32 dma32_window_start;   /* 32 bit window start (bytes) */
    __u32 dma32_window_size;    /* 32 bit window size (bytes) */
    struct vfio_iommu_spapr_tce_ddw_info ddw;
};

#define VFIO_IOMMU_SPAPR_TCE_GET_INFO   _IO(VFIO_TYPE, VFIO_BASE + 12)
1246 
/*
 * EEH PE operation struct provides ways to:
 * - enable/disable EEH functionality;
 * - unfreeze IO/DMA for frozen PE;
 * - read PE state;
 * - reset PE;
 * - configure PE;
 * - inject EEH error.
 */
struct vfio_eeh_pe_err {
    __u32 type;
    __u32 func;
    __u64 addr;
    __u64 mask;
};

struct vfio_eeh_pe_op {
    __u32 argsz;
    __u32 flags;
    __u32 op;       /* one of the VFIO_EEH_PE_* operation codes below */
    union {
        /* NOTE(review): err appears to carry the argument for
         * VFIO_EEH_PE_INJECT_ERR -- confirm with the EEH driver. */
        struct vfio_eeh_pe_err err;
    };
};

/* Operation codes for vfio_eeh_pe_op.op.  The indented VFIO_EEH_PE_STATE_*
 * values appear to be results of VFIO_EEH_PE_GET_STATE rather than ops. */
#define VFIO_EEH_PE_DISABLE     0   /* Disable EEH functionality */
#define VFIO_EEH_PE_ENABLE      1   /* Enable EEH functionality  */
#define VFIO_EEH_PE_UNFREEZE_IO     2   /* Enable IO for frozen PE   */
#define VFIO_EEH_PE_UNFREEZE_DMA    3   /* Enable DMA for frozen PE  */
#define VFIO_EEH_PE_GET_STATE       4   /* PE state retrieval        */
#define  VFIO_EEH_PE_STATE_NORMAL   0   /* PE in functional state    */
#define  VFIO_EEH_PE_STATE_RESET    1   /* PE reset in progress      */
#define  VFIO_EEH_PE_STATE_STOPPED  2   /* Stopped DMA and IO        */
#define  VFIO_EEH_PE_STATE_STOPPED_DMA  4   /* Stopped DMA only          */
#define  VFIO_EEH_PE_STATE_UNAVAIL  5   /* State unavailable         */
#define VFIO_EEH_PE_RESET_DEACTIVATE    5   /* Deassert PE reset         */
#define VFIO_EEH_PE_RESET_HOT       6   /* Assert hot reset          */
#define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert fundamental reset  */
#define VFIO_EEH_PE_CONFIGURE       8   /* PE configuration          */
#define VFIO_EEH_PE_INJECT_ERR      9   /* Inject EEH error          */

#define VFIO_EEH_PE_OP          _IO(VFIO_TYPE, VFIO_BASE + 21)
1289 
/**
 * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory)
 *
 * Registers user space memory where DMA is allowed. It pins
 * user pages and does the locked memory accounting so
 * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
 * get faster, as the pages are already pinned.
 */
struct vfio_iommu_spapr_register_memory {
    __u32   argsz;
    __u32   flags;
    __u64   vaddr;              /* Process virtual address */
    __u64   size;               /* Size of mapping (bytes) */
};
#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY    _IO(VFIO_TYPE, VFIO_BASE + 17)

/**
 * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory)
 *
 * Unregisters user space memory registered with
 * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
 * Uses vfio_iommu_spapr_register_memory for parameters.
 */
#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY  _IO(VFIO_TYPE, VFIO_BASE + 18)
1314 
/**
 * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create)
 *
 * Creates an additional TCE table and programs it (sets a new DMA window)
 * to every IOMMU group in the container. It receives page shift, window
 * size and number of levels in the TCE table being created.
 *
 * It allocates and returns an offset on a PCI bus of the new DMA window.
 */
struct vfio_iommu_spapr_tce_create {
    __u32 argsz;
    __u32 flags;
    /* in */
    __u32 page_shift;
    __u32 __resv1;
    __u64 window_size;
    __u32 levels;
    __u32 __resv2;
    /* out */
    __u64 start_addr;   /* bus offset of the new DMA window */
};
#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)

/**
 * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove)
 *
 * Unprograms a TCE table from all groups in the container and destroys it.
 * It receives a PCI bus offset as a window id.
 */
struct vfio_iommu_spapr_tce_remove {
    __u32 argsz;
    __u32 flags;
    /* in */
    __u64 start_addr;   /* bus offset of the window to remove */
};
#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
1351 
1352 /* ***************************************************************** */
1353 
1354 #endif /* _UAPIVFIO_H */