Diffstat (limited to 'drivers/vfio/vfio_main.c')
 drivers/vfio/vfio_main.c | 347 ++++++++++++++++++++++++++++++++----------
 1 file changed, 274 insertions(+), 73 deletions(-)
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2d168793d4e1..ce6e6a560c70 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <linux/interval_tree.h>
#include <linux/iova_bitmap.h>
+#include <linux/iommufd.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3"
@@ -524,6 +525,11 @@ static int __vfio_register_dev(struct vfio_device *device,
if (IS_ERR(group))
return PTR_ERR(group);
+ if (WARN_ON(device->ops->bind_iommufd &&
+ (!device->ops->unbind_iommufd ||
+ !device->ops->attach_ioas)))
+ return -EINVAL;
+
/*
* If the driver doesn't specify a set then the device is added to a
* singleton set just for itself.
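The new check means a driver cannot supply bind_iommufd alone; unbind_iommufd and attach_ioas must come with it. A minimal sketch of a conforming ops table, assuming the vfio_iommufd_physical_*() helpers introduced alongside this change; the my_* names are illustrative:

static const struct vfio_device_ops my_vfio_ops = {
	.name		= "my-vfio-dev",
	.open_device	= my_open_device,
	.close_device	= my_close_device,
	/* the three iommufd callbacks must be provided together */
	.bind_iommufd	= vfio_iommufd_physical_bind,
	.unbind_iommufd	= vfio_iommufd_physical_unbind,
	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
};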
@@ -662,6 +668,18 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
/*
* VFIO Group fd, /dev/vfio/$GROUP
*/
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+ lockdep_assert_held(&group->group_lock);
+ /*
+ * There can only be users if there is a container, and if there is a
+ * container there must be users.
+ */
+ WARN_ON(!group->container != !group->container_users);
+
+ return group->container || group->iommufd;
+}
+
/*
* VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
* if there was no container to unset. Since the ioctl is called on
@@ -673,15 +691,21 @@ static int vfio_group_ioctl_unset_container(struct vfio_group *group)
int ret = 0;
mutex_lock(&group->group_lock);
- if (!group->container) {
+ if (!vfio_group_has_iommu(group)) {
ret = -EINVAL;
goto out_unlock;
}
- if (group->container_users != 1) {
- ret = -EBUSY;
- goto out_unlock;
+ if (group->container) {
+ if (group->container_users != 1) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ vfio_group_detach_container(group);
+ }
+ if (group->iommufd) {
+ iommufd_ctx_put(group->iommufd);
+ group->iommufd = NULL;
}
- vfio_group_detach_container(group);
out_unlock:
mutex_unlock(&group->group_lock);
@@ -692,6 +716,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
int __user *arg)
{
struct vfio_container *container;
+ struct iommufd_ctx *iommufd;
struct fd f;
int ret;
int fd;
@@ -704,7 +729,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
return -EBADF;
mutex_lock(&group->group_lock);
- if (group->container || WARN_ON(group->container_users)) {
+ if (vfio_group_has_iommu(group)) {
ret = -EINVAL;
goto out_unlock;
}
@@ -714,12 +739,28 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
}
container = vfio_container_from_file(f.file);
- ret = -EINVAL;
if (container) {
ret = vfio_container_attach_group(container, group);
goto out_unlock;
}
+ iommufd = iommufd_ctx_from_file(f.file);
+ if (!IS_ERR(iommufd)) {
+ u32 ioas_id;
+
+ ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+ if (ret) {
+ iommufd_ctx_put(iommufd);
+ goto out_unlock;
+ }
+
+ group->iommufd = iommufd;
+ goto out_unlock;
+ }
+
+ /* The FD passed is not recognized. */
+ ret = -EBADFD;
+
out_unlock:
mutex_unlock(&group->group_lock);
fdput(f);
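From userspace the flow is unchanged: the same ioctl now accepts either fd type. A minimal sketch, assuming /dev/iommu is the iommufd character device; group_set_iommufd() is an illustrative name:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int group_set_iommufd(int group_fd)
{
	int iommufd = open("/dev/iommu", O_RDWR);

	if (iommufd < 0)
		return -1;
	/* an unrecognized fd now fails with EBADFD rather than EINVAL */
	if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &iommufd) < 0) {
		close(iommufd);
		return -1;
	}
	return iommufd;
}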
@@ -729,45 +770,93 @@ out_unlock:
static const struct file_operations vfio_device_fops;
/* true if the vfio_device has open_device() called but not close_device() */
-bool vfio_assert_device_open(struct vfio_device *device)
+static bool vfio_assert_device_open(struct vfio_device *device)
{
return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
-static struct file *vfio_device_open(struct vfio_device *device)
+static int vfio_device_first_open(struct vfio_device *device)
{
- struct file *filep;
int ret;
+ lockdep_assert_held(&device->dev_set->lock);
+
+ if (!try_module_get(device->dev->driver->owner))
+ return -ENODEV;
+
+ /*
+ * Here we pass the KVM pointer with the group under the lock. If the
+ * device driver will use it, it must obtain a reference and release it
+ * during close_device.
+ */
mutex_lock(&device->group->group_lock);
- ret = vfio_device_assign_container(device);
- mutex_unlock(&device->group->group_lock);
- if (ret)
- return ERR_PTR(ret);
+ if (!vfio_group_has_iommu(device->group)) {
+ ret = -EINVAL;
+ goto err_module_put;
+ }
- if (!try_module_get(device->dev->driver->owner)) {
- ret = -ENODEV;
- goto err_unassign_container;
+ if (device->group->container) {
+ ret = vfio_group_use_container(device->group);
+ if (ret)
+ goto err_module_put;
+ } else if (device->group->iommufd) {
+ ret = vfio_iommufd_bind(device, device->group->iommufd);
+ if (ret)
+ goto err_module_put;
}
+ device->kvm = device->group->kvm;
+ if (device->ops->open_device) {
+ ret = device->ops->open_device(device);
+ if (ret)
+ goto err_container;
+ }
+ if (device->group->container)
+ vfio_device_container_register(device);
+ mutex_unlock(&device->group->group_lock);
+ return 0;
+
+err_container:
+ device->kvm = NULL;
+ if (device->group->container)
+ vfio_group_unuse_container(device->group);
+ else if (device->group->iommufd)
+ vfio_iommufd_unbind(device);
+err_module_put:
+ mutex_unlock(&device->group->group_lock);
+ module_put(device->dev->driver->owner);
+ return ret;
+}
+
+static void vfio_device_last_close(struct vfio_device *device)
+{
+ lockdep_assert_held(&device->dev_set->lock);
+
+ mutex_lock(&device->group->group_lock);
+ if (device->group->container)
+ vfio_device_container_unregister(device);
+ if (device->ops->close_device)
+ device->ops->close_device(device);
+ device->kvm = NULL;
+ if (device->group->container)
+ vfio_group_unuse_container(device->group);
+ else if (device->group->iommufd)
+ vfio_iommufd_unbind(device);
+ mutex_unlock(&device->group->group_lock);
+ module_put(device->dev->driver->owner);
+}
+
+static struct file *vfio_device_open(struct vfio_device *device)
+{
+ struct file *filep;
+ int ret;
+
mutex_lock(&device->dev_set->lock);
device->open_count++;
if (device->open_count == 1) {
- /*
- * Here we pass the KVM pointer with the group under the read
- * lock. If the device driver will use it, it must obtain a
- * reference and release it during close_device.
- */
- mutex_lock(&device->group->group_lock);
- device->kvm = device->group->kvm;
-
- if (device->ops->open_device) {
- ret = device->ops->open_device(device);
- if (ret)
- goto err_undo_count;
- }
- vfio_device_container_register(device);
- mutex_unlock(&device->group->group_lock);
+ ret = vfio_device_first_open(device);
+ if (ret)
+ goto err_unlock;
}
mutex_unlock(&device->dev_set->lock);
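The comment in vfio_device_first_open() implies a contract for drivers: device->kvm is only stable while group_lock is held, so a driver that keeps the pointer must take its own reference and drop it from close_device(). A hedged sketch, assuming kvm_get_kvm()/kvm_put_kvm() from linux/kvm_host.h; the my_* names are illustrative:

struct my_device {
	struct vfio_device vdev;
	struct kvm *kvm;
};

static int my_open_device(struct vfio_device *vdev)
{
	struct my_device *mdev = container_of(vdev, struct my_device, vdev);

	if (!vdev->kvm)
		return -EINVAL;
	kvm_get_kvm(vdev->kvm);		/* dropped in my_close_device() */
	mdev->kvm = vdev->kvm;
	return 0;
}

static void my_close_device(struct vfio_device *vdev)
{
	struct my_device *mdev = container_of(vdev, struct my_device, vdev);

	kvm_put_kvm(mdev->kvm);
	mdev->kvm = NULL;
}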
@@ -800,21 +889,11 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
- mutex_lock(&device->group->group_lock);
- if (device->open_count == 1 && device->ops->close_device) {
- device->ops->close_device(device);
-
- vfio_device_container_unregister(device);
- }
-err_undo_count:
- mutex_unlock(&device->group->group_lock);
+ if (device->open_count == 1)
+ vfio_device_last_close(device);
+err_unlock:
device->open_count--;
- if (device->open_count == 0 && device->kvm)
- device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);
- module_put(device->dev->driver->owner);
-err_unassign_container:
- vfio_device_unassign_container(device);
return ERR_PTR(ret);
}
@@ -878,7 +957,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
return -ENODEV;
}
- if (group->container)
+ /*
+ * With the container FD, iommu_group_claim_dma_owner() is done during
+ * SET_CONTAINER, but for IOMMUFD it is deferred to
+ * VFIO_GROUP_GET_DEVICE_FD. This means that with iommufd
+ * VFIO_GROUP_FLAGS_VIABLE can be reported even though a later
+ * GET_DEVICE_FD will fail the viability check.
+ */
+ if (vfio_group_has_iommu(group))
status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
VFIO_GROUP_FLAGS_VIABLE;
else if (!iommu_group_dma_owner_claimed(group->iommu_group))
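From userspace the distinction is invisible in GET_STATUS itself. A minimal probe sketch; group_is_viable() is an illustrative name:

#include <sys/ioctl.h>
#include <linux/vfio.h>

int group_is_viable(int group_fd)
{
	struct vfio_group_status status = {
		.argsz = sizeof(status),
	};

	if (ioctl(group_fd, VFIO_GROUP_GET_STATUS, &status) < 0)
		return -1;
	/* with iommufd, a viable group can still fail GET_DEVICE_FD */
	return !!(status.flags & VFIO_GROUP_FLAGS_VIABLE);
}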
@@ -961,6 +1047,10 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
WARN_ON(group->notifier.head);
if (group->container)
vfio_group_detach_container(group);
+ if (group->iommufd) {
+ iommufd_ctx_put(group->iommufd);
+ group->iommufd = NULL;
+ }
group->opened_file = NULL;
mutex_unlock(&group->group_lock);
return 0;
@@ -1016,21 +1106,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
- mutex_lock(&device->group->group_lock);
- if (device->open_count == 1 && device->ops->close_device)
- device->ops->close_device(device);
-
- vfio_device_container_unregister(device);
- mutex_unlock(&device->group->group_lock);
+ if (device->open_count == 1)
+ vfio_device_last_close(device);
device->open_count--;
- if (device->open_count == 0)
- device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);
- module_put(device->dev->driver->owner);
-
- vfio_device_unassign_container(device);
-
vfio_device_put_registration(device);
return 0;
@@ -1613,24 +1693,27 @@ EXPORT_SYMBOL_GPL(vfio_file_is_group);
bool vfio_file_enforced_coherent(struct file *file)
{
struct vfio_group *group = file->private_data;
- bool ret;
+ struct vfio_device *device;
+ bool ret = true;
if (!vfio_file_is_group(file))
return true;
- mutex_lock(&group->group_lock);
- if (group->container) {
- ret = vfio_container_ioctl_check_extension(group->container,
- VFIO_DMA_CC_IOMMU);
- } else {
- /*
- * Since the coherency state is determined only once a container
- * is attached the user must do so before they can prove they
- * have permission.
- */
- ret = true;
+ /*
+ * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
+ * any domain later attached to it will also not support it. If the cap
+ * is set then the iommu_domain eventually attached to the device/group
+ * must support enforce_cache_coherency().
+ */
+ mutex_lock(&group->device_lock);
+ list_for_each_entry(device, &group->device_list, group_next) {
+ if (!device_iommu_capable(device->dev,
+ IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
+ ret = false;
+ break;
+ }
}
- mutex_unlock(&group->group_lock);
+ mutex_unlock(&group->device_lock);
return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
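For context, a hedged sketch of the kind of caller this serves, mirroring the pattern in virt/kvm/vfio.c; the function name is illustrative:

static void my_update_dma_coherency(struct kvm *kvm, struct file *vfio_file)
{
	/*
	 * If any device in the group lacks enforced coherency, the guest
	 * may observe noncoherent DMA and KVM must account for it.
	 */
	if (!vfio_file_enforced_coherent(vfio_file))
		kvm_arch_register_noncoherent_dma(kvm);
}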
@@ -1794,6 +1877,126 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
/*
+ * Pin contiguous user pages and return their associated host pages for local
+ * domain only.
+ * @device [in] : device
+ * @iova [in] : starting IOVA of user pages to be pinned.
+ * @npage [in] : count of pages to be pinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in] : protection flags
+ * @pages[out] : array of host pages
+ * Return error or number of pages pinned.
+ *
+ * A driver may only call this function if the vfio_device was created
+ * by vfio_register_emulated_iommu_dev(), a restriction inherited from
+ * vfio_container_pin_pages().
+ */
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+ int npage, int prot, struct page **pages)
+{
+ /* group->container cannot change while a vfio device is open */
+ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
+ return -EINVAL;
+ if (device->group->container)
+ return vfio_container_pin_pages(device->group->container,
+ device->group->iommu_group,
+ iova, npage, prot, pages);
+ if (device->iommufd_access) {
+ int ret;
+
+ if (iova > ULONG_MAX)
+ return -EINVAL;
+ /*
+ * VFIO ignores the sub page offset, npages is from the start of
+ * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
+ * the sub page offset by doing:
+ * pages[0] + (iova % PAGE_SIZE)
+ */
+ ret = iommufd_access_pin_pages(
+ device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
+ npage * PAGE_SIZE, pages,
+ (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
+ if (ret)
+ return ret;
+ return npage;
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
+
+/*
+ * Unpin contiguous host pages for local domain only.
+ * @device [in] : device
+ * @iova [in] : starting IOVA of user pages to be unpinned.
+ * @npage [in] : count of pages to be unpinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ */
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+{
+ if (WARN_ON(!vfio_assert_device_open(device)))
+ return;
+
+ if (device->group->container) {
+ vfio_container_unpin_pages(device->group->container, iova,
+ npage);
+ return;
+ }
+ if (device->iommufd_access) {
+ if (WARN_ON(iova > ULONG_MAX))
+ return;
+ iommufd_access_unpin_pages(device->iommufd_access,
+ ALIGN_DOWN(iova, PAGE_SIZE),
+ npage * PAGE_SIZE);
+ return;
+ }
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
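/*
 * Editorial sketch, not part of this patch: an emulated-device caller of
 * the pair above, recovering the sub-page offset as documented with
 * pages[0] + (iova % PAGE_SIZE). Assumes a device created with
 * vfio_register_emulated_iommu_dev(), a value that does not cross a page
 * boundary, and linux/highmem.h for kmap_local_page(); the function name
 * is illustrative.
 */
static int my_read_guest_u32(struct vfio_device *vdev, dma_addr_t iova,
			     u32 *val)
{
	struct page *page;
	void *vaddr;
	int ret;

	ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ, &page);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	vaddr = kmap_local_page(page);
	*val = *(u32 *)(vaddr + (iova % PAGE_SIZE));	/* sub-page offset */
	kunmap_local(vaddr);
	vfio_unpin_pages(vdev, iova, 1);
	return 0;
}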
+
+/*
+ * This interface allows the CPUs to perform virtual DMA on behalf of
+ * the device.
+ *
+ * The CPUs copy between a kernel buffer and a range of IOVAs that map
+ * user space memory.
+ *
+ * As the read/write of user space memory is conducted via the CPUs and is
+ * not a real device DMA, it is not necessary to pin the user space memory.
+ *
+ * @device [in] : VFIO device
+ * @iova [in] : base IOVA of a user space buffer
+ * @data [in] : pointer to kernel buffer
+ * @len [in] : kernel buffer length
+ * @write [in] : true to write, false to read
+ * Return error code on failure or 0 on success.
+ */
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
+ size_t len, bool write)
+{
+ if (!data || len <= 0 || !vfio_assert_device_open(device))
+ return -EINVAL;
+
+ if (device->group->container)
+ return vfio_container_dma_rw(device->group->container, iova,
+ data, len, write);
+
+ if (device->iommufd_access) {
+ unsigned int flags = 0;
+
+ if (iova > ULONG_MAX)
+ return -EINVAL;
+
+ /* VFIO historically tries to auto-detect a kthread */
+ if (!current->mm)
+ flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+ if (write)
+ flags |= IOMMUFD_ACCESS_RW_WRITE;
+ return iommufd_access_rw(device->iommufd_access, iova, data,
+ len, flags);
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_dma_rw);
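/*
 * Editorial sketch, not part of this patch: a driver-side caller. The same
 * call works whether the group is backed by a container or an iommufd
 * access; the function name is illustrative.
 */
static int my_fetch_descriptor(struct vfio_device *vdev, dma_addr_t iova,
			       void *desc, size_t len)
{
	/* write=false: copy from the IOVA range into the kernel buffer */
	return vfio_dma_rw(vdev, iova, desc, len, false);
}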
+
+/*
* Module/class support
*/
static char *vfio_devnode(struct device *dev, umode_t *mode)
@@ -1870,6 +2073,4 @@ MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_ALIAS_MISCDEV(VFIO_MINOR);
-MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");