// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */
#include "iommufd_private.h"

void iommufd_viommu_destroy(struct iommufd_object *obj)
{
	struct iommufd_viommu *viommu =
		container_of(obj, struct iommufd_viommu, obj);

	if (viommu->ops && viommu->ops->destroy)
		viommu->ops->destroy(viommu);
	refcount_dec(&viommu->hwpt->common.obj.users);
	xa_destroy(&viommu->vdevs);
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
	struct iommu_viommu_alloc *cmd = ucmd->cmd;
	const struct iommu_user_data user_data = {
		.type = cmd->type,
		.uptr = u64_to_user_ptr(cmd->data_uptr),
		.len = cmd->data_len,
	};
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_viommu *viommu;
	struct iommufd_device *idev;
	const struct iommu_ops *ops;
	size_t viommu_size;
	int rc;

	if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT)
		return -EOPNOTSUPP;

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	ops = dev_iommu_ops(idev->dev);
	if (!ops->get_viommu_size || !ops->viommu_init) {
		rc = -EOPNOTSUPP;
		goto out_put_idev;
	}

	viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
	if (!viommu_size) {
		rc = -EOPNOTSUPP;
		goto out_put_idev;
	}

	/*
	 * It is a driver bug to provide a viommu_size smaller than the core
	 * vIOMMU structure size.
	 */
	if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) {
		rc = -EOPNOTSUPP;
		goto out_put_idev;
	}

	hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
	if (IS_ERR(hwpt_paging)) {
		rc = PTR_ERR(hwpt_paging);
		goto out_put_idev;
	}

	if (!hwpt_paging->nest_parent) {
		rc = -EINVAL;
		goto out_put_hwpt;
	}

	viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd(
		ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU);
	if (IS_ERR(viommu)) {
		rc = PTR_ERR(viommu);
		goto out_put_hwpt;
	}

	xa_init(&viommu->vdevs);
	viommu->type = cmd->type;
	viommu->ictx = ucmd->ictx;
	viommu->hwpt = hwpt_paging;
	refcount_inc(&viommu->hwpt->common.obj.users);
	INIT_LIST_HEAD(&viommu->veventqs);
	init_rwsem(&viommu->veventqs_rwsem);

	/*
	 * In the most likely case, the physical IOMMU is unpluggable, so no
	 * refcount is needed here. A pluggable IOMMU instance (if one exists)
	 * is responsible for its own refcounting.
	 */
	viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev);

	rc = ops->viommu_init(viommu, hwpt_paging->common.domain,
			      user_data.len ? &user_data : NULL);
	if (rc)
		goto out_put_hwpt;

	/* It is a driver bug if viommu->ops was left unset by viommu_init */
	if (WARN_ON_ONCE(!viommu->ops)) {
		rc = -EOPNOTSUPP;
		goto out_put_hwpt;
	}

	cmd->out_viommu_id = viommu->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));

out_put_hwpt:
	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
out_put_idev:
	iommufd_put_object(ucmd->ictx, &idev->obj);
	return rc;
}
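
/*
 * Example (illustrative sketch, not part of this file): a userspace caller
 * would allocate a vIOMMU with IOMMU_VIOMMU_ALLOC roughly as below. Field
 * names follow struct iommu_viommu_alloc in include/uapi/linux/iommufd.h;
 * dev_id/hwpt_id are assumed to come from an earlier device binding and a
 * nesting-parent HWPT allocation, and data_len is left 0 since this type is
 * assumed to take no driver-specific user data.
 *
 *	struct iommu_viommu_alloc cmd = {
 *		.size = sizeof(cmd),
 *		.type = IOMMU_VIOMMU_TYPE_ARM_SMMUV3,	// driver-specific type
 *		.dev_id = dev_id,
 *		.hwpt_id = hwpt_id,	// must be a nesting parent
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_VIOMMU_ALLOC, &cmd))
 *		return -errno;
 *	viommu_id = cmd.out_viommu_id;
 */
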
void iommufd_vdevice_abort(struct iommufd_object *obj)
{
	struct iommufd_vdevice *vdev =
		container_of(obj, struct iommufd_vdevice, obj);
	struct iommufd_viommu *viommu = vdev->viommu;
	struct iommufd_device *idev = vdev->idev;

	lockdep_assert_held(&idev->igroup->lock);

	if (vdev->destroy)
		vdev->destroy(vdev);
	/*
	 * The xa_cmpxchg() may legitimately fail here, if the allocation
	 * path's own xa_cmpxchg() failed and never stored this vdev.
	 */
	xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL);
	refcount_dec(&viommu->obj.users);
	idev->vdev = NULL;
}

void iommufd_vdevice_destroy(struct iommufd_object *obj)
{
	struct iommufd_vdevice *vdev =
		container_of(obj, struct iommufd_vdevice, obj);
	struct iommufd_device *idev = vdev->idev;
	struct iommufd_ctx *ictx = idev->ictx;

	mutex_lock(&idev->igroup->lock);
	iommufd_vdevice_abort(obj);
	mutex_unlock(&idev->igroup->lock);

	iommufd_put_object(ictx, &idev->obj);
}

int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
	struct iommu_vdevice_alloc *cmd = ucmd->cmd;
	struct iommufd_vdevice *vdev, *curr;
	size_t vdev_size = sizeof(*vdev);
	struct iommufd_viommu *viommu;
	struct iommufd_device *idev;
	u64 virt_id = cmd->virt_id;
	int rc = 0;

	/* virt_id indexes an xarray */
	if (virt_id > ULONG_MAX)
		return -EINVAL;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_put_viommu;
	}

	if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) {
		rc = -EINVAL;
		goto out_put_idev;
	}

	if (viommu->ops && viommu->ops->vdevice_size) {
		/*
		 * It is a driver bug if:
		 * - ops->vdevice_size is smaller than the core structure size
		 * - there is no paired ops->vdevice_init op
		 *
		 * Validate this before taking the igroup lock, so the error
		 * path does not have to unwind the lock.
		 */
		if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
				 !viommu->ops->vdevice_init)) {
			rc = -EOPNOTSUPP;
			goto out_put_idev;
		}
		vdev_size = viommu->ops->vdevice_size;
	}

	mutex_lock(&idev->igroup->lock);
	if (idev->destroying) {
		rc = -ENOENT;
		goto out_unlock_igroup;
	}

	if (idev->vdev) {
		rc = -EEXIST;
		goto out_unlock_igroup;
	}

	vdev = (struct iommufd_vdevice *)_iommufd_object_alloc(
		ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE);
	if (IS_ERR(vdev)) {
		rc = PTR_ERR(vdev);
		goto out_unlock_igroup;
	}

	vdev->virt_id = virt_id;
	vdev->viommu = viommu;
	refcount_inc(&viommu->obj.users);
	/*
	 * A wait_cnt reference is held on the idev for as long as we have
	 * this pointer. iommufd_device_pre_destroy() will revoke it before
	 * the idev's real destruction.
	 */
	vdev->idev = idev;

	/*
	 * iommufd_device_destroy() waits until idev->vdev is NULL before
	 * freeing the idev, which only happens once the vdev has finished
	 * its destruction.
	 */
	idev->vdev = vdev;

	curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
	if (curr) {
		rc = xa_err(curr) ?: -EEXIST;
		goto out_abort;
	}

	if (viommu->ops && viommu->ops->vdevice_init) {
		rc = viommu->ops->vdevice_init(vdev);
		if (rc)
			goto out_abort;
	}

	cmd->out_vdevice_id = vdev->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_abort;
	iommufd_object_finalize(ucmd->ictx, &vdev->obj);
	goto out_unlock_igroup;

out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
out_unlock_igroup:
	mutex_unlock(&idev->igroup->lock);
out_put_idev:
	if (rc)
		iommufd_put_object(ucmd->ictx, &idev->obj);
out_put_viommu:
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}
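
/*
 * Example (illustrative sketch, not part of this file): userspace would
 * attach a virtual device ID to a vIOMMU with IOMMU_VDEVICE_ALLOC roughly
 * as below, assuming viommu_id/dev_id come from the earlier allocations and
 * virt_id carries the guest-visible device ID (e.g. a vRID or stream ID).
 *
 *	struct iommu_vdevice_alloc cmd = {
 *		.size = sizeof(cmd),
 *		.viommu_id = viommu_id,
 *		.dev_id = dev_id,
 *		.virt_id = virt_id,	// ID of the device in the guest
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_VDEVICE_ALLOC, &cmd))
 *		return -errno;
 *	vdevice_id = cmd.out_vdevice_id;
 */
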
static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
					    struct iommufd_access *access,
					    u64 base_iova, size_t length)
{
	u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
	u64 offset = base_iova - aligned_iova;

	iommufd_access_unpin_pages(access, aligned_iova,
				   PAGE_ALIGN(length + offset));
	iommufd_access_detach_internal(access);
	iommufd_access_destroy_internal(ictx, access);
}

void iommufd_hw_queue_destroy(struct iommufd_object *obj)
{
	struct iommufd_hw_queue *hw_queue =
		container_of(obj, struct iommufd_hw_queue, obj);

	if (hw_queue->destroy)
		hw_queue->destroy(hw_queue);
	if (hw_queue->access)
		iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx,
						hw_queue->access,
						hw_queue->base_addr,
						hw_queue->length);
	if (hw_queue->viommu)
		refcount_dec(&hw_queue->viommu->obj.users);
}

/*
 * When the HW accesses the guest queue via physical addresses, the underlying
 * physical pages of the guest queue must be contiguous. Also, since
 * IOMMUFD_CMD_IOAS_UNMAP could otherwise remove the mappings of the guest
 * queue from the nesting parent iopt while the HW is still accessing the
 * guest queue memory physically, such a HW queue must hold an access that
 * pins the underlying pages to prevent that from happening.
 */
static struct iommufd_access *
iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
			    struct iommufd_viommu *viommu, phys_addr_t *base_pa)
{
	u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
	u64 offset = cmd->nesting_parent_iova - aligned_iova;
	struct iommufd_access *access;
	struct page **pages;
	size_t max_npages;
	size_t length;
	size_t i;
	int rc;

	/* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
	if (check_add_overflow(offset, cmd->length, &length))
		return ERR_PTR(-ERANGE);
	if (check_add_overflow(length, PAGE_SIZE - 1, &length))
		return ERR_PTR(-ERANGE);
	max_npages = length / PAGE_SIZE;
	/* length needs to be page aligned too */
	length = max_npages * PAGE_SIZE;
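	/*
	 * Worked example (with an assumed 4K PAGE_SIZE): for
	 * nesting_parent_iova = 0x10234 and cmd->length = 0x2000,
	 * aligned_iova = 0x10000 and offset = 0x234, so the overflow-safe
	 * math above yields max_npages = (0x234 + 0x2000 + 0xfff) / 0x1000
	 * = 3 and a pinned, page-aligned length of 0x3000.
	 */
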
	/*
	 * Use kvcalloc() to avoid memory fragmentation for a large page array.
	 * Set __GFP_NOWARN to avoid syzkaller blowups
	 */
	pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	access = iommufd_access_create_internal(viommu->ictx);
	if (IS_ERR(access)) {
		rc = PTR_ERR(access);
		goto out_free;
	}

	rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas);
	if (rc)
		goto out_destroy;

	rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
	if (rc)
		goto out_detach;

	/* Validate if the underlying physical pages are contiguous */
	for (i = 1; i < max_npages; i++) {
		if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1)
			continue;
		rc = -EFAULT;
		goto out_unpin;
	}

	*base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
	kvfree(pages);
	return access;

out_unpin:
	iommufd_access_unpin_pages(access, aligned_iova, length);
out_detach:
	iommufd_access_detach_internal(access);
out_destroy:
	iommufd_access_destroy_internal(viommu->ictx, access);
out_free:
	kvfree(pages);
	return ERR_PTR(rc);
}

int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
	struct iommu_hw_queue_alloc *cmd = ucmd->cmd;
	struct iommufd_hw_queue *hw_queue;
	struct iommufd_viommu *viommu;
	struct iommufd_access *access;
	size_t hw_queue_size;
	phys_addr_t base_pa;
	u64 last;
	int rc;

	if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT)
		return -EOPNOTSUPP;
	if (!cmd->length)
		return -EINVAL;
	if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1,
			       &last))
		return -EOVERFLOW;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	if (!viommu->ops || !viommu->ops->get_hw_queue_size ||
	    !viommu->ops->hw_queue_init_phys) {
		rc = -EOPNOTSUPP;
		goto out_put_viommu;
	}

	hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type);
	if (!hw_queue_size) {
		rc = -EOPNOTSUPP;
		goto out_put_viommu;
	}

	/*
	 * It is a driver bug to provide a hw_queue_size smaller than the core
	 * HW queue structure size.
	 */
	if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) {
		rc = -EOPNOTSUPP;
		goto out_put_viommu;
	}

	hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd(
		ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE);
	if (IS_ERR(hw_queue)) {
		rc = PTR_ERR(hw_queue);
		goto out_put_viommu;
	}

	access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa);
	if (IS_ERR(access)) {
		rc = PTR_ERR(access);
		goto out_put_viommu;
	}

	hw_queue->viommu = viommu;
	refcount_inc(&viommu->obj.users);
	hw_queue->access = access;
	hw_queue->type = cmd->type;
	hw_queue->length = cmd->length;
	hw_queue->base_addr = cmd->nesting_parent_iova;

	rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa);
	if (rc)
		goto out_put_viommu;

	cmd->out_hw_queue_id = hw_queue->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));

out_put_viommu:
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}
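
/*
 * Example (illustrative sketch, not part of this file): userspace would
 * hand a guest-queue region to the driver with IOMMU_HW_QUEUE_ALLOC roughly
 * as below, assuming the queue backing memory was already mapped into the
 * nesting parent IOAS at queue_iova with physically contiguous pages.
 *
 *	struct iommu_hw_queue_alloc cmd = {
 *		.size = sizeof(cmd),
 *		.viommu_id = viommu_id,
 *		.type = IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV, // driver-specific
 *		.index = 0,			// first queue of this type
 *		.nesting_parent_iova = queue_iova,
 *		.length = queue_size,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_HW_QUEUE_ALLOC, &cmd))
 *		return -errno;
 *	hw_queue_id = cmd.out_hw_queue_id;
 */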