Skip to content

Commit

Permalink
iommu: re-add support for ephemeral mappings
Browse files Browse the repository at this point in the history
The nvme_sync() helper function maps a buffer, issues the command, and
unmaps the buffer. When we removed ephemeral IOVAs we relied on the user
to handle these temporary mappings from a reserved set of IOVAs at the
start of the IOVA space. While ephemeral IOVAs are used rarely, it is
nicer to be able to just pass a buffer that we know will only be mapped
temporarily. The new iommufd backend does not care about temporary
mappings since it uses the kernel iova allocation that handles this just
fine.

The vfio backend still needs some kind of special treatment. We retain a
reserved space (by default, 64k) at the start of the iova range and use
that for temporary (ephemeral) mappings, only now, instead of requiring
the user to map from that area explicitly, a new IOMMU_MAP_EPHEMERAL
flag may be passed to indicate the temporary nature of the iova. Like
before, we recycle the whole ephemeral space when no mappings are in
use.

Signed-off-by: Klaus Jensen <[email protected]>
  • Loading branch information
birkelund committed Nov 9, 2023
1 parent 7125ec4 commit 3ba79ae
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 49 deletions.
2 changes: 0 additions & 2 deletions include/vfn/iommu/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#ifndef LIBVFN_IOMMU_CONTEXT_H
#define LIBVFN_IOMMU_CONTEXT_H

#define __VFN_IOVA_MIN 0x10000

/**
* iommu_get_context - create a new iommu context
* @name: context identifier
Expand Down
12 changes: 9 additions & 3 deletions include/vfn/iommu/dma.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,19 @@
/**
* enum iommu_map_flags - flags for DMA mapping
* @IOMMU_MAP_FIXED_IOVA: If cleared, an appropriate IOVA will be allocated
* @IOMMU_MAP_EPHEMERAL: If set, the mapping is considered temporary
* @IOMMU_MAP_NOWRITE: DMA is not allowed to write to this mapping
* @IOMMU_MAP_NOREAD: DMA is not allowed to read from this mapping
*
* IOMMU_MAP_EPHEMERAL may change how the iova is allocated. I.e., currently,
* the vfio-based backend will allocate an IOVA from a reserved range of 64k.
* The iommufd-based backend has no such restrictions.
*/
/*
 * DMA mapping flags (bitmask; combine with bitwise OR).
 *
 * NOTE: the scraped diff duplicated the pre-commit enumerators; this is the
 * post-commit definition with IOMMU_MAP_EPHEMERAL inserted and the access
 * flags renumbered accordingly.
 */
enum iommu_map_flags {
	/* use the caller-supplied IOVA instead of allocating one */
	IOMMU_MAP_FIXED_IOVA = 1 << 0,

	/* mapping is temporary; backends may allocate from a reserved range */
	IOMMU_MAP_EPHEMERAL = 1 << 1,

	/* DMA may not write to this mapping */
	IOMMU_MAP_NOWRITE = 1 << 2,

	/* DMA may not read from this mapping */
	IOMMU_MAP_NOREAD = 1 << 3,
};

/**
Expand Down
8 changes: 4 additions & 4 deletions include/vfn/nvme/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ int nvme_aer(struct nvme_ctrl *ctrl, void *opaque);
* nvme_sync - Submit a command and wait for completion
* @sq: Submission queue
* @sqe: Submission queue entry
* @iova: Mapped command payload
* @buf: Command payload
* @len: Command payload length
* @cqe: Completion queue entry to fill
*
Expand All @@ -87,7 +87,8 @@ int nvme_aer(struct nvme_ctrl *ctrl, void *opaque);
* Return: On success, returns ``0``. On error, returns ``-1`` and sets
* ``errno``.
*/
int nvme_sync(struct nvme_sq *sq, void *sqe, uint64_t iova, size_t len, void *cqe);
int nvme_sync(struct nvme_ctrl *ctrl, struct nvme_sq *sq, void *sqe, void *buf, size_t len,
void *cqe);

/**
* nvme_admin - Submit an Admin command and wait for completion
Expand All @@ -97,8 +98,7 @@ int nvme_sync(struct nvme_sq *sq, void *sqe, uint64_t iova, size_t len, void *cq
* @len: Command payload length
* @cqe: Completion queue entry to fill
*
* Shortcut for nvme_sync(), mapping the buffer using the reserved iova space
* and submitting to the admin submission queue.
* Shortcut for nvme_sync(), submitting to the admin submission queue.
*
* Return: On success, returns ``0``. On error, returns ``-1`` and sets
* ``errno``.
Expand Down
3 changes: 2 additions & 1 deletion src/iommu/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "context.h"

#define IOVA_MIN 0x10000
#define IOVA_MAX_39BITS (1ULL << 39)

#ifdef HAVE_VFIO_DEVICE_BIND_IOMMUFD
Expand Down Expand Up @@ -75,7 +76,7 @@ void iommu_ctx_init(struct iommu_ctx *ctx)
* For vfio, if we end up not being able to get a list of allowed
* iova ranges, be conservative.
*/
ctx->iova_ranges[0].start = __VFN_IOVA_MIN;
ctx->iova_ranges[0].start = IOVA_MIN;
ctx->iova_ranges[0].last = IOVA_MAX_39BITS - 1;

pthread_mutex_init(&ctx->lock, NULL);
Expand Down
4 changes: 3 additions & 1 deletion src/iommu/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ struct iommu_ctx;

struct iommu_ctx_ops {
/* container/ioas ops */
int (*iova_reserve)(struct iommu_ctx *ctx, size_t len, uint64_t *iova);
int (*iova_reserve)(struct iommu_ctx *ctx, size_t len, uint64_t *iova,
unsigned long flags);
void (*iova_put_ephemeral)(struct iommu_ctx *ctx);
int (*dma_map)(struct iommu_ctx *ctx, void *vaddr, size_t len, uint64_t *iova,
unsigned long flags);
int (*dma_unmap)(struct iommu_ctx *ctx, uint64_t iova, size_t len);
Expand Down
11 changes: 8 additions & 3 deletions src/iommu/dma.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ static int iova_cmp(const void *vaddr, const struct skiplist_node *n)
return 0;
}

static int iova_map_add(struct iova_map *map, void *vaddr, size_t len, uint64_t iova)
static int iova_map_add(struct iova_map *map, void *vaddr, size_t len, uint64_t iova,
unsigned long flags)
{
__autolock(&map->lock);

Expand All @@ -60,6 +61,7 @@ static int iova_map_add(struct iova_map *map, void *vaddr, size_t len, uint64_t
m->vaddr = vaddr;
m->len = len;
m->iova = iova;
m->flags = flags;

skiplist_link(&map->list, &m->list, update);

Expand Down Expand Up @@ -121,7 +123,7 @@ int iommu_map_vaddr(struct iommu_ctx *ctx, void *vaddr, size_t len, uint64_t *io

if (flags & IOMMU_MAP_FIXED_IOVA) {
_iova = *iova;
} else if (ctx->ops.iova_reserve && ctx->ops.iova_reserve(ctx, len, &_iova)) {
} else if (ctx->ops.iova_reserve && ctx->ops.iova_reserve(ctx, len, &_iova, flags)) {
log_debug("failed to allocate iova\n");
return -1;
}
Expand All @@ -131,7 +133,7 @@ int iommu_map_vaddr(struct iommu_ctx *ctx, void *vaddr, size_t len, uint64_t *io
return -1;
}

if (iova_map_add(&ctx->map, vaddr, len, _iova)) {
if (iova_map_add(&ctx->map, vaddr, len, _iova, flags)) {
log_debug("failed to add mapping\n");
return -1;
}
Expand Down Expand Up @@ -161,6 +163,9 @@ int iommu_unmap_vaddr(struct iommu_ctx *ctx, void *vaddr, size_t *len)
return -1;
}

if (m->flags & IOMMU_MAP_EPHEMERAL && ctx->ops.iova_put_ephemeral)
ctx->ops.iova_put_ephemeral(ctx);

iova_map_remove(&ctx->map, m->vaddr);

return 0;
Expand Down
51 changes: 48 additions & 3 deletions src/iommu/vfio.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct vfio_container {
struct vfio_group groups[VFN_MAX_VFIO_GROUPS];

pthread_mutex_t lock;
uint64_t next;
uint64_t next, next_ephemeral, nephemerals;
struct iommu_iova_range ephemerals;
};

static struct vfio_container vfio_default_container = {
Expand Down Expand Up @@ -210,7 +211,8 @@ static bool __iova_reserve(struct iommu_iova_range *ranges, int nranges, uint64_
return false;
}

static int vfio_iommu_type1_iova_reserve(struct iommu_ctx *ctx, size_t len, uint64_t *iova)
static int vfio_iommu_type1_iova_reserve(struct iommu_ctx *ctx, size_t len, uint64_t *iova,
unsigned long flags)
{
struct vfio_container *vfio = container_of_var(ctx, vfio, ctx);

Expand All @@ -222,9 +224,19 @@ static int vfio_iommu_type1_iova_reserve(struct iommu_ctx *ctx, size_t len, uint
return -1;
}

if (flags & IOMMU_MAP_EPHEMERAL) {
if (!__iova_reserve(&vfio->ephemerals, 1, &vfio->next_ephemeral, len, iova))
goto enomem;

atomic_inc(&vfio->nephemerals);

return 0;
}

if (__iova_reserve(ctx->iova_ranges, ctx->nranges, &vfio->next, len, iova))
return 0;

enomem:
errno = ENOMEM;
return -1;
}
Expand All @@ -245,11 +257,23 @@ static int vfio_iommu_type1_init(struct vfio_container *vfio)
}
#endif

if (vfio_iommu_type1_iova_reserve(&vfio->ctx, VFIO_IOMMU_TYPE1_IOVA_RESERVED, &iova)) {
if (vfio_iommu_type1_iova_reserve(&vfio->ctx, VFIO_IOMMU_TYPE1_IOVA_RESERVED, &iova, 0x0)) {
log_debug("could not reserve iova range\n");
return -1;
}

vfio->ephemerals.start = iova;
vfio->ephemerals.last = iova + VFIO_IOMMU_TYPE1_IOVA_RESERVED - 1;

if (logv(LOG_INFO)) {
__autofree char *str;

log_fatal_if(iommu_iova_range_to_string(&vfio->ephemerals, &str) < 0,
"iommu_iova_range_to_string\n");

log_info("reserved 64k for ephemerals %s\n", str);
}

return 0;
}

Expand Down Expand Up @@ -440,6 +464,26 @@ static int vfio_iommu_type1_do_dma_unmap(struct iommu_ctx *ctx, uint64_t iova, s
return 0;
}

/*
 * Reset the ephemeral IOVA bump-allocator cursor back to the start of the
 * reserved range so the whole range can be reused for new temporary mappings.
 *
 * NOTE(review): __autolock() appears to be a scope-based lock guard that
 * releases vfio->lock on function exit - confirm against its definition.
 */
static void vfio_iommu_type1_recycle_ephemeral_iovas(struct vfio_container *vfio)
{
	__autolock(&vfio->lock);

	/* emit a trace point (presumably compiled/filtered by trace class) */
	trace_guard(VFIO_IOMMU_TYPE1_RECYCLE_EPHEMERAL_IOVAS) {
		trace_emit("recycling ephemeral range (0x%" PRIx64 " -> 0x%llx)\n",
			   vfio->next_ephemeral, vfio->ephemerals.start);
	}

	/* rewind the allocator cursor to the start of the reserved range */
	vfio->next_ephemeral = vfio->ephemerals.start;
}

/*
 * Drop one reference on the ephemeral IOVA range. When the count of
 * outstanding ephemeral mappings drops to zero, recycle the whole reserved
 * range so its IOVAs can be handed out again.
 */
static void vfio_iommu_type1_iova_put_ephemeral(struct iommu_ctx *ctx)
{
	struct vfio_container *vfio = container_of_var(ctx, vfio, ctx);

	/* last outstanding ephemeral mapping released -> reset the allocator */
	if (atomic_dec_fetch(&vfio->nephemerals) == 0)
		vfio_iommu_type1_recycle_ephemeral_iovas(vfio);
}

#ifdef VFIO_UNMAP_ALL
static int vfio_iommu_type1_do_dma_unmap_all(struct iommu_ctx *ctx)
{
Expand All @@ -463,6 +507,7 @@ static const struct iommu_ctx_ops vfio_ops = {
.get_device_fd = vfio_get_device_fd,

.iova_reserve = vfio_iommu_type1_iova_reserve,
.iova_put_ephemeral = vfio_iommu_type1_iova_put_ephemeral,

.dma_map = vfio_iommu_type1_do_dma_map,
.dma_unmap = vfio_iommu_type1_do_dma_unmap,
Expand Down
2 changes: 1 addition & 1 deletion src/nvme/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ static int nvme_configure_adminq(struct nvme_ctrl *ctrl, unsigned long sq_flags)

/*
 * Submit an admin command with no payload and wait for its completion.
 *
 * Thin wrapper around nvme_sync() targeting the controller's admin
 * submission queue with a NULL buffer (nothing to map) and no completion
 * copy-out.
 *
 * Note: the scraped diff left both the pre- and post-commit return
 * statements in place (the first made the second unreachable and used the
 * old nvme_sync() signature); only the post-commit call is kept.
 *
 * Return: 0 on success, -1 on error with errno set (propagated from
 * nvme_sync()).
 */
static int __admin(struct nvme_ctrl *ctrl, void *sqe)
{
	return nvme_sync(ctrl, ctrl->adminq.sq, sqe, NULL, 0, NULL);
}

int nvme_create_iocq(struct nvme_ctrl *ctrl, int qid, int qsize, int vector)
Expand Down
34 changes: 13 additions & 21 deletions src/nvme/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,23 @@ int nvme_aer(struct nvme_ctrl *ctrl, void *opaque)
return 0;
}

int nvme_sync(struct nvme_sq *sq, void *sqe, uint64_t iova, size_t len, void *cqe_copy)
int nvme_sync(struct nvme_ctrl *ctrl, struct nvme_sq *sq, void *sqe, void *buf, size_t len, void *cqe_copy)
{
struct nvme_cqe cqe;
struct nvme_rq *rq;
uint64_t iova;
int ret = 0;

if (buf && iommu_map_vaddr(__iommu_ctx(ctrl), buf, len, &iova, IOMMU_MAP_EPHEMERAL)) {
log_debug("failed to map vaddr\n");
return -1;
}

rq = nvme_rq_acquire_atomic(sq);
if (!rq)
return -1;

if (len) {
if (buf) {
ret = nvme_rq_map_prp(rq, sqe, iova, len);
if (ret) {
goto release_rq;
Expand Down Expand Up @@ -113,28 +119,14 @@ int nvme_sync(struct nvme_sq *sq, void *sqe, uint64_t iova, size_t len, void *cq
release_rq:
nvme_rq_release_atomic(rq);

if (buf)
log_fatal_if(iommu_unmap_vaddr(__iommu_ctx(ctrl), buf, NULL),
"iommu_unmap_vaddr\n");

return ret;
}

/*
 * nvme_admin - Submit an Admin command and wait for completion
 * @ctrl: Controller
 * @sqe: Submission queue entry
 * @buf: Command payload (may be NULL when @len is 0)
 * @len: Command payload length
 * @cqe_copy: Completion queue entry to fill (may be NULL)
 *
 * Shortcut for nvme_sync(), submitting to the admin submission queue. The
 * payload buffer is mapped (and unmapped) by nvme_sync() itself using an
 * ephemeral IOVA, so no manual mapping from a reserved range is needed.
 *
 * Note: the scraped diff retained the pre-commit body (manual mapping
 * against the removed __VFN_IOVA_MIN reserved range) alongside the new
 * call; only the post-commit implementation is kept here.
 *
 * Return: 0 on success, -1 on error with errno set.
 */
int nvme_admin(struct nvme_ctrl *ctrl, void *sqe, void *buf, size_t len, void *cqe_copy)
{
	return nvme_sync(ctrl, ctrl->adminq.sq, sqe, buf, len, cqe_copy);
}
2 changes: 1 addition & 1 deletion tests/device/flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static int test_flush(uint32_t nsid)
.nsid = cpu_to_le32(nsid),
};

return nvme_sync(sq, &cmd, 0x0, 0, NULL);
return nvme_sync(&ctrl, sq, &cmd, NULL, 0, NULL);
}

int main(int argc, char **argv)
Expand Down
10 changes: 1 addition & 9 deletions tests/device/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
static int test_io(void)
{
void *vaddr;
uint64_t iova;
ssize_t len;
int ret;

Expand All @@ -47,14 +46,7 @@ static int test_io(void)
.nsid = cpu_to_le32(nsid),
};

ret = iommu_map_vaddr(__iommu_ctx(&ctrl), vaddr, len, &iova, 0x0);
if (ret)
err(1, "failed to map vaddr");

ret = nvme_sync(sq, &cmd, iova, len, NULL);

if (iommu_unmap_vaddr(__iommu_ctx(&ctrl), vaddr, NULL))
err(1, "failed to unmap vaddr");
ret = nvme_sync(&ctrl, sq, &cmd, vaddr, len, NULL);

pgunmap(vaddr, len);

Expand Down

0 comments on commit 3ba79ae

Please sign in to comment.