nvme: add low-level APIs to map prp/sgl without @rq
Add low-level helpers to map PRP and SGL data along with the data list,
i.e. the memory page that holds the actual PRP or SGL entries.  The
nvme_rq_map* helpers use @rq->page.vaddr, allocated when the request is
set up, as that data list.  Some applications may want to place the data
list page somewhere other than host memory, for example in the CMB
(Controller Memory Buffer).  Updating @rq->page for that purpose is not
a good idea, since it is a libvfn-specific data structure with its own
allocation policy.  To give upper-layer applications more flexibility,
this patch adds low-level APIs that are more generic than the
nvme_rq_map* helpers in that they receive the PRP list (@prplist) or SGL
segment list (@seglist) from the caller.

The newly added public helpers are:

    - nvme_map_prp
    - nvme_mapv_prp
    - nvme_mapv_sgl

This commit introduces no functional changes to the existing APIs.
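
As an illustration (not part of this patch), the sketch below maps a
64 KiB buffer with nvme_map_prp() using a PRP list page that the
application has placed itself, e.g. in the CMB.  The helper name and the
cmb_vaddr, cmb_iova and buf_iova parameters are hypothetical and stand
for memory the application has already set up and DMA-mapped:

    #include <err.h>
    #include <vfn/nvme.h>

    /*
     * Hypothetical example: map a 64 KiB buffer using a PRP list page
     * placed by the application itself (e.g. in the CMB).
     */
    static void map_with_external_prplist(struct nvme_ctrl *ctrl, union nvme_cmd *cmd,
                                          void *cmb_vaddr, uint64_t cmb_iova,
                                          uint64_t buf_iova)
    {
            leint64_t *prplist = cmb_vaddr;

            if (nvme_map_prp(ctrl, prplist, cmb_iova, cmd, buf_iova, 0x10000))
                    err(1, "nvme_map_prp");
    }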

Signed-off-by: Minwoo Im <[email protected]>
minwooim committed Dec 6, 2024
1 parent 87ef356 commit 75f1fba
Showing 4 changed files with 310 additions and 215 deletions.
9 changes: 9 additions & 0 deletions include/vfn/nvme/rq.h
@@ -212,6 +212,9 @@ static inline void nvme_rq_exec(struct nvme_rq *rq, union nvme_cmd *cmd)
*
* Map a buffer of size @len into the command payload.
*
* This helper uses the pre-allocated PRP list page within @rq and is
* equivalent to calling ``nvme_map_prp(ctrl, rq->page.vaddr, ...)``.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
@@ -230,6 +233,9 @@ int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
* allowed to be unaligned, but the entry MUST end on a page boundary. All
* subsequent entries MUST be page aligned.
*
* This helper uses the pre-allocated PRP list page within @rq and is
* equivalent to calling ``nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov)``.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
@@ -246,6 +252,9 @@ int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd
*
* Map the memory contained in @iov into the request SGL.
*
* This helper uses the pre-allocated SGL segment list page within @rq and is
* equivalent to calling ``nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov)``.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
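
To make the PRP alignment rules documented above for nvme_rq_mapv_prp()
(and the new nvme_mapv_prp()) concrete, here is a minimal sketch of an
iovec layout the helpers will accept, assuming a 4 KiB memory page size
(MPS).  The helper name, buffer and entry sizes are hypothetical, and in
a real application the buffer must also be DMA-mapped before the command
is submitted:

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/uio.h>

    /* Hypothetical example: build a PRP-mappable iovec, assuming 4 KiB MPS. */
    static int build_prp_iovec(struct iovec iov[3])
    {
            uint8_t *buf = aligned_alloc(0x1000, 3 * 0x1000);

            if (!buf)
                    return -1;

            /* the first entry may start unaligned, but must end on a page boundary */
            iov[0] = (struct iovec){ .iov_base = buf + 0xe00,  .iov_len = 0x200 };
            /* intermediate entries must be page aligned with a page-multiple length */
            iov[1] = (struct iovec){ .iov_base = buf + 0x1000, .iov_len = 0x1000 };
            /* the last entry must start page aligned but may have any length */
            iov[2] = (struct iovec){ .iov_base = buf + 0x2000, .iov_len = 0x600 };

            return 0;
    }
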
52 changes: 52 additions & 0 deletions include/vfn/nvme/util.h
@@ -110,4 +110,56 @@ int nvme_sync(struct nvme_ctrl *ctrl, struct nvme_sq *sq, union nvme_cmd *sqe, v
int nvme_admin(struct nvme_ctrl *ctrl, union nvme_cmd *sqe, void *buf, size_t len,
struct nvme_cqe *cqe_copy);

/**
* nvme_map_prp - Set up the Physical Region Pages in the data pointer of the
* command from a buffer that is contiguous in iova mapped
* memory.
* @ctrl: &struct nvme_ctrl
* @prplist: The first PRP list page address
* @prplist_iova: PRP list page I/O virtual address
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iova: I/O Virtual Address
* @len: Length of buffer
*
* Map a buffer of size @len into the command payload.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_map_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, uint64_t prplist_iova,
union nvme_cmd *cmd, uint64_t iova, size_t len);

/**
* nvme_mapv_prp - Set up the Physical Region Pages in the data pointer of
* the command from an iovec.
* @ctrl: &struct nvme_ctrl
* @prplist: The first PRP list page address
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the memory contained in @iov into the request PRPs. The first entry is
* allowed to be unaligned, but the entry MUST end on a page boundary. All
* subsequent entries MUST be page aligned.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_mapv_prp(struct nvme_ctrl *ctrl, leint64_t *prplist,
union nvme_cmd *cmd, struct iovec *iov, int niov);

/**
* nvme_mapv_sgl - Set up a Scatter/Gather List in the data pointer of the
* command from an iovec.
* @ctrl: &struct nvme_ctrl
* @seglist: SGL segment list page address
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the memory contained in @iov into the request SGL.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_mapv_sgl(struct nvme_ctrl *ctrl, leint64_t *seglist, union nvme_cmd *cmd,
struct iovec *iov, int niov);

#endif /* LIBVFN_NVME_UTIL_H */
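
For the SGL variant, a caller that manages its own segment list page
could use the new helper roughly as follows.  This is a sketch: the
helper name, the segment list page and the data buffers are
hypothetical, and the buffers are assumed to already be DMA-mapped.
Unlike PRP entries, the data buffers need not be page aligned; only the
controller's optional dword-alignment constraint applies:

    #include <err.h>
    #include <sys/uio.h>
    #include <vfn/nvme.h>

    /*
     * Hypothetical example: @seglist points to a page-sized, DMA-mapped
     * buffer owned by the caller (host memory, CMB, ...).
     */
    static void map_two_buffers_sgl(struct nvme_ctrl *ctrl, union nvme_cmd *cmd,
                                    leint64_t *seglist, void *buf0, void *buf1)
    {
            struct iovec iov[2] = {
                    { .iov_base = buf0, .iov_len = 0x200 },
                    { .iov_base = buf1, .iov_len = 0x1000 },
            };

            if (nvme_mapv_sgl(ctrl, seglist, cmd, iov, 2))
                    err(1, "nvme_mapv_sgl");
    }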
222 changes: 7 additions & 215 deletions src/nvme/rq.c
@@ -35,241 +35,33 @@
#include <vfn/vfio.h>
#include <vfn/nvme.h>

#include "ccan/minmax/minmax.h"

#include "iommu/context.h"
#include "types.h"

static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
int pageshift)
{
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);

/* number of prps required to map the buffer */
int prpcount = 1;

*prp1 = cpu_to_le64(iova);

/* account for what is covered with the first prp */
len -= min_t(size_t, len, pagesize - (iova & (pagesize - 1)));

/* any residual just adds more prps */
if (len)
prpcount += (int)ALIGN_UP(len, pagesize) >> pageshift;

if (prpcount > 1 && !ALIGNED(iova, pagesize))
/* align down to simplify loop below */
iova = ALIGN_DOWN(iova, pagesize);

if (prpcount > max_prps) {
errno = EINVAL;
return -1;
}

/*
* Map the remaining parts of the buffer into prp2/prplist. iova will be
* aligned from the above, which simplifies this.
*/
for (int i = 1; i < prpcount; i++)
prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

/*
* prpcount may be zero if the buffer length was less than the page
* size, so clamp it to 1 in that case.
*/
return clamp_t(int, prpcount, 1, prpcount);
}

static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps,
int pageshift)
{
int prpcount = max_t(int, 1, (int)len >> pageshift);
size_t pagesize = 1 << pageshift;

if (prpcount > max_prps) {
log_error("too many prps required\n");

errno = EINVAL;
return -1;
}

if (!ALIGNED(iova, pagesize)) {
log_error("unaligned iova 0x%" PRIx64 "\n", iova);

errno = EINVAL;
return -1;
}

for (int i = 0; i < prpcount; i++)
prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

return prpcount;
}

static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount)
{
if (prpcount == 2)
*prp2 = prplist0;
else if (prpcount > 2)
*prp2 = prplist;
else
*prp2 = 0x0;
}

int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
size_t len)
{
int prpcount;
leint64_t *prplist = rq->page.vaddr;
int pageshift = __mps_to_pageshift(ctrl->config.mps);

prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
if (prpcount < 0) {
errno = EINVAL;
return -1;
}

__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);

return 0;
}

int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
struct iommu_ctx *ctx = __iommu_ctx(ctrl);
uint64_t prplist_iova;

leint64_t *prplist = rq->page.vaddr;
size_t len = iov->iov_len;
int pageshift = __mps_to_pageshift(ctrl->config.mps);
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);
int ret, prpcount;
uint64_t iova;

if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
if (!iommu_translate_vaddr(ctx, rq->page.vaddr, &prplist_iova)) {
errno = EFAULT;
return -1;
}

/* map the first segment */
prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
if (prpcount < 0)
goto invalid;

/*
* At this point, one of three conditions must hold:
*
* a) a single prp entry was set up by __map_first, or
* b) the iovec only has a single entry, or
* c) the first buffer ends on a page size boundary
*
* If none holds, the buffer(s) within the iovec cannot be mapped given
* the PRP alignment requirements.
*/
if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) {
log_error("iov[0].iov_base/len invalid\n");

goto invalid;
}

/* map remaining iovec entries; these must be page size aligned */
for (int i = 1; i < niov; i++) {
if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
errno = EFAULT;
return -1;
}

len = iov[i].iov_len;

/* all entries but the last must have a page size aligned len */
if (i < niov - 1 && !ALIGNED(len, pagesize)) {
log_error("unaligned iov[%u].len (%zu)\n", i, len);

goto invalid;
}

ret = __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount,
pageshift);
if (ret < 0)
goto invalid;

prpcount += ret;
}

__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);

return 0;

invalid:
errno = EINVAL;
return -1;
return nvme_map_prp(ctrl, rq->page.vaddr, prplist_iova, cmd, iova, len);
}

static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len)
{
sgld->addr = cpu_to_le64(iova);
sgld->len = cpu_to_le32((uint32_t)len);

sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4;
}

static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n)
int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
sgld->addr = cpu_to_le64(iova);
sgld->len = cpu_to_le32(n << 4);

sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4;
return nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov);
}

int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
struct nvme_sgld *seg = rq->page.vaddr;
struct iommu_ctx *ctx = __iommu_ctx(ctrl);

int pageshift = __mps_to_pageshift(ctrl->config.mps);
int max_sglds = 1 << (pageshift - 4);
int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT;

uint64_t iova;

if (niov == 1) {
if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
errno = EFAULT;
return -1;
}

__sgl_data(&cmd->dptr.sgl, iova, iov->iov_len);

return 0;
}

if (niov > max_sglds) {
errno = EINVAL;
return -1;
}

__sgl_segment(&cmd->dptr.sgl, rq->page.iova, niov);

for (int i = 0; i < niov; i++) {
if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
errno = EFAULT;
return -1;
}

if (dword_align && (iova & 0x3)) {
errno = EINVAL;
return -1;
}

__sgl_data(&seg[i], iova, iov[i].iov_len);
}

cmd->flags |= NVME_FIELD_SET(NVME_CMD_FLAGS_PSDT_SGL_MPTR_CONTIG, CMD_FLAGS_PSDT);

return 0;
return nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov);
}

int nvme_rq_mapv(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
