Skip to content

Commit

Permalink
nvme/rq: add support for sgls
Browse files Browse the repository at this point in the history
Add support for creating SGLs and use them by default.

Note: This includes an API incompatible change in nvme_rq_mapv_prp().

Signed-off-by: Klaus Jensen <[email protected]>
  • Loading branch information
birkelund committed May 31, 2024
1 parent 9ca7fc8 commit 14e08ee
Show file tree
Hide file tree
Showing 7 changed files with 214 additions and 49 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Changelog

## v5.0.0: (unreleased)

### ``nvme_rq``

``nvme_rq_mapv_prp()`` now expects the ``struct iovec *`` to contain virtual
addresses and will translate them when building the data pointer PRPs.

libvfn now supports creating SGLs (and will use them by default if available).
Use the new helper function ``nvme_rq_mapv()`` to map ``struct iovec *``'s. This
function will use SGLs if supported by the controller or fall back to PRPs.
6 changes: 5 additions & 1 deletion include/vfn/nvme/ctrl.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@ static const struct nvme_ctrl_opts nvme_ctrl_opts_default = {
/*
* enum nvme_ctrl_feature_flags - NVMe controller feature flags
* @NVME_CTRL_F_ADMINISTRATIVE: controller type is admin
* @NVME_CTRL_F_SGLS_SUPPORTED: SGLs are supported
* @NVME_CTRL_F_SGLS_DWORD_ALIGNMENT: SGL data blocks require dword alignment
*/
enum nvme_ctrl_feature_flags {
	/* controller type is administrative */
	NVME_CTRL_F_ADMINISTRATIVE		= 1 << 0,

	/* controller supports SGLs for data transfer */
	NVME_CTRL_F_SGLS_SUPPORTED		= 1 << 1,

	/* SGL data blocks require dword (4-byte) alignment */
	NVME_CTRL_F_SGLS_DWORD_ALIGNMENT	= 1 << 2,
};

/**
Expand Down
33 changes: 32 additions & 1 deletion include/vfn/nvme/rq.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the IOVAs contained in @iov into the request PRPs. The first entry is
* Map the memory contained in @iov into the request PRPs. The first entry is
* allowed to be unaligned, but the entry MUST end on a page boundary. All
* subsequent entries MUST be page aligned.
*
Expand All @@ -235,6 +235,37 @@ int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov);

/**
* nvme_rq_mapv_sgl - Set up a Scatter/Gather List in the data pointer of the
* command from an iovec.
* @ctrl: &struct nvme_ctrl
* @rq: Request tracker (&struct nvme_rq)
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the memory contained in @iov into the request SGL.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov);

/**
* nvme_rq_mapv - Set up data pointer in the command from an iovec.
* @ctrl: &struct nvme_ctrl
* @rq: Request tracker (&struct nvme_rq)
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the memory contained in @iov into the request SGL (if supported) or PRPs.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov);

/**
* nvme_rq_spin - Spin for completion of the command associated with the request
* tracker
Expand Down
12 changes: 11 additions & 1 deletion src/nvme/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op
uint64_t cap;
uint8_t mpsmin, mpsmax;
uint16_t oacs;
uint32_t sgls;
ssize_t len;
void *vaddr;
int ret;
Expand Down Expand Up @@ -622,10 +623,19 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op
}

oacs = le16_to_cpu(*(leint16_t *)(vaddr + NVME_IDENTIFY_CTRL_OACS));

if (oacs & NVME_IDENTIFY_CTRL_OACS_DBCONFIG)
ret = nvme_init_dbconfig(ctrl);

sgls = le32_to_cpu(*(leint32_t *)(vaddr + NVME_IDENTIFY_CTRL_SGLS));
if (sgls) {
uint32_t alignment = NVME_FIELD_GET(sgls, IDENTIFY_CTRL_SGLS_ALIGNMENT);

ctrl->flags |= NVME_CTRL_F_SGLS_SUPPORTED;

if (alignment == NVME_IDENTIFY_CTRL_SGLS_ALIGNMENT_DWORD)
ctrl->flags |= NVME_CTRL_F_SGLS_DWORD_ALIGNMENT;
}

out:
pgunmap(vaddr, len);

Expand Down
161 changes: 120 additions & 41 deletions src/nvme/rq.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

#include "iommu/context.h"

static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
int pageshift)
{
size_t pagesize = 1 << pageshift;
Expand Down Expand Up @@ -80,58 +80,80 @@ static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova
return clamp_t(int, prpcount, 1, prpcount);
}

static inline int __map_aligned(leint64_t *prplist, int prpcount, uint64_t iova, int pageshift)
static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps,
				   int pageshift)
{
	size_t pagesize = 1 << pageshift;

	/*
	 * Number of PRP entries required to cover len bytes starting at the
	 * page-aligned iova. Round UP: a trailing partial page still needs its
	 * own PRP entry. (The previous floor division, `len >> pageshift`,
	 * undercounted for lengths that are not a multiple of the page size,
	 * e.g. pagesize + 1 bytes would get 1 entry instead of 2.)
	 */
	int prpcount = max_t(int, 1, (int)((len + pagesize - 1) >> pageshift));

	if (prpcount > max_prps) {
		log_error("too many prps required\n");

		errno = EINVAL;
		return -1;
	}

	/* prplist entries must be page size aligned */
	if (!ALIGNED(iova, pagesize)) {
		log_error("unaligned iova 0x%" PRIx64 "\n", iova);

		errno = EINVAL;
		return -1;
	}

	/* one entry per page; each entry advances by the page size */
	for (int i = 0; i < prpcount; i++)
		prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

	return prpcount;
}

/*
 * Fill in PRP Entry 2 of the data pointer: unused (zero) for a single-PRP
 * transfer, the second PRP entry for exactly two PRPs, and the iova of the
 * PRP list page when more than two entries are required.
 */
static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount)
{
	if (prpcount > 2) {
		*prp2 = prplist;
		return;
	}

	*prp2 = (prpcount == 2) ? prplist0 : 0x0;
}

/*
 * Map a single contiguous iova range into the command data pointer PRPs,
 * using the request tracker's page as the PRP list when more than two
 * entries are needed.
 *
 * NOTE(review): a negative return from __map_prp_first would pass the
 * `!nprp` check below unchanged — confirm its error contract.
 */
int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
		    size_t len)
{
	leint64_t *prplist = rq->page.vaddr;
	int pageshift = __mps_to_pageshift(ctrl->config.mps);
	int nprp = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);

	if (!nprp) {
		errno = EINVAL;
		return -1;
	}

	__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], nprp);

	return 0;
}

int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
int prpcount, _prpcount;
struct iommu_ctx *ctx = __iommu_ctx(ctrl);

leint64_t *prplist = rq->page.vaddr;
uint64_t iova = (uint64_t)iov->iov_base;
size_t len = iov->iov_len;
int pageshift = __mps_to_pageshift(ctrl->config.mps);
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);
int prpcount;
uint64_t iova;

if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
errno = EFAULT;
return -1;
}

/* map the first segment */
prpcount = __map_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);

/*
* At this point, one of three conditions must hold:
Expand All @@ -151,46 +173,103 @@ int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd

/* map remaining iovec entries; these must be page size aligned */
for (int i = 1; i < niov; i++) {
iova = (uint64_t)iov[i].iov_base;
len = iov[i].iov_len;
if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
errno = EFAULT;
return -1;
}

_prpcount = max_t(int, 1, (int)len >> pageshift);
len = iov[i].iov_len;

if (prpcount + _prpcount > max_prps) {
log_error("too many prps required\n");
/* all entries but the last must have a page size aligned len */
if (i < niov - 1 && !ALIGNED(len, pagesize)) {
log_error("unaligned iov[%u].len (%zu)\n", i, len);

goto invalid;
}

prpcount += __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount,
pageshift);
}

if (!ALIGNED(iova, pagesize)) {
log_error("unaligned iov[%u].iov_base (0x%"PRIx64")\n", i, iova);
__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);

goto invalid;
}
invalid:
errno = EINVAL;
return -1;
}

/* all entries but the last must have a page size aligned len */
if (i < niov - 1 && !ALIGNED(len, pagesize)) {
log_error("unaligned iov[%u].len (%zu)\n", i, len);
/* Build an SGL Data Block descriptor (descriptor type in the upper nibble). */
static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len)
{
	sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4;
	sgld->addr = cpu_to_le64(iova);
	sgld->len = cpu_to_le32((uint32_t)len);
}

/*
 * Build an SGL Last Segment descriptor pointing at a list of n descriptors;
 * the length field is in bytes (n descriptors of 16 bytes each, hence n << 4).
 */
static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n)
{
	sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4;
	sgld->addr = cpu_to_le64(iova);
	sgld->len = cpu_to_le32((uint32_t)n << 4);
}

/*
 * Map the iovec into the command data pointer as an SGL. A single iovec is
 * described directly in the command (Data Block descriptor); multiple iovecs
 * are described by a Last Segment descriptor pointing at a descriptor list
 * built in the request tracker's page.
 *
 * Returns 0 on success, -1 on error (errno set).
 */
int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
		     struct iovec *iov, int niov)
{
	struct nvme_sgld *seg = rq->page.vaddr;
	struct iommu_ctx *ctx = __iommu_ctx(ctrl);

	int pageshift = __mps_to_pageshift(ctrl->config.mps);

	/* each descriptor is 16 bytes; the list must fit in one page */
	int max_sglds = 1 << (pageshift - 4);
	int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT;

	uint64_t iova;

	if (niov == 1) {
		if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
			errno = EFAULT;
			return -1;
		}

		/*
		 * Enforce the controller's dword alignment requirement here
		 * too; previously it was only checked on the multi-descriptor
		 * path below.
		 */
		if (dword_align && (iova & 0x3)) {
			errno = EINVAL;
			return -1;
		}

		__sgl_data(&cmd->dptr.sgl, iova, iov->iov_len);

		return 0;
	}

	if (niov > max_sglds) {
		errno = EINVAL;
		return -1;
	}

	/* command data pointer describes the segment holding the list */
	__sgl_segment(&cmd->dptr.sgl, rq->page.iova, niov);

	for (int i = 0; i < niov; i++) {
		if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
			errno = EFAULT;
			return -1;
		}

		if (dword_align && (iova & 0x3)) {
			errno = EINVAL;
			return -1;
		}

		__sgl_data(&seg[i], iova, iov[i].iov_len);
	}

	return 0;
}

invalid:
errno = EINVAL;
return -1;
/*
 * Map the iovec into the command data pointer, preferring SGLs when the
 * controller supports them. The admin queue (sq id 0) always uses PRPs.
 */
int nvme_rq_mapv(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
		 struct iovec *iov, int niov)
{
	struct nvme_sq *sq = rq->sq;

	if ((ctrl->flags & NVME_CTRL_F_SGLS_SUPPORTED) && sq->id != 0)
		return nvme_rq_mapv_sgl(ctrl, rq, cmd, iov, niov);

	return nvme_rq_mapv_prp(ctrl, rq, cmd, iov, niov);
}

int nvme_rq_wait(struct nvme_rq *rq, struct nvme_cqe *cqe_copy, struct timespec *ts)
Expand Down
28 changes: 24 additions & 4 deletions src/nvme/rq_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,16 @@ int main(void)
struct nvme_rq rq;
union nvme_cmd cmd;
leint64_t *prplist;
struct nvme_sgld *sglds;
struct iovec iov[8];

plan_tests(89);
plan_tests(96);

assert(pgmap((void **)&prplist, __VFN_PAGESIZE) > 0);
assert(pgmap((void **)&rq.page.vaddr, __VFN_PAGESIZE) > 0);

rq.page.vaddr = prplist;
rq.page.iova = 0x8000000;
prplist = rq.page.vaddr;
sglds = rq.page.vaddr;

/* test 512b aligned */
memset((void *)prplist, 0x0, __VFN_PAGESIZE);
Expand Down Expand Up @@ -324,13 +326,31 @@ int main(void)

memset((void *)prplist, 0x0, __VFN_PAGESIZE);
iov[0] = (struct iovec) {.iov_base = (void *)0x1000004, .iov_len = 0x1000};
iov[0] = (struct iovec) {.iov_base = (void *)0x1001004, .iov_len = 0x1000};
iov[1] = (struct iovec) {.iov_base = (void *)0x1001004, .iov_len = 0x1000};
ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 2) == -1);

memset((void *)prplist, 0x0, __VFN_PAGESIZE);
iov[0] = (struct iovec) {.iov_base = (void *)0x1000000, .iov_len = 0x1000};
iov[1] = (struct iovec) {.iov_base = (void *)0x1001000, .iov_len = __max_prps * 0x1000};
ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 2) == -1);

/*
* SGLs
*/

memset((void *)sglds, 0x0, __VFN_PAGESIZE);
iov[0] = (struct iovec) {.iov_base = (void *)0x1000000, .iov_len = 0x1000};
ok1(nvme_rq_mapv_sgl(&ctrl, &rq, &cmd, iov, 1) == 0);
ok1(le64_to_cpu(cmd.dptr.sgl.addr) == 0x1000000);
ok1(le32_to_cpu(cmd.dptr.sgl.len) == 0x1000);
ok1(cmd.dptr.sgl.type == NVME_SGLD_TYPE_DATA_BLOCK);

memset((void *)sglds, 0x0, __VFN_PAGESIZE);
iov[0] = (struct iovec) {.iov_base = (void *)0x1000000, .iov_len = 0x1000};
iov[1] = (struct iovec) {.iov_base = (void *)0x1002000, .iov_len = 0x1000};
ok1(nvme_rq_mapv_sgl(&ctrl, &rq, &cmd, iov, 2) == 0);
ok1(le64_to_cpu(sglds[0].addr) == 0x1000000);
ok1(le64_to_cpu(sglds[1].addr) == 0x1002000);

return exit_status();
}
Loading

0 comments on commit 14e08ee

Please sign in to comment.