Skip to content

Commit

Permalink
nvme: support for non-4k page sizes
Browse files Browse the repository at this point in the history
Add a check of MPSMAX and cap the host page size to the device maximum,
depending on what the controller reports.

Signed-off-by: Mads Ynddal <[email protected]>
[k.jensen: rebased]
Signed-off-by: Klaus Jensen <[email protected]>
  • Loading branch information
Baekalfen authored and birkelund committed Dec 5, 2023
1 parent a252cc0 commit 9f6ebfc
Show file tree
Hide file tree
Showing 10 changed files with 104 additions and 81 deletions.
2 changes: 1 addition & 1 deletion examples/eventfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ int main(int argc, char **argv)
.nsid = cpu_to_le32(nsid),
};

nvme_rq_map_prp(rq, &cmd, iova, 0x1000);
nvme_rq_map_prp(&ctrl, rq, &cmd, iova, 0x1000);

nvme_rq_exec(rq, &cmd);

Expand Down
2 changes: 1 addition & 1 deletion examples/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ int main(int argc, char **argv)
.nsid = cpu_to_le32(nsid),
};

ret = nvme_rq_map_prp(rq, &cmd, iova, 0x1000);
ret = nvme_rq_map_prp(&ctrl, rq, &cmd, iova, 0x1000);
if (ret)
err(1, "could not map prps");

Expand Down
1 change: 1 addition & 0 deletions include/vfn/nvme/ctrl.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct nvme_ctrl {
struct {
int nsqa, ncqa;
int mqes;
int mps;
} config;

/* private: internal */
Expand Down
8 changes: 6 additions & 2 deletions include/vfn/nvme/rq.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ static inline void nvme_rq_exec(struct nvme_rq *rq, union nvme_cmd *cmd)
* nvme_rq_map_prp - Set up the Physical Region Pages in the data pointer of the
* command from a buffer that is contiguous in iova mapped
* memory.
* @ctrl: &struct nvme_ctrl
* @rq: Request tracker (&struct nvme_rq)
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iova: I/O Virtual Address
Expand All @@ -213,11 +214,13 @@ static inline void nvme_rq_exec(struct nvme_rq *rq, union nvme_cmd *cmd)
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_map_prp(struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova, size_t len);
int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
size_t len);

/**
* nvme_rq_mapv_prp - Set up the Physical Region Pages in the data pointer of
* the command from an iovec.
* @ctrl: &struct nvme_ctrl
* @rq: Request tracker (&struct nvme_rq)
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
Expand All @@ -229,7 +232,8 @@ int nvme_rq_map_prp(struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova, size
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv_prp(struct nvme_rq *rq, union nvme_cmd *cmd, struct iovec *iov, int niov);
int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov);

/**
* nvme_rq_spin - Spin for completion of the command associated with the request
Expand Down
3 changes: 3 additions & 0 deletions include/vfn/nvme/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

#define NVME_CID_AER (1 << 15)

#define __mps_to_pageshift(mps) (12 + mps)
#define __mps_to_pagesize(mps) (1ULL << __mps_to_pageshift(mps))

/**
* nvme_crc64 - calculate NVMe CRC64
* @crc: starting value
Expand Down
50 changes: 27 additions & 23 deletions src/nvme/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,7 @@ static void nvme_discard_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq)
if (!cq->vaddr)
return;

len = ALIGN_UP((size_t)cq->qsize << NVME_CQES, __VFN_PAGESIZE);

if (iommu_unmap_vaddr(__iommu_ctx(ctrl), cq->vaddr, NULL))
if (iommu_unmap_vaddr(__iommu_ctx(ctrl), cq->vaddr, &len))
log_debug("failed to unmap vaddr\n");

pgunmap(cq->vaddr, len);
Expand Down Expand Up @@ -166,7 +164,11 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize,
sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs, qid, dstrd);
}

len = pgmapn(&sq->pages.vaddr, qsize, __VFN_PAGESIZE);
/*
* Use ctrl->config.mps instead of host page size, as we have the
* opportunity to pack the allocations.
*/
len = pgmapn(&sq->pages.vaddr, qsize, __mps_to_pagesize(ctrl->config.mps));

if (len < 0)
return -1;
Expand All @@ -185,8 +187,8 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize,
rq->sq = sq;
rq->cid = (uint16_t)i;

rq->page.vaddr = sq->pages.vaddr + (i << __VFN_PAGESHIFT);
rq->page.iova = sq->pages.iova + (i << __VFN_PAGESHIFT);
rq->page.vaddr = sq->pages.vaddr + ((uint64_t)i << (12 + ctrl->config.mps));
rq->page.iova = sq->pages.iova + ((uint64_t)i << (12 + ctrl->config.mps));

if (i > 0)
rq->rq_next = &sq->rqs[i - 1];
Expand All @@ -208,10 +210,10 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize,
free_sq_rqs:
free(sq->rqs);
unmap_pages:
if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, NULL))
if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, (size_t *)&len))
log_debug("failed to unmap vaddr\n");

pgunmap(sq->pages.vaddr, (size_t)sq->qsize << __VFN_PAGESHIFT);
pgunmap(sq->pages.vaddr, len);

return -1;
}
Expand All @@ -223,18 +225,14 @@ static void nvme_discard_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq)
if (!sq->vaddr)
return;

len = ALIGN_UP((size_t)sq->qsize << NVME_SQES, __VFN_PAGESIZE);

if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->vaddr, NULL))
if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->vaddr, &len))
log_debug("failed to unmap vaddr\n");

pgunmap(sq->vaddr, len);

free(sq->rqs);

len = (size_t)sq->qsize << __VFN_PAGESHIFT;

if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, NULL))
if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, &len))
log_debug("failed to unmap vaddr\n");

pgunmap(sq->pages.vaddr, len);
Expand Down Expand Up @@ -432,12 +430,12 @@ int nvme_enable(struct nvme_ctrl *ctrl)
css = NVME_FIELD_GET(cap, CAP_CSS);

cc =
NVME_FIELD_SET(__VFN_PAGESHIFT - 12, CC_MPS) |
NVME_FIELD_SET(NVME_CC_AMS_RR, CC_AMS) |
NVME_FIELD_SET(NVME_CC_SHN_NONE, CC_SHN) |
NVME_FIELD_SET(NVME_SQES, CC_IOSQES) |
NVME_FIELD_SET(NVME_CQES, CC_IOCQES) |
NVME_FIELD_SET(0x1, CC_EN);
NVME_FIELD_SET(ctrl->config.mps, CC_MPS) |
NVME_FIELD_SET(NVME_CC_AMS_RR, CC_AMS) |
NVME_FIELD_SET(NVME_CC_SHN_NONE, CC_SHN) |
NVME_FIELD_SET(NVME_SQES, CC_IOSQES) |
NVME_FIELD_SET(NVME_CQES, CC_IOCQES) |
NVME_FIELD_SET(0x1, CC_EN);

if (css & NVME_CAP_CSS_CSI)
cc |= NVME_FIELD_SET(NVME_CC_CSS_CSI, CC_CSS);
Expand Down Expand Up @@ -508,7 +506,7 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op
{
unsigned long long classcode;
uint64_t cap;
uint8_t mpsmin;
uint8_t mpsmin, mpsmax;
uint16_t oacs;
ssize_t len;
void *vaddr;
Expand Down Expand Up @@ -549,11 +547,17 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op

cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP));
mpsmin = NVME_FIELD_GET(cap, CAP_MPSMIN);
mpsmax = NVME_FIELD_GET(cap, CAP_MPSMAX);

ctrl->config.mps = clamp_t(int, __VFN_PAGESHIFT - 12, mpsmin, mpsmax);

if ((12 + mpsmin) > __VFN_PAGESHIFT) {
log_debug("controller minimum page size too large\n");
if ((12 + ctrl->config.mps) > __VFN_PAGESHIFT) {
log_error("mpsmin too large\n");
errno = EINVAL;
return -1;
} else if ((12 + ctrl->config.mps) < __VFN_PAGESHIFT) {
log_info("host memory page size is larger than mpsmax; clamping mps to %d\n",
ctrl->config.mps);
}

ctrl->config.mqes = NVME_FIELD_GET(cap, CAP_MQES);
Expand Down
57 changes: 30 additions & 27 deletions src/nvme/rq.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,14 @@

#include "iommu/context.h"

static int __rq_max_prps;

static void __attribute__((constructor)) init_max_prps(void)
static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
int pageshift)
{
__rq_max_prps = (int)(sysconf(_SC_PAGESIZE) / sizeof(uint64_t) + 1);

log_debug("max prps is %d\n", __rq_max_prps);
}
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);

static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len)
{
/* number of prps required to map the buffer */
int prpcount = (int)len >> __VFN_PAGESHIFT;
int prpcount = (int)(len >> pageshift);

*prp1 = cpu_to_le64(iova);

Expand All @@ -61,12 +56,12 @@ static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova
* Additionally, we align the iova down to a page size boundary,
* simplifying the following loop.
*/
if (prpcount && !ALIGNED(iova, __VFN_PAGESIZE)) {
iova = ALIGN_DOWN(iova, __VFN_PAGESIZE);
if (prpcount && !ALIGNED(iova, pagesize)) {
iova = ALIGN_DOWN(iova, pagesize);
prpcount++;
}

if (prpcount > __rq_max_prps) {
if (prpcount > max_prps) {
errno = EINVAL;
return 0;
}
Expand All @@ -76,7 +71,7 @@ static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova
* aligned from the above, which simplifies this.
*/
for (int i = 1; i < prpcount; i++)
prplist[i - 1] = cpu_to_le64(iova + (i << __VFN_PAGESHIFT));
prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

/*
* prpcount may be zero if the buffer length was less than the page
Expand All @@ -85,26 +80,30 @@ static inline int __map_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova
return clamp_t(int, prpcount, 1, prpcount);
}

static inline int __map_aligned(leint64_t *prplist, int prpcount, uint64_t iova)
static inline int __map_aligned(leint64_t *prplist, int prpcount, uint64_t iova, int pageshift)
{
size_t pagesize = 1 << pageshift;

/*
* __map_aligned is used exclusively for mapping into the prplist
* entries where addresses must be page size aligned.
*/
assert(ALIGNED(iova, __VFN_PAGESIZE));
assert(ALIGNED(iova, pagesize));

for (int i = 0; i < prpcount; i++)
prplist[i] = cpu_to_le64(iova + (i << __VFN_PAGESHIFT));
prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

return prpcount;
}

int nvme_rq_map_prp(struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova, size_t len)
int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
size_t len)
{
int prpcount;
leint64_t *prplist = rq->page.vaddr;

prpcount = __map_first(&cmd->dptr.prp1, prplist, iova, len);
prpcount = __map_first(&cmd->dptr.prp1, prplist, iova, len,
__mps_to_pageshift(ctrl->config.mps));
if (!prpcount) {
errno = EINVAL;
return -1;
Expand All @@ -120,15 +119,19 @@ int nvme_rq_map_prp(struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova, size
return 0;
}

int nvme_rq_mapv_prp(struct nvme_rq *rq, union nvme_cmd *cmd, struct iovec *iov, int niov)
int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
int prpcount, _prpcount;
leint64_t *prplist = rq->page.vaddr;
uint64_t iova = (uint64_t)iov->iov_base;
size_t len = iov->iov_len;
int pageshift = __mps_to_pageshift(ctrl->config.mps);
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);

/* map the first segment */
prpcount = __map_first(&cmd->dptr.prp1, prplist, iova, len);
prpcount = __map_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);

/*
* At this point, one of three conditions must hold:
Expand All @@ -140,7 +143,7 @@ int nvme_rq_mapv_prp(struct nvme_rq *rq, union nvme_cmd *cmd, struct iovec *iov,
* If none holds, the buffer(s) within the iovec cannot be mapped given
* the PRP alignment requirements.
*/
if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, __VFN_PAGESIZE))) {
if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) {
log_error("iov[0].iov_base/len invalid\n");

goto invalid;
Expand All @@ -151,29 +154,29 @@ int nvme_rq_mapv_prp(struct nvme_rq *rq, union nvme_cmd *cmd, struct iovec *iov,
iova = (uint64_t)iov[i].iov_base;
len = iov[i].iov_len;

_prpcount = max_t(int, 1, (int)len >> __VFN_PAGESHIFT);
_prpcount = max_t(int, 1, (int)len >> pageshift);

if (prpcount + _prpcount > __rq_max_prps) {
if (prpcount + _prpcount > max_prps) {
log_error("too many prps required\n");

goto invalid;
}


if (!ALIGNED(iova, __VFN_PAGESIZE)) {
if (!ALIGNED(iova, pagesize)) {
log_error("unaligned iov[%u].iov_base (0x%"PRIx64")\n", i, iova);

goto invalid;
}

/* all entries but the last must have a page size aligned len */
if (i < niov - 1 && !ALIGNED(len, __VFN_PAGESIZE)) {
if (i < niov - 1 && !ALIGNED(len, pagesize)) {
log_error("unaligned iov[%u].len (%zu)\n", i, len);

goto invalid;
}

prpcount += __map_aligned(&prplist[prpcount - 1], _prpcount, iova);
prpcount += __map_aligned(&prplist[prpcount - 1], _prpcount, iova, pageshift);
}

if (prpcount == 2)
Expand Down
Loading

0 comments on commit 9f6ebfc

Please sign in to comment.