From 9866a6d679c041771b183ba2d2ea148a96aa5b3d Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 6 Sep 2024 12:21:52 +0200 Subject: [PATCH] nvme: use iommu_dmabuf api Replace all pgmap/iommu_map_vaddr pairs with iommu_get/put_dmabuf. Signed-off-by: Klaus Jensen --- include/vfn/nvme.h | 1 + include/vfn/nvme/ctrl.h | 4 +- include/vfn/nvme/queue.h | 16 ++-- src/nvme/core.c | 174 ++++++++++++++------------------------- src/nvme/queue.c | 9 +- src/nvme/util.c | 1 - 6 files changed, 72 insertions(+), 133 deletions(-) diff --git a/include/vfn/nvme.h b/include/vfn/nvme.h index a1023c4b..63d43493 100644 --- a/include/vfn/nvme.h +++ b/include/vfn/nvme.h @@ -36,6 +36,7 @@ extern "C" { #include #include #include +#include #include #include #include diff --git a/include/vfn/nvme/ctrl.h b/include/vfn/nvme/ctrl.h index 664263b2..82b473dd 100644 --- a/include/vfn/nvme/ctrl.h +++ b/include/vfn/nvme/ctrl.h @@ -82,8 +82,8 @@ struct nvme_ctrl { * @dbbuf: doorbell buffers */ struct { - void *doorbells; - void *eventidxs; + struct iommu_dmabuf doorbells; + struct iommu_dmabuf eventidxs; } dbbuf; /** diff --git a/include/vfn/nvme/queue.h b/include/vfn/nvme/queue.h index b2316826..b7e41ef5 100644 --- a/include/vfn/nvme/queue.h +++ b/include/vfn/nvme/queue.h @@ -27,8 +27,7 @@ struct nvme_dbbuf { */ struct nvme_cq { /* private: */ - void *vaddr; - uint64_t iova; + struct iommu_dmabuf mem; int id; uint16_t head; @@ -51,13 +50,8 @@ struct nvme_sq { /* private: */ struct nvme_cq *cq; - void *vaddr; - uint64_t iova; - - struct { - void *vaddr; - uint64_t iova; - } pages; + struct iommu_dmabuf mem; + struct iommu_dmabuf pages; uint16_t tail, ptail; int qsize; @@ -84,7 +78,7 @@ struct nvme_sq { */ static inline void nvme_sq_post(struct nvme_sq *sq, const union nvme_cmd *sqe) { - memcpy(sq->vaddr + (sq->tail << NVME_SQES), sqe, 1 << NVME_SQES); + memcpy(sq->mem.vaddr + (sq->tail << NVME_SQES), sqe, 1 << NVME_SQES); trace_guard(NVME_SQ_POST) { trace_emit("sqid %d tail %d\n", sq->id, sq->tail); 
@@ -174,7 +168,7 @@ static inline void nvme_sq_exec(struct nvme_sq *sq, const union nvme_cmd *sqe) */ static inline struct nvme_cqe *nvme_cq_head(struct nvme_cq *cq) { - return (struct nvme_cqe *)(cq->vaddr + (cq->head << NVME_CQES)); + return (struct nvme_cqe *)(cq->mem.vaddr + (cq->head << NVME_CQES)); } /** diff --git a/src/nvme/core.c b/src/nvme/core.c index 24651910..405d27c5 100644 --- a/src/nvme/core.c +++ b/src/nvme/core.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -115,7 +114,6 @@ static int nvme_configure_cq(struct nvme_ctrl *ctrl, int qid, int qsize, int vec struct nvme_cq *cq = &ctrl->cq[qid]; uint64_t cap; uint8_t dstrd; - size_t len; cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); @@ -144,36 +142,25 @@ static int nvme_configure_cq(struct nvme_ctrl *ctrl, int qid, int qsize, int vec .vector = vector, }; - if (ctrl->dbbuf.doorbells) { - cq->dbbuf.doorbell = cqhdbl(ctrl->dbbuf.doorbells, qid, dstrd); - cq->dbbuf.eventidx = cqhdbl(ctrl->dbbuf.eventidxs, qid, dstrd); + if (ctrl->dbbuf.doorbells.vaddr) { + cq->dbbuf.doorbell = cqhdbl(ctrl->dbbuf.doorbells.vaddr, qid, dstrd); + cq->dbbuf.eventidx = cqhdbl(ctrl->dbbuf.eventidxs.vaddr, qid, dstrd); } - len = pgmapn(&cq->vaddr, qsize, 1 << NVME_CQES); - - if (iommu_map_vaddr(__iommu_ctx(ctrl), cq->vaddr, len, &cq->iova, 0x0)) { - log_debug("failed to map vaddr\n"); - - pgunmap(cq->vaddr, len); + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &cq->mem, qsize << NVME_CQES, 0x0)) return -1; - } return 0; } void nvme_discard_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq) { - size_t len; - - if (!cq->vaddr) + if (!cq->mem.vaddr) return; - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), cq->vaddr, &len)) - log_debug("failed to unmap vaddr\n"); + iommu_put_dmabuf(&cq->mem); - pgunmap(cq->vaddr, len); - - if (ctrl->dbbuf.doorbells) { + if (ctrl->dbbuf.doorbells.vaddr) { __STORE_PTR(uint32_t *, cq->dbbuf.doorbell, 0); __STORE_PTR(uint32_t *, 
cq->dbbuf.eventidx, 0); } @@ -187,7 +174,9 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, struct nvme_sq *sq = &ctrl->sq[qid]; uint64_t cap; uint8_t dstrd; - ssize_t len; + size_t pagesize; + + pagesize = __mps_to_pagesize(ctrl->config.mps); cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); @@ -216,25 +205,18 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, .cq = cq, }; - if (ctrl->dbbuf.doorbells) { - sq->dbbuf.doorbell = sqtdbl(ctrl->dbbuf.doorbells, qid, dstrd); - sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs, qid, dstrd); + if (ctrl->dbbuf.doorbells.vaddr) { + sq->dbbuf.doorbell = sqtdbl(ctrl->dbbuf.doorbells.vaddr, qid, dstrd); + sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs.vaddr, qid, dstrd); } /* * Use ctrl->config.mps instead of host page size, as we have the * opportunity to pack the allocations. */ - len = pgmapn(&sq->pages.vaddr, qsize, __mps_to_pagesize(ctrl->config.mps)); - - if (len < 0) + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &sq->pages, (size_t)qsize * pagesize, 0x0)) return -1; - if (iommu_map_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, len, &sq->pages.iova, 0x0)) { - log_debug("failed to map vaddr\n"); - goto unmap_pages; - } - sq->rqs = znew_t(struct nvme_rq, qsize - 1); sq->rq_top = &sq->rqs[qsize - 2]; @@ -251,50 +233,28 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, rq->rq_next = &sq->rqs[i - 1]; } - len = pgmapn(&sq->vaddr, qsize, 1 << NVME_SQES); - if (len < 0) - goto free_sq_rqs; + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &sq->mem, qsize << NVME_SQES, 0x0)) { + free(sq->rqs); + iommu_put_dmabuf(&sq->pages); - if (iommu_map_vaddr(__iommu_ctx(ctrl), sq->vaddr, len, &sq->iova, 0x0)) { - log_debug("failed to map vaddr\n"); - goto unmap_sq; + return -1; } return 0; - -unmap_sq: - pgunmap(sq->vaddr, len); -free_sq_rqs: - free(sq->rqs); -unmap_pages: - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, (size_t
*)&len)) - log_debug("failed to unmap vaddr\n"); - - pgunmap(sq->pages.vaddr, len); - - return -1; } void nvme_discard_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq) { - size_t len; - - if (!sq->vaddr) + if (!sq->mem.vaddr) return; - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->vaddr, &len)) - log_debug("failed to unmap vaddr\n"); - - pgunmap(sq->vaddr, len); + iommu_put_dmabuf(&sq->mem); free(sq->rqs); - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, &len)) - log_debug("failed to unmap vaddr\n"); - - pgunmap(sq->pages.vaddr, len); + iommu_put_dmabuf(&sq->pages); - if (ctrl->dbbuf.doorbells) { + if (ctrl->dbbuf.doorbells.vaddr) { __STORE_PTR(uint32_t *, sq->dbbuf.doorbell, 0); __STORE_PTR(uint32_t *, sq->dbbuf.eventidx, 0); } @@ -326,8 +286,8 @@ int nvme_configure_adminq(struct nvme_ctrl *ctrl, unsigned long sq_flags) aqa |= aqa << 16; mmio_write32(ctrl->regs + NVME_REG_AQA, cpu_to_le32(aqa)); - mmio_hl_write64(ctrl->regs + NVME_REG_ASQ, cpu_to_le64(sq->iova)); - mmio_hl_write64(ctrl->regs + NVME_REG_ACQ, cpu_to_le64(cq->iova)); + mmio_hl_write64(ctrl->regs + NVME_REG_ASQ, cpu_to_le64(sq->mem.iova)); + mmio_hl_write64(ctrl->regs + NVME_REG_ACQ, cpu_to_le64(cq->mem.iova)); return 0; @@ -361,7 +321,7 @@ int nvme_create_iocq(struct nvme_ctrl *ctrl, int qid, int qsize, int vector) cmd.create_cq = (struct nvme_cmd_create_cq) { .opcode = NVME_ADMIN_CREATE_CQ, - .prp1 = cpu_to_le64(cq->iova), + .prp1 = cpu_to_le64(cq->mem.iova), .qid = cpu_to_le16((uint16_t)qid), .qsize = cpu_to_le16((uint16_t)(qsize - 1)), .qflags = cpu_to_le16(qflags), @@ -398,7 +358,7 @@ int nvme_create_iosq(struct nvme_ctrl *ctrl, int qid, int qsize, struct nvme_cq cmd.create_sq = (struct nvme_cmd_create_sq) { .opcode = NVME_ADMIN_CREATE_SQ, - .prp1 = cpu_to_le64(sq->iova), + .prp1 = cpu_to_le64(sq->mem.iova), .qid = cpu_to_le16((uint16_t)qid), .qsize = cpu_to_le16((uint16_t)(qsize - 1)), .qflags = cpu_to_le16(NVME_Q_PC), @@ -518,29 +478,22 @@ int nvme_reset(struct nvme_ctrl *ctrl) static 
int nvme_init_dbconfig(struct nvme_ctrl *ctrl) { - uint64_t prp1, prp2; union nvme_cmd cmd; - if (pgmap((void **)&ctrl->dbbuf.doorbells, __VFN_PAGESIZE) < 0) + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &ctrl->dbbuf.doorbells, __VFN_PAGESIZE, 0x0)) return -1; - if (iommu_map_vaddr(__iommu_ctx(ctrl), ctrl->dbbuf.doorbells, __VFN_PAGESIZE, &prp1, 0x0)) - return -1; - - if (pgmap((void **)&ctrl->dbbuf.eventidxs, __VFN_PAGESIZE) < 0) - return -1; - - if (iommu_map_vaddr(__iommu_ctx(ctrl), ctrl->dbbuf.eventidxs, __VFN_PAGESIZE, &prp2, 0x0)) - return -1; + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &ctrl->dbbuf.eventidxs, __VFN_PAGESIZE, 0x0)) + goto put_doorbells; cmd = (union nvme_cmd) { .opcode = NVME_ADMIN_DBCONFIG, - .dptr.prp1 = cpu_to_le64(prp1), - .dptr.prp2 = cpu_to_le64(prp2), + .dptr.prp1 = cpu_to_le64(ctrl->dbbuf.doorbells.iova), + .dptr.prp2 = cpu_to_le64(ctrl->dbbuf.eventidxs.iova), }; if (__admin(ctrl, &cmd)) - return -1; + goto put_eventidxs; if (!(ctrl->opts.quirks & NVME_QUIRK_BROKEN_DBBUF)) { uint64_t cap; @@ -549,14 +502,25 @@ static int nvme_init_dbconfig(struct nvme_ctrl *ctrl) cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); - ctrl->adminq.cq->dbbuf.doorbell = cqhdbl(ctrl->dbbuf.doorbells, NVME_AQ, dstrd); - ctrl->adminq.cq->dbbuf.eventidx = cqhdbl(ctrl->dbbuf.eventidxs, NVME_AQ, dstrd); + ctrl->adminq.cq->dbbuf.doorbell = + cqhdbl(ctrl->dbbuf.doorbells.vaddr, NVME_AQ, dstrd); + ctrl->adminq.cq->dbbuf.eventidx = + cqhdbl(ctrl->dbbuf.eventidxs.vaddr, NVME_AQ, dstrd); - ctrl->adminq.sq->dbbuf.doorbell = sqtdbl(ctrl->dbbuf.doorbells, NVME_AQ, dstrd); - ctrl->adminq.sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs, NVME_AQ, dstrd); + ctrl->adminq.sq->dbbuf.doorbell = + sqtdbl(ctrl->dbbuf.doorbells.vaddr, NVME_AQ, dstrd); + ctrl->adminq.sq->dbbuf.eventidx = + sqtdbl(ctrl->dbbuf.eventidxs.vaddr, NVME_AQ, dstrd); } return 0; + +put_eventidxs: + iommu_put_dmabuf(&ctrl->dbbuf.eventidxs); +put_doorbells: + 
iommu_put_dmabuf(&ctrl->dbbuf.doorbells); + + return -1; } static int nvme_init_pci(struct nvme_ctrl *ctrl, const char *bdf) @@ -639,11 +603,12 @@ int nvme_ctrl_init(struct nvme_ctrl *ctrl, const char *bdf, int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_opts *opts) { + struct iommu_ctx *ctx = __iommu_ctx(ctrl); + uint16_t oacs; uint32_t sgls; - ssize_t len; - void *vaddr; - int ret; + + __autovar_s(iommu_dmabuf) buffer = {}; union nvme_cmd cmd = {}; struct nvme_cqe cqe; @@ -688,8 +653,7 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op ctrl->config.ncqa = min_t(int, ctrl->opts.ncqr, NVME_FIELD_GET(le32_to_cpu(cqe.dw0), FEAT_NRQS_NCQR)); - len = pgmap(&vaddr, NVME_IDENTIFY_DATA_SIZE); - if (len < 0) + if (iommu_get_dmabuf(ctx, &buffer, NVME_IDENTIFY_DATA_SIZE, IOMMU_MAP_EPHEMERAL)) return -1; cmd.identify = (struct nvme_cmd_identify) { @@ -697,17 +661,14 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op .cns = NVME_IDENTIFY_CNS_CTRL, }; - ret = nvme_admin(ctrl, &cmd, vaddr, len, NULL); - if (ret) { - log_debug("could not identify\n"); - goto out; - } + if (nvme_admin(ctrl, &cmd, buffer.vaddr, buffer.len, NULL)) + return -1; - oacs = le16_to_cpu(*(leint16_t *)(vaddr + NVME_IDENTIFY_CTRL_OACS)); - if (oacs & NVME_IDENTIFY_CTRL_OACS_DBCONFIG) - ret = nvme_init_dbconfig(ctrl); + oacs = le16_to_cpu(*(leint16_t *)(buffer.vaddr + NVME_IDENTIFY_CTRL_OACS)); + if (oacs & NVME_IDENTIFY_CTRL_OACS_DBCONFIG && nvme_init_dbconfig(ctrl)) + return -1; - sgls = le32_to_cpu(*(leint32_t *)(vaddr + NVME_IDENTIFY_CTRL_SGLS)); + sgls = le32_to_cpu(*(leint32_t *)(buffer.vaddr + NVME_IDENTIFY_CTRL_SGLS)); if (sgls) { uint32_t alignment = NVME_FIELD_GET(sgls, IDENTIFY_CTRL_SGLS_ALIGNMENT); @@ -717,10 +678,7 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op ctrl->flags |= NVME_CTRL_F_SGLS_DWORD_ALIGNMENT; } -out: - pgunmap(vaddr, len); - - return ret; + return 0; }
void nvme_close(struct nvme_ctrl *ctrl) @@ -735,17 +693,9 @@ void nvme_close(struct nvme_ctrl *ctrl) free(ctrl->cq); - if (ctrl->dbbuf.doorbells) { - struct iommu_ctx *ctx = __iommu_ctx(ctrl); - size_t len; - - log_fatal_if(iommu_unmap_vaddr(ctx, ctrl->dbbuf.doorbells, &len), - "iommu_unmap_vaddr"); - pgunmap(ctrl->dbbuf.doorbells, len); - - log_fatal_if(iommu_unmap_vaddr(ctx, ctrl->dbbuf.eventidxs, &len), - "iommu_unmap_vaddr"); - pgunmap(ctrl->dbbuf.eventidxs, len); + if (ctrl->dbbuf.doorbells.vaddr) { + iommu_put_dmabuf(&ctrl->dbbuf.doorbells); + iommu_put_dmabuf(&ctrl->dbbuf.eventidxs); } vfio_pci_unmap_bar(&ctrl->pci, 0, ctrl->regs, 0x1000, 0); diff --git a/src/nvme/queue.c b/src/nvme/queue.c index 85be40ca..42875064 100644 --- a/src/nvme/queue.c +++ b/src/nvme/queue.c @@ -26,14 +26,9 @@ #include -#include -#include -#include -#include -#include +#include #include -#include -#include +#include #include "ccan/time/time.h" diff --git a/src/nvme/util.c b/src/nvme/util.c index c5b25776..29221eb2 100644 --- a/src/nvme/util.c +++ b/src/nvme/util.c @@ -29,7 +29,6 @@ #include #include -#include #include #include #include