summary | refs | log | tree | commit | diff | stats
path: root/usr.sbin
diff options
context:
space:
mode:
author mav <mav@FreeBSD.org> 2015-03-23 14:36:53 +0000
committer mav <mav@FreeBSD.org> 2015-03-23 14:36:53 +0000
commit 3ce68975f29c670c2a6b0d08841ef28de5d3d3c7 (patch)
tree ce5213af54c42bdb3c3600f5a6f985c6bf3dff82 /usr.sbin
parent 571ee1f33d97441d9d5b852dbc727c3d6121fa35 (diff)
download FreeBSD-src-3ce68975f29c670c2a6b0d08841ef28de5d3d3c7.zip
FreeBSD-src-3ce68975f29c670c2a6b0d08841ef28de5d3d3c7.tar.gz
MFC r279957, r280017: Add DSM TRIM command support for virtual AHCI disks.
It works only for virtual disks backed by ZVOLs and raw devices supporting BIO_DELETE. Virtual disks backed by files won't report this capability.
Relnotes: yes
Diffstat (limited to 'usr.sbin')
-rw-r--r-- usr.sbin/bhyve/block_if.c 43
-rw-r--r-- usr.sbin/bhyve/block_if.h 2
-rw-r--r-- usr.sbin/bhyve/pci_ahci.c 132
3 files changed, 169 insertions, 8 deletions
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index e765987..ffe2a47 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$");
enum blockop {
BOP_READ,
BOP_WRITE,
- BOP_FLUSH
+ BOP_FLUSH,
+ BOP_DELETE
};
enum blockstat {
@@ -81,6 +82,7 @@ struct blockif_ctxt {
int bc_magic;
int bc_fd;
int bc_ischr;
+ int bc_candelete;
int bc_rdonly;
off_t bc_size;
int bc_sectsz;
@@ -172,6 +174,7 @@ static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
{
struct blockif_req *br;
+ off_t arg[2];
int err;
br = be->be_req;
@@ -197,6 +200,19 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
} else if (fsync(bc->bc_fd))
err = errno;
break;
+ case BOP_DELETE:
+ if (!bc->bc_candelete)
+ err = EOPNOTSUPP;
+ else if (bc->bc_rdonly)
+ err = EROFS;
+ else if (bc->bc_ischr) {
+ arg[0] = br->br_offset;
+ arg[1] = br->br_iov[0].iov_len;
+ if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
+ err = errno;
+ } else
+ err = EOPNOTSUPP;
+ break;
default:
err = EINVAL;
break;
@@ -276,9 +292,10 @@ blockif_open(const char *optstr, const char *ident)
char *nopt, *xopts;
struct blockif_ctxt *bc;
struct stat sbuf;
+ struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
- int nocache, sync, ro;
+ int nocache, sync, ro, candelete;
pthread_once(&blockif_once, blockif_init);
@@ -332,6 +349,7 @@ blockif_open(const char *optstr, const char *ident)
size = sbuf.st_size;
sectsz = DEV_BSIZE;
psectsz = psectoff = 0;
+ candelete = 0;
if (S_ISCHR(sbuf.st_mode)) {
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@@ -343,6 +361,10 @@ blockif_open(const char *optstr, const char *ident)
assert(sectsz != 0);
if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
+ strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
+ arg.len = sizeof(arg.value.i);
+ if (ioctl(fd, DIOCGATTR, &arg) == 0)
+ candelete = arg.value.i;
} else
psectsz = sbuf.st_blksize;
@@ -355,6 +377,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
bc->bc_ischr = S_ISCHR(sbuf.st_mode);
+ bc->bc_candelete = candelete;
bc->bc_rdonly = ro;
bc->bc_size = size;
bc->bc_sectsz = sectsz;
@@ -434,6 +457,14 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
}
int
+blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+
+ assert(bc->bc_magic == BLOCKIF_SIG);
+ return (blockif_request(bc, breq, BOP_DELETE));
+}
+
+int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
struct blockif_elem *be;
@@ -634,3 +665,11 @@ blockif_is_ro(struct blockif_ctxt *bc)
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_rdonly);
}
+
+int
+blockif_candelete(struct blockif_ctxt *bc)
+{
+
+ assert(bc->bc_magic == BLOCKIF_SIG);
+ return (bc->bc_candelete);
+}
diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h
index d1b7695..4e2c1b4 100644
--- a/usr.sbin/bhyve/block_if.h
+++ b/usr.sbin/bhyve/block_if.h
@@ -58,9 +58,11 @@ int blockif_sectsz(struct blockif_ctxt *bc);
void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
int blockif_queuesz(struct blockif_ctxt *bc);
int blockif_is_ro(struct blockif_ctxt *bc);
+int blockif_candelete(struct blockif_ctxt *bc);
int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
+int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_close(struct blockif_ctxt *bc);
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index bdb3e9d..bc0daf3 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -644,6 +644,100 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
}
static inline void
+read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
+ void *buf, int size)
+{
+ struct ahci_cmd_hdr *hdr;
+ struct ahci_prdt_entry *prdt;
+ void *to;
+ int i, len;
+
+ hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+ len = size;
+ to = buf;
+ prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+ for (i = 0; i < hdr->prdtl && len; i++) {
+ uint8_t *ptr;
+ uint32_t dbcsz;
+ int sublen;
+
+ dbcsz = (prdt->dbc & DBCMASK) + 1;
+ ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
+ sublen = len < dbcsz ? len : dbcsz;
+ memcpy(to, ptr, sublen);
+ len -= sublen;
+ to += sublen;
+ prdt++;
+ }
+}
+
+static void
+ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
+{
+ struct ahci_ioreq *aior;
+ struct blockif_req *breq;
+ uint8_t *entry;
+ uint64_t elba;
+ uint32_t len, elen;
+ int err;
+ uint8_t buf[512];
+
+ len = (uint16_t)cfis[13] << 8 | cfis[12];
+ len *= 512;
+ read_prdt(p, slot, cfis, buf, sizeof(buf));
+
+next:
+ entry = &buf[done];
+ elba = ((uint64_t)entry[5] << 40) |
+ ((uint64_t)entry[4] << 32) |
+ ((uint64_t)entry[3] << 24) |
+ ((uint64_t)entry[2] << 16) |
+ ((uint64_t)entry[1] << 8) |
+ entry[0];
+ elen = (uint16_t)entry[7] << 8 | entry[6];
+ done += 8;
+ if (elen == 0) {
+ if (done >= len) {
+ ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+ p->pending &= ~(1 << slot);
+ ahci_check_stopped(p);
+ return;
+ }
+ goto next;
+ }
+
+ /*
+ * Pull request off free list
+ */
+ aior = STAILQ_FIRST(&p->iofhd);
+ assert(aior != NULL);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+ aior->cfis = cfis;
+ aior->slot = slot;
+ aior->len = len;
+ aior->done = done;
+ aior->prdtl = 0;
+
+ breq = &aior->io_req;
+ breq->br_offset = elba * blockif_sectsz(p->bctx);
+ breq->br_iovcnt = 1;
+ breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
+
+ /*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+ err = blockif_delete(p->bctx, breq);
+ assert(err == 0);
+}
+
+static inline void
write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
void *buf, int size)
{
@@ -684,10 +778,12 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
} else {
uint16_t buf[256];
uint64_t sectors;
- int sectsz, psectsz, psectoff;
+ int sectsz, psectsz, psectoff, candelete, ro;
uint16_t cyl;
uint8_t sech, heads;
+ ro = blockif_is_ro(p->bctx);
+ candelete = blockif_candelete(p->bctx);
sectsz = blockif_sectsz(p->bctx);
sectors = blockif_size(p->bctx) / sectsz;
blockif_chs(p->bctx, &cyl, &heads, &sech);
@@ -718,6 +814,7 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
buf[66] = 100;
buf[67] = 100;
buf[68] = 100;
+ buf[69] = 0;
buf[75] = 31;
buf[76] = (1 << 8 | 1 << 2);
buf[80] = 0x1f0;
@@ -736,6 +833,11 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
buf[101] = (sectors >> 16);
buf[102] = (sectors >> 32);
buf[103] = (sectors >> 48);
+ if (candelete && !ro) {
+ buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
+ buf[105] = 1;
+ buf[169] = ATA_SUPPORT_DSM_TRIM;
+ }
buf[106] = 0x4000;
buf[209] = 0x4000;
if (psectsz > sectsz) {
@@ -1394,6 +1496,15 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
case ATA_FLUSHCACHE48:
ahci_handle_flush(p, slot, cfis);
break;
+ case ATA_DATA_SET_MANAGEMENT:
+ if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
+ cfis[13] == 0 && cfis[12] == 1) {
+ ahci_handle_dsm_trim(p, slot, cfis, 0);
+ break;
+ }
+ ahci_write_fis_d2h(p, slot, cfis,
+ (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+ break;
case ATA_STANDBY_CMD:
break;
case ATA_NOP:
@@ -1505,7 +1616,7 @@ ata_ioreq_cb(struct blockif_req *br, int err)
struct pci_ahci_softc *sc;
uint32_t tfd;
uint8_t *cfis;
- int pending, slot, ncq;
+ int pending, slot, ncq, dsm;
DPRINTF("%s %d\n", __func__, err);
@@ -1521,6 +1632,8 @@ ata_ioreq_cb(struct blockif_req *br, int err)
if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
cfis[2] == ATA_READ_FPDMA_QUEUED)
ncq = 1;
+ if (cfis[2] == ATA_DATA_SET_MANAGEMENT)
+ dsm = 1;
pthread_mutex_lock(&sc->mtx);
@@ -1534,10 +1647,17 @@ ata_ioreq_cb(struct blockif_req *br, int err)
*/
STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
- if (pending && !err) {
- ahci_handle_dma(p, slot, cfis, aior->done,
- hdr->prdtl - pending);
- goto out;
+ if (dsm) {
+ if (aior->done != aior->len && !err) {
+ ahci_handle_dsm_trim(p, slot, cfis, aior->done);
+ goto out;
+ }
+ } else {
+ if (pending && !err) {
+ ahci_handle_dma(p, slot, cfis, aior->done,
+ hdr->prdtl - pending);
+ goto out;
+ }
}
if (!err && aior->done == aior->len) {
OpenPOWER on IntegriCloud