diff options
-rw-r--r-- | drivers/misc/cxl/context.c | 6 | ||||
-rw-r--r-- | drivers/misc/cxl/cxl.h | 44 | ||||
-rw-r--r-- | drivers/misc/cxl/file.c | 19 | ||||
-rw-r--r-- | drivers/misc/cxl/native.c | 72 | ||||
-rw-r--r-- | drivers/misc/cxl/vphb.c | 26 |
5 files changed, 155 insertions, 12 deletions
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 1287148..6158421 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -193,7 +193,11 @@ int __detach_context(struct cxl_context *ctx) if (status != STARTED) return -EBUSY; - WARN_ON(cxl_detach_process(ctx)); + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. + */ + WARN_ON(cxl_detach_process(ctx) && + cxl_adapter_link_ok(ctx->afu->adapter)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ put_pid(ctx->pid); cxl_ctx_put(); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 6a93bfb..9b9e89f 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -531,6 +531,14 @@ struct cxl_process_element { __be32 software_state; } __packed; +static inline bool cxl_adapter_link_ok(struct cxl *cxl) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); +} + static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); @@ -539,12 +547,16 @@ static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) { - out_be64(_cxl_p1_addr(cxl, reg), val); + if (likely(cxl_adapter_link_ok(cxl))) + out_be64(_cxl_p1_addr(cxl, reg), val); } static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) { - return in_be64(_cxl_p1_addr(cxl, reg)); + if (likely(cxl_adapter_link_ok(cxl))) + return in_be64(_cxl_p1_addr(cxl, reg)); + else + return ~0ULL; } static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) @@ -555,12 +567,16 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) { - out_be64(_cxl_p1n_addr(afu, reg), val); + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p1n_addr(afu, reg), val); } static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) { - return in_be64(_cxl_p1n_addr(afu, reg)); + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p1n_addr(afu, reg)); + else + return ~0ULL; } static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) @@ -570,22 +586,34 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) { - out_be64(_cxl_p2n_addr(afu, reg), val); + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p2n_addr(afu, reg), val); } static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) { - return in_be64(_cxl_p2n_addr(afu, reg)); + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p2n_addr(afu, reg)); + else + return ~0ULL; } static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off) { - return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)); + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return ~0ULL; } static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off) { - return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)); + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return 0xffffffff; } u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index c8c8bfa..57bdb47 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -73,6 +73,11 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) if (!afu->current_mode) goto err_put_afu; + if (!cxl_adapter_link_ok(adapter)) { + rc = -EIO; + goto err_put_afu; + } + if (!(ctx = cxl_context_alloc())) { rc = -ENOMEM; goto err_put_afu; @@ -238,6 +243,9 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ctx->status == CLOSED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + pr_devel("afu_ioctl\n"); switch (cmd) { case CXL_IOCTL_START_WORK: @@ -265,6 +273,9 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) if (ctx->status != STARTED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + return cxl_context_iomap(ctx, vm); } @@ -309,6 +320,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, int rc; DEFINE_WAIT(wait); + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + if (count < CXL_READ_MIN_SIZE) return -EINVAL; @@ -319,6 +333,11 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + rc = -EIO; + goto out; + } + if (file->f_flags & O_NONBLOCK) { rc = -EAGAIN; goto out; diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index f74ff80..44568dd 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -41,6 +41,13 @@ static int afu_control(struct cxl_afu *afu, u64 command, rc = -EBUSY; goto out; } + + if (!cxl_adapter_link_ok(afu->adapter)) { + afu->enabled = enabled; + rc = -EIO; + goto out; + } + pr_devel_ratelimited("AFU control... (0x%016llx)\n", AFU_Cntl | command); cpu_relax(); @@ -85,6 +92,10 @@ int __cxl_afu_reset(struct cxl_afu *afu) int cxl_afu_check_and_enable(struct cxl_afu *afu) { + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Refusing to enable afu while link down!\n"); + return -EIO; + } if (afu->enabled) return 0; return afu_enable(afu); @@ -103,6 +114,12 @@ int cxl_psl_purge(struct cxl_afu *afu) pr_devel("PSL purge request\n"); + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); + rc = -EIO; + goto out; + } + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { WARN(1, "psl_purge request while AFU not disabled!\n"); cxl_afu_disable(afu); @@ -119,6 +136,11 @@ int cxl_psl_purge(struct cxl_afu *afu) rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(afu->adapter)) { + rc = -EIO; + goto out; + } + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", PSL_CNTL, dsisr); if (dsisr & CXL_PSL_DSISR_TRANS) { @@ -215,6 +237,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } @@ -224,6 +248,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } return 0; @@ -240,6 +266,11 @@ int cxl_afu_slbia(struct cxl_afu *afu) dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); return -EBUSY; } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; cpu_relax(); } return 0; @@ -279,6 +310,8 @@ static void slb_invalid(struct cxl_context *ctx) cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); while (1) { + if (!cxl_adapter_link_ok(adapter)) + break; slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); if (!(slbia & CXL_TLB_SLB_P)) break; @@ -308,6 +341,11 @@ static int do_process_element_cmd(struct cxl_context *ctx, rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); + rc = -EIO; + goto out; + } state = be64_to_cpup(ctx->afu->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); @@ -355,8 +393,13 @@ static int terminate_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); - rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, - CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + /* We could be asked to terminate when the hw is down. That + * should always succeed: it's not running if the hw has gone + * away and is being reset. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); ctx->elem->software_state = 0; /* Remove Valid bit */ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); mutex_unlock(&ctx->afu->spa_mutex); @@ -369,7 +412,14 @@ static int remove_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); - if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0))) + + /* We could be asked to remove when the hw is down. Again, if + * the hw is down, the PE is gone, so we succeed. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); + + if (!rc) ctx->pe_inserted = false; slb_invalid(ctx); pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); @@ -612,6 +662,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) if (!(mode & afu->modes_supported)) return -EINVAL; + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Device link is down, refusing to activate!\n"); + return -EIO; + } + if (mode == CXL_MODE_DIRECTED) return activate_afu_directed(afu); if (mode == CXL_MODE_DEDICATED) @@ -622,6 +677,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) { + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + WARN(1, "Device link is down, refusing to attach process!\n"); + return -EIO; + } + ctx->kernel = kernel; if (ctx->afu->current_mode == CXL_MODE_DIRECTED) return attach_afu_directed(ctx, wed, amr); @@ -666,6 +726,12 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; + /* If the adapter has gone away, we can't get any meaningful + * information. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 2eba002..2930911 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -138,6 +138,26 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, return 0; } + +static inline bool cxl_config_link_ok(struct pci_bus *bus) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + + /* Config space IO is based on phb->cfg_addr, which is based on + * afu_desc_mmio. This isn't safe to read/write when the link + * goes down, as EEH tears down MMIO space. + * + * Check if the link is OK before proceeding. + */ + + phb = pci_bus_to_host(bus); + if (phb == NULL) + return false; + afu = (struct cxl_afu *)phb->private_data; + return cxl_adapter_link_ok(afu->adapter); +} + static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val) { @@ -150,6 +170,9 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, if (rc) return rc; + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + /* Can only read 32 bits */ *val = (in_le32(ioaddr) >> shift) & mask; return PCIBIOS_SUCCESSFUL; @@ -167,6 +190,9 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, if (rc) return rc; + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + /* Can only write 32 bits so do read-modify-write */ mask <<= shift; val <<= shift; |