Diffstat (limited to 'drivers')
37 files changed, 2296 insertions, 229 deletions
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 601f64f..fdb8f3e 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -178,6 +178,20 @@ config IBM_BSR of threads across a large system which avoids bouncing a cacheline between several cores on a system +config POWERNV_OP_PANEL + tristate "IBM POWERNV Operator Panel Display support" + depends on PPC_POWERNV + default m + help + If you say Y here, a special character device node, /dev/op_panel, + will be created which exposes the operator panel display on IBM + Power Systems machines with FSPs. + + If you don't require access to the operator panel display from user + space, say N. + + If unsure, say M here to build it as a module called powernv-op-panel. + source "drivers/char/ipmi/Kconfig" config DS1620 diff --git a/drivers/char/Makefile b/drivers/char/Makefile index d8a7579..55d16bf 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -60,3 +60,4 @@ js-rtc-y = rtc.o obj-$(CONFIG_TILE_SROM) += tile-srom.o obj-$(CONFIG_XILLYBUS) += xillybus/ +obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o diff --git a/drivers/char/powernv-op-panel.c b/drivers/char/powernv-op-panel.c new file mode 100644 index 0000000..a45dabc --- /dev/null +++ b/drivers/char/powernv-op-panel.c @@ -0,0 +1,223 @@ +/* + * OPAL Operator Panel Display Driver + * + * Copyright 2016, Suraj Jitindar Singh, IBM Corporation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <linux/miscdevice.h> + +#include <asm/opal.h> + +/* + * This driver creates a character device (/dev/op_panel) which exposes the + * operator panel (character LCD display) on IBM Power Systems machines + * with FSPs. + * A character buffer written to the device will be displayed on the + * operator panel. 
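
A minimal user-space sketch (illustrative, not part of this patch) of driving the new /dev/op_panel node; the device name and the EFBIG-on-overflow behaviour come from the driver code below, everything else is assumed:

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          const char msg[] = "system booting";
          int fd = open("/dev/op_panel", O_WRONLY);

          if (fd < 0) {
                  perror("open /dev/op_panel");
                  return 1;
          }
          /* A write starting at offset 0 clears the panel to spaces
           * first; writes past the panel size fail with EFBIG. */
          if (write(fd, msg, strlen(msg)) < 0)
                  perror("write");
          close(fd);
          return 0;
  }
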
+ */ + +static DEFINE_MUTEX(oppanel_mutex); + +static u32 num_lines, oppanel_size; +static oppanel_line_t *oppanel_lines; +static char *oppanel_data; + +static loff_t oppanel_llseek(struct file *filp, loff_t offset, int whence) +{ + return fixed_size_llseek(filp, offset, whence, oppanel_size); +} + +static ssize_t oppanel_read(struct file *filp, char __user *userbuf, size_t len, + loff_t *f_pos) +{ + return simple_read_from_buffer(userbuf, len, f_pos, oppanel_data, + oppanel_size); +} + +static int __op_panel_update_display(void) +{ + struct opal_msg msg; + int rc, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_debug("Couldn't get OPAL async token [token=%d]\n", + token); + return token; + } + + rc = opal_write_oppanel_async(token, oppanel_lines, num_lines); + switch (rc) { + case OPAL_ASYNC_COMPLETION: + rc = opal_async_wait_response(token, &msg); + if (rc) { + pr_debug("Failed to wait for async response [rc=%d]\n", + rc); + break; + } + rc = opal_get_async_rc(msg); + if (rc != OPAL_SUCCESS) { + pr_debug("OPAL async call returned failed [rc=%d]\n", + rc); + break; + } + case OPAL_SUCCESS: + break; + default: + pr_debug("OPAL write op-panel call failed [rc=%d]\n", rc); + } + + opal_async_release_token(token); + return rc; +} + +static ssize_t oppanel_write(struct file *filp, const char __user *userbuf, + size_t len, loff_t *f_pos) +{ + loff_t f_pos_prev = *f_pos; + ssize_t ret; + int rc; + + if (!*f_pos) + memset(oppanel_data, ' ', oppanel_size); + else if (*f_pos >= oppanel_size) + return -EFBIG; + + ret = simple_write_to_buffer(oppanel_data, oppanel_size, f_pos, userbuf, + len); + if (ret > 0) { + rc = __op_panel_update_display(); + if (rc != OPAL_SUCCESS) { + pr_err_ratelimited("OPAL call failed to write to op panel display [rc=%d]\n", + rc); + *f_pos = f_pos_prev; + return -EIO; + } + } + return ret; +} + +static int oppanel_open(struct inode *inode, struct file *filp) +{ + if (!mutex_trylock(&oppanel_mutex)) { + pr_debug("Device Busy\n"); + return -EBUSY; + } + return 0; +} + +static int oppanel_release(struct inode *inode, struct file *filp) +{ + mutex_unlock(&oppanel_mutex); + return 0; +} + +static const struct file_operations oppanel_fops = { + .owner = THIS_MODULE, + .llseek = oppanel_llseek, + .read = oppanel_read, + .write = oppanel_write, + .open = oppanel_open, + .release = oppanel_release +}; + +static struct miscdevice oppanel_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "op_panel", + .fops = &oppanel_fops +}; + +static int oppanel_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + u32 line_len; + int rc, i; + + rc = of_property_read_u32(np, "#length", &line_len); + if (rc) { + pr_err_ratelimited("Operator panel length property not found\n"); + return rc; + } + rc = of_property_read_u32(np, "#lines", &num_lines); + if (rc) { + pr_err_ratelimited("Operator panel lines property not found\n"); + return rc; + } + oppanel_size = line_len * num_lines; + + pr_devel("Operator panel of size %u found with %u lines of length %u\n", + oppanel_size, num_lines, line_len); + + oppanel_data = kcalloc(oppanel_size, sizeof(*oppanel_data), GFP_KERNEL); + if (!oppanel_data) + return -ENOMEM; + + oppanel_lines = kcalloc(num_lines, sizeof(oppanel_line_t), GFP_KERNEL); + if (!oppanel_lines) { + rc = -ENOMEM; + goto free_oppanel_data; + } + + memset(oppanel_data, ' ', oppanel_size); + for (i = 0; i < num_lines; i++) { + oppanel_lines[i].line_len = cpu_to_be64(line_len); + oppanel_lines[i].line = 
cpu_to_be64(__pa(&oppanel_data[i * + line_len])); + } + + rc = misc_register(&oppanel_dev); + if (rc) { + pr_err_ratelimited("Failed to register as misc device\n"); + goto free_oppanel; + } + + return 0; + +free_oppanel: + kfree(oppanel_lines); +free_oppanel_data: + kfree(oppanel_data); + return rc; +} + +static int oppanel_remove(struct platform_device *pdev) +{ + misc_deregister(&oppanel_dev); + kfree(oppanel_lines); + kfree(oppanel_data); + return 0; +} + +static const struct of_device_id oppanel_match[] = { + { .compatible = "ibm,opal-oppanel" }, + { }, +}; + +static struct platform_driver oppanel_driver = { + .driver = { + .name = "powernv-op-panel", + .of_match_table = oppanel_match, + }, + .probe = oppanel_probe, + .remove = oppanel_remove, +}; + +module_platform_driver(oppanel_driver); + +MODULE_DEVICE_TABLE(of, oppanel_match); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("PowerNV Operator Panel LCD Display Driver"); +MODULE_AUTHOR("Suraj Jitindar Singh <sjitindarsingh@gmail.com>"); diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index e12dc30..f7ca891 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -20,7 +20,7 @@ #include <asm/opal.h> #include <asm/runlatch.h> -#define MAX_POWERNV_IDLE_STATES 8 +#define POWERNV_THRESHOLD_LATENCY_NS 200000 struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", @@ -29,6 +29,9 @@ struct cpuidle_driver powernv_idle_driver = { static int max_idle_state; static struct cpuidle_state *cpuidle_state_table; + +static u64 stop_psscr_table[CPUIDLE_STATE_MAX]; + static u64 snooze_timeout; static bool snooze_timeout_en; @@ -93,16 +96,27 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; } #endif + +static int stop_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + ppc64_runlatch_off(); + power9_idle_stop(stop_psscr_table[index]); + ppc64_runlatch_on(); + return index; +} + /* * States for dedicated partition case. 
*/ -static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = { +static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = { { /* Snooze */ .name = "snooze", .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = &snooze_loop }, + .enter = snooze_loop }, }; static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, @@ -168,7 +182,11 @@ static int powernv_add_idle_states(void) struct device_node *power_mgt; int nr_idle_states = 1; /* Snooze */ int dt_idle_states; - u32 *latency_ns, *residency_ns, *flags; + u32 latency_ns[CPUIDLE_STATE_MAX]; + u32 residency_ns[CPUIDLE_STATE_MAX]; + u32 flags[CPUIDLE_STATE_MAX]; + u64 psscr_val[CPUIDLE_STATE_MAX]; + const char *names[CPUIDLE_STATE_MAX]; int i, rc; /* Currently we have snooze statically defined */ @@ -186,26 +204,55 @@ static int powernv_add_idle_states(void) goto out; } - flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + /* + * Since snooze is used as first idle state, max idle states allowed is + * CPUIDLE_STATE_MAX -1 + */ + if (dt_idle_states > CPUIDLE_STATE_MAX - 1) { + pr_warn("cpuidle-powernv: discovered idle states more than allowed"); + dt_idle_states = CPUIDLE_STATE_MAX - 1; + } + if (of_property_read_u32_array(power_mgt, "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n"); - goto out_free_flags; + goto out; } - latency_ns = kzalloc(sizeof(*latency_ns) * dt_idle_states, GFP_KERNEL); - rc = of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-latencies-ns", latency_ns, dt_idle_states); - if (rc) { + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-latencies-ns", latency_ns, + dt_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); - goto out_free_latency; + goto out; + } + if (of_property_read_string_array(power_mgt, + "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); + goto out; } - residency_ns = kzalloc(sizeof(*residency_ns) * dt_idle_states, GFP_KERNEL); + /* + * If the idle states use stop instruction, probe for psscr values + * which are necessary to specify required stop level. + */ + if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)) + if (of_property_read_u64_array(power_mgt, + "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n"); + goto out; + } + rc = of_property_read_u32_array(power_mgt, "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states); for (i = 0; i < dt_idle_states; i++) { + /* + * If an idle state has exit latency beyond + * POWERNV_THRESHOLD_LATENCY_NS then don't use it + * in cpu-idle. + */ + if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS) + continue; /* * Cpuidle accepts exit_latency and target_residency in us. 
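
Condensing the filter and unit conversion this hunk implements (a sketch of the loop body, restating code from the hunk rather than replacing it): states with exit latency beyond the 200 us threshold are left out of cpuidle, and the device tree's nanosecond values are scaled to the microseconds cpuidle expects:

  if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)   /* 200000 ns */
          continue;       /* too slow for cpuidle; skip this state */

  /* device tree gives ns, cpuidle wants us */
  powernv_states[nr_idle_states].exit_latency =
          ((unsigned int)latency_ns[i]) / 1000;
  powernv_states[nr_idle_states].target_residency =
          ((unsigned int)residency_ns[i]) / 1000;
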
@@ -217,7 +264,17 @@ static int powernv_add_idle_states(void) strcpy(powernv_states[nr_idle_states].desc, "Nap"); powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; - powernv_states[nr_idle_states].enter = &nap_loop; + powernv_states[nr_idle_states].enter = nap_loop; + } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) && + !(flags[i] & OPAL_PM_TIMEBASE_STOP)) { + strncpy(powernv_states[nr_idle_states].name, + names[i], CPUIDLE_NAME_LEN); + strncpy(powernv_states[nr_idle_states].desc, + names[i], CPUIDLE_NAME_LEN); + powernv_states[nr_idle_states].flags = 0; + + powernv_states[nr_idle_states].enter = stop_loop; + stop_psscr_table[nr_idle_states] = psscr_val[i]; } /* @@ -232,7 +289,17 @@ static int powernv_add_idle_states(void) strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP; powernv_states[nr_idle_states].target_residency = 300000; - powernv_states[nr_idle_states].enter = &fastsleep_loop; + powernv_states[nr_idle_states].enter = fastsleep_loop; + } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) && + (flags[i] & OPAL_PM_TIMEBASE_STOP)) { + strncpy(powernv_states[nr_idle_states].name, + names[i], CPUIDLE_NAME_LEN); + strncpy(powernv_states[nr_idle_states].desc, + names[i], CPUIDLE_NAME_LEN); + + powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP; + powernv_states[nr_idle_states].enter = stop_loop; + stop_psscr_table[nr_idle_states] = psscr_val[i]; } #endif powernv_states[nr_idle_states].exit_latency = @@ -245,12 +312,6 @@ static int powernv_add_idle_states(void) nr_idle_states++; } - - kfree(residency_ns); -out_free_latency: - kfree(latency_ns); -out_free_flags: - kfree(flags); out: return nr_idle_states; } diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index 89d8208..a83ead1 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -1,7 +1,7 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX - default y + default m help Support for VMX cryptographic acceleration instructions on Power8 CPU. This module supports acceleration for AES and GHASH in hardware. 
If you diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index f688c32..31a98dc 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -23,6 +23,7 @@ #include <linux/moduleparam.h> #include <linux/types.h> #include <linux/err.h> +#include <linux/cpufeature.h> #include <linux/crypto.h> #include <asm/cputable.h> #include <crypto/internal/hash.h> @@ -45,9 +46,6 @@ int __init p8_init(void) int ret = 0; struct crypto_alg **alg_it; - if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - return -ENODEV; - for (alg_it = algs; *alg_it; alg_it++) { ret = crypto_register_alg(*alg_it); printk(KERN_INFO "crypto_register_alg '%s' = %d\n", @@ -80,7 +78,7 @@ void __exit p8_exit(void) crypto_unregister_shash(&p8_ghash_alg); } -module_init(p8_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init); module_exit(p8_exit); MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c index 75dd6d04..11e2a1f 100644 --- a/drivers/i2c/busses/i2c-opal.c +++ b/drivers/i2c/busses/i2c-opal.c @@ -71,7 +71,7 @@ static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req) if (rc) goto exit; - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) { rc = i2c_opal_translate_error(rc); goto exit; diff --git a/drivers/leds/leds-powernv.c b/drivers/leds/leds-powernv.c index dfb8bd3..b2a98c7 100644 --- a/drivers/leds/leds-powernv.c +++ b/drivers/leds/leds-powernv.c @@ -118,7 +118,7 @@ static int powernv_led_set(struct powernv_led_data *powernv_led, goto out_token; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) dev_err(dev, "%s : OAPL async call returned failed [rc=%d]\n", __func__, rc); diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index d531f80..d6f72c8 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -38,6 +38,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/slab.h> +#include <linux/memblock.h> #include <asm/byteorder.h> #include <asm/io.h> @@ -99,6 +100,7 @@ static DEFINE_MUTEX(smu_mutex); static struct smu_device *smu; static DEFINE_MUTEX(smu_part_access); static int smu_irq_inited; +static unsigned long smu_cmdbuf_abs; static void smu_i2c_retry(unsigned long data); @@ -479,8 +481,13 @@ int __init smu_init (void) printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR); + /* + * SMU based G5s need some memory below 2Gb. Thankfully this is + * called at a time where memblock is still available. + */ + smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); if (smu_cmdbuf_abs == 0) { - printk(KERN_ERR "SMU: Command buffer not allocated !\n"); + printk(KERN_ERR "SMU: Command buffer allocation failed !\n"); ret = -EINVAL; goto fail_np; } diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 8756d06..b75cf83 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -7,11 +7,7 @@ config CXL_BASE default n select PPC_COPRO_BASE -config CXL_KERNEL_API - bool - default n - -config CXL_EEH +config CXL_AFU_DRIVER_OPS bool default n @@ -19,8 +15,7 @@ config CXL tristate "Support for IBM Coherent Accelerators (CXL)" depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE - select CXL_KERNEL_API - select CXL_EEH + select CXL_AFU_DRIVER_OPS default m help Select this option to enable driver support for IBM Coherent @@ -33,3 +28,11 @@ config CXL CAPI adapters are found in POWER8 based systems. If unsure, say N. 
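
The i2c-opal and leds-powernv hunks above both switch from open-coding be64_to_cpu(msg.params[1]) to the opal_get_async_rc() helper. The full async-OPAL calling sequence, as the op-panel driver earlier in this series also uses it, looks roughly like this (opal_foo_async and arg are placeholders for any async OPAL call and its arguments):

  struct opal_msg msg;
  int rc, token;

  token = opal_async_get_token_interruptible();
  if (token < 0)
          return token;

  rc = opal_foo_async(token, arg);        /* placeholder call */
  if (rc == OPAL_ASYNC_COMPLETION) {
          rc = opal_async_wait_response(token, &msg);
          if (!rc)
                  rc = opal_get_async_rc(msg);  /* not msg.params[1] */
  }
  opal_async_release_token(token);
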
+ +config CXL_BIMODAL + bool "Support for bi-modal CAPI cards" + depends on HOTPLUG_PCI_POWERNV = y && CXL || HOTPLUG_PCI_POWERNV = m && CXL = m + default y + help + Select this option to enable support for bi-modal CAPI cards, such as + the Mellanox CX-4. diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 8a55c1a..56e9a47 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o -cxl-y += vphb.o api.o +cxl-y += vphb.o phb.o api.o cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 6d228cc..f3d34b9 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -13,6 +13,8 @@ #include <linux/file.h> #include <misc/cxl.h> #include <linux/fs.h> +#include <asm/pnv-pci.h> +#include <linux/msi.h> #include "cxl.h" @@ -24,6 +26,8 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) int rc; afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return ERR_CAST(afu); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) { @@ -94,6 +98,42 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } +int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) +{ + if (*ctx == NULL || *afu_irq == 0) { + *afu_irq = 1; + *ctx = cxl_get_context(pdev); + } else { + (*afu_irq)++; + if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) { + *ctx = list_next_entry(*ctx, extra_irq_contexts); + *afu_irq = 1; + } + } + return cxl_find_afu_irq(*ctx, *afu_irq); +} +/* Exported via cxl_base */ + +int cxl_set_priv(struct cxl_context *ctx, void *priv) +{ + if (!ctx) + return -EINVAL; + + ctx->priv = priv; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_priv); + +void *cxl_get_priv(struct cxl_context *ctx) +{ + if (!ctx) + return ERR_PTR(-EINVAL); + + return ctx->priv; +} +EXPORT_SYMBOL_GPL(cxl_get_priv); + int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) { int res; @@ -102,7 +142,10 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) if (num == 0) num = ctx->afu->pp_irqs; res = afu_allocate_irqs(ctx, num); - if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) { + if (res) + return res; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { /* In a guest, the PSL interrupt is not multiplexed. 
It was * allocated above, and we need to set its handler */ @@ -110,6 +153,13 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) if (hwirq) cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); } + + if (ctx->status == STARTED) { + if (cxl_ops->update_ivtes) + cxl_ops->update_ivtes(ctx); + else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n"); + } + return res; } EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); @@ -323,6 +373,23 @@ struct cxl_context *cxl_fops_get_context(struct file *file) } EXPORT_SYMBOL_GPL(cxl_fops_get_context); +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops) +{ + WARN_ON(!ops->fetch_event || !ops->event_delivered); + atomic_set(&ctx->afu_driver_events, 0); + ctx->afu_driver_ops = ops; +} +EXPORT_SYMBOL_GPL(cxl_set_driver_ops); + +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events) +{ + atomic_add(new_events, &ctx->afu_driver_events); + wake_up_all(&ctx->wq); +} +EXPORT_SYMBOL_GPL(cxl_context_events_pending); + int cxl_start_work(struct cxl_context *ctx, struct cxl_ioctl_start_work *work) { @@ -390,7 +457,106 @@ EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) { struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); + +int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + if (irqs > afu->adapter->user_irqs) + return -EINVAL; + + /* Limit user_irqs to prevent the user increasing this via sysfs */ + afu->adapter->user_irqs = irqs; + afu->irqs_max = irqs; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process); + +int cxl_get_max_irqs_per_process(struct pci_dev *dev) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + return afu->irqs_max; +} +EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); + +/* + * This is a special interrupt allocation routine called from the PHB's MSI + * setup function. When capi interrupts are allocated in this manner they must + * still be associated with a running context, but since the MSI APIs have no + * way to specify this we use the default context associated with the device. + * + * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU + * interrupt number, so in order to overcome this their driver informs us of + * the restriction by setting the maximum interrupts per context, and we + * allocate additional contexts as necessary so that we can keep the AFU + * interrupt number within the supported range. 
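
A hypothetical AFU driver would consume the new driver-ops API added above along these lines; fetch_event and event_delivered are the real struct cxl_afu_driver_ops hooks, while the my_* names are invented for illustration:

  static struct cxl_event_afu_driver_reserved *
  my_fetch_event(struct cxl_context *ctx)
  {
          /* hand back the next queued driver-specific payload */
          return my_dequeue_event(ctx);           /* hypothetical */
  }

  static void my_event_delivered(struct cxl_context *ctx,
                                 struct cxl_event_afu_driver_reserved *pl,
                                 int rc)
  {
          my_free_event(pl);                      /* hypothetical */
  }

  static struct cxl_afu_driver_ops my_ops = {
          .fetch_event     = my_fetch_event,
          .event_delivered = my_event_delivered,
  };

  /* at context setup: */
  cxl_set_driver_ops(ctx, &my_ops);
  /* when new events are queued, wake poll()/read() waiters: */
  cxl_context_events_pending(ctx, 1);
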
+ */ +int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct cxl_context *ctx, *new_ctx, *default_ctx; + int remaining; + int rc; + + ctx = default_ctx = cxl_get_context(pdev); + if (WARN_ON(!default_ctx)) + return -ENODEV; + + remaining = nvec; + while (remaining > 0) { + rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max)); + if (rc) { + pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev)); + return rc; + } + remaining -= ctx->afu->irqs_max; + + if (ctx != default_ctx && default_ctx->status == STARTED) { + WARN_ON(cxl_start_context(ctx, + be64_to_cpu(default_ctx->elem->common.wed), + NULL)); + } + + if (remaining > 0) { + new_ctx = cxl_dev_context_init(pdev); + if (!new_ctx) { + pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev)); + return -ENOSPC; + } + list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts); + ctx = new_ctx; + } + } + + return 0; +} +/* Exported via cxl_base */ + +void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct cxl_context *ctx, *pos, *tmp; + + ctx = cxl_get_context(pdev); + if (WARN_ON(!ctx)) + return; + + cxl_free_afu_irqs(ctx); + list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) { + cxl_stop_context(pos); + cxl_free_afu_irqs(pos); + list_del(&pos->extra_irq_contexts); + cxl_release_context(pos); + } +} +/* Exported via cxl_base */ diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 9b90ec6..cd54ce6 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -54,6 +54,19 @@ static inline void cxl_calls_put(struct cxl_calls *calls) { } #endif /* CONFIG_CXL_MODULE */ +/* AFU refcount management */ +struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} +EXPORT_SYMBOL_GPL(cxl_afu_get); + +void cxl_afu_put(struct cxl_afu *afu) +{ + put_device(&afu->dev); +} +EXPORT_SYMBOL_GPL(cxl_afu_put); + void cxl_slbia(struct mm_struct *mm) { struct cxl_calls *calls; @@ -93,9 +106,92 @@ int cxl_update_properties(struct device_node *dn, } EXPORT_SYMBOL_GPL(cxl_update_properties); +/* + * API calls into the driver that may be called from the PHB code and must be + * built in. 
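
Every wrapper that follows shares one skeleton: pin the (possibly modular) cxl driver with cxl_calls_get(), indirect through the cxl_calls table that main.c registers, then drop the reference. Reduced to its shape (cxl_some_op is a placeholder member, not a real symbol):

  int cxl_some_op(struct pci_dev *pdev)          /* placeholder */
  {
          struct cxl_calls *calls;
          int ret;

          calls = cxl_calls_get();       /* takes a ref on cxl.ko */
          if (!calls)
                  return -EBUSY;

          ret = calls->cxl_some_op(pdev);        /* placeholder member */

          cxl_calls_put(calls);          /* drops the module ref */
          return ret;
  }
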
+ */ +bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) +{ + bool ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return false; + + ret = calls->cxl_pci_associate_default_context(dev, afu); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_pci_associate_default_context); + +void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + calls->cxl_pci_disable_device(dev); + + cxl_calls_put(calls); +} +EXPORT_SYMBOL_GPL(cxl_pci_disable_device); + +int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) +{ + int ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return -EBUSY; + + ret = calls->cxl_next_msi_hwirq(pdev, ctx, afu_irq); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); + +int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + int ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return false; + + ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs); + +void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + calls->cxl_cx4_teardown_msi_irqs(pdev); + + cxl_calls_put(calls); +} +EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs); + static int __init cxl_base_init(void) { - struct device_node *np = NULL; + struct device_node *np; struct platform_device *dev; int count = 0; @@ -105,8 +201,7 @@ static int __init cxl_base_init(void) if (cpu_has_feature(CPU_FTR_HVMODE)) return 0; - while ((np = of_find_compatible_node(np, NULL, - "ibm,coherent-platform-facility"))) { + for_each_compatible_node(np, NULL, "ibm,coherent-platform-facility") { dev = of_platform_device_create(np, NULL, NULL); if (dev) count++; @@ -114,5 +209,4 @@ static int __init cxl_base_init(void) pr_devel("Found %d cxl device(s)\n", count); return 0; } - -module_init(cxl_base_init); +device_initcall(cxl_base_init); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 26d206b..bdee9a0 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -67,6 +67,9 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, ctx->pending_fault = false; ctx->pending_afu_err = false; + INIT_LIST_HEAD(&ctx->irq_names); + INIT_LIST_HEAD(&ctx->extra_irq_contexts); + /* * When we have to destroy all contexts in cxl_context_detach_all() we * end up with afu_release_irqs() called from inside a @@ -87,7 +90,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, + ctx->afu->adapter->native->sl_ops->min_pe, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 4fe5078..de09053 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -24,6 +24,7 @@ #include <asm/reg.h> #include <misc/cxl-base.h> +#include <misc/cxl.h> #include <uapi/misc/cxl.h> extern uint cxl_verbose; @@ -34,7 +35,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. 
*/ -#define CXL_API_VERSION 2 +#define CXL_API_VERSION 3 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -81,6 +82,7 @@ static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8}; static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; /* 0x00C0:7EFF Implementation dependent area */ +/* PSL registers */ static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; @@ -91,6 +93,11 @@ static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; +/* XSL registers (Mellanox CX4) */ +static const cxl_p1_reg_t CXL_XSL_Timebase = {0x0100}; +static const cxl_p1_reg_t CXL_XSL_TB_CTLSTAT = {0x0108}; +static const cxl_p1_reg_t CXL_XSL_FEC = {0x0158}; +static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168}; /* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */ /* 0x8000:FFFF Reserved PCIe MSI-X Table Area */ @@ -182,6 +189,18 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_PSL_ID_An_F (1ull << (63-31)) #define CXL_PSL_ID_An_L (1ull << (63-30)) +/****** CXL_PSL_SERR_An ****************************************************/ +#define CXL_PSL_SERR_An_afuto (1ull << (63-0)) +#define CXL_PSL_SERR_An_afudis (1ull << (63-1)) +#define CXL_PSL_SERR_An_afuov (1ull << (63-2)) +#define CXL_PSL_SERR_An_badsrc (1ull << (63-3)) +#define CXL_PSL_SERR_An_badctx (1ull << (63-4)) +#define CXL_PSL_SERR_An_llcmdis (1ull << (63-5)) +#define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) +#define CXL_PSL_SERR_An_afupar (1ull << (63-7)) +#define CXL_PSL_SERR_An_afudup (1ull << (63-8)) +#define CXL_PSL_SERR_An_AE (1ull << (63-30)) + /****** CXL_PSL_SCNTL_An ****************************************************/ #define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) /* Programming Modes: */ @@ -421,18 +440,6 @@ struct cxl_afu { bool enabled; }; -/* AFU refcount management */ -static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) -{ - - return (get_device(&afu->dev) == NULL) ? NULL : afu; -} - -static inline void cxl_afu_put(struct cxl_afu *afu) -{ - put_device(&afu->dev); -} - struct cxl_irq_name { struct list_head list; @@ -477,6 +484,9 @@ struct cxl_context { /* Only used in PR mode */ u64 process_token; + /* driver private data */ + void *priv; + unsigned long *irq_bitmap; /* Accessed from IRQ context */ struct cxl_irq_ranges irqs; struct list_head irq_names; @@ -522,7 +532,36 @@ struct cxl_context { bool pending_fault; bool pending_afu_err; + /* Used by AFU drivers for driver specific event delivery */ + struct cxl_afu_driver_ops *afu_driver_ops; + atomic_t afu_driver_events; + struct rcu_head rcu; + + /* + * Only used when more interrupts are allocated via + * pci_enable_msix_range than are supported in the default context, to + * use additional contexts to overcome the limitation. i.e. 
Mellanox + * CX4 only: + */ + struct list_head extra_irq_contexts; +}; + +struct cxl_service_layer_ops { + int (*adapter_regs_init)(struct cxl *adapter, struct pci_dev *dev); + int (*afu_regs_init)(struct cxl_afu *afu); + int (*register_serr_irq)(struct cxl_afu *afu); + void (*release_serr_irq)(struct cxl_afu *afu); + void (*debugfs_add_adapter_sl_regs)(struct cxl *adapter, struct dentry *dir); + void (*debugfs_add_afu_sl_regs)(struct cxl_afu *afu, struct dentry *dir); + void (*psl_irq_dump_registers)(struct cxl_context *ctx); + void (*err_irq_dump_registers)(struct cxl *adapter); + void (*debugfs_stop_trace)(struct cxl *adapter); + void (*write_timebase_ctrl)(struct cxl *adapter); + u64 (*timebase_read)(struct cxl *adapter); + int capi_mode; + bool needs_reset_before_disable; + int min_pe; }; struct cxl_native { @@ -533,6 +572,7 @@ struct cxl_native { irq_hw_number_t err_hwirq; unsigned int err_virq; u64 ps_off; + const struct cxl_service_layer_ops *sl_ops; }; struct cxl_guest { @@ -688,9 +728,21 @@ static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); +/* Internal functions wrapped in cxl_base to allow PHB to call them */ +bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); +void _cxl_pci_disable_device(struct pci_dev *dev); +int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); +int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); + bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); + void (*cxl_pci_disable_device)(struct pci_dev *dev); + int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); + int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type); + void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev); + struct module *owner; }; int register_cxl_calls(struct cxl_calls *calls); @@ -805,6 +857,11 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); +void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir); +void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx); +void cxl_native_err_irq_dump_regs(struct cxl *adapter); void cxl_stop_trace(struct cxl *cxl); int cxl_pci_vphb_add(struct cxl_afu *afu); void cxl_pci_vphb_remove(struct cxl_afu *afu); @@ -855,6 +912,7 @@ struct cxl_backend_ops { int (*attach_process)(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int (*detach_process)(struct cxl_context *ctx); + void (*update_ivtes)(struct cxl_context *ctx); bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); void (*release_afu)(struct device *dev); @@ -879,4 +937,7 @@ extern const struct cxl_backend_ops *cxl_ops; /* check if the given pci_dev is on the the cxl vphb bus */ bool cxl_pci_is_vphb_device(struct pci_dev *dev); + +/* decode AFU error bits in the PSL register PSL_SERR_An */ +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); #endif diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 5751899..ec7b8a0 100644 --- 
a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -51,6 +51,19 @@ static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64); } +void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); + debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); +} + +void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fec", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_XSL_FEC)); +} + int cxl_debugfs_adapter_add(struct cxl *adapter) { struct dentry *dir; @@ -65,13 +78,10 @@ int cxl_debugfs_adapter_add(struct cxl *adapter) return PTR_ERR(dir); adapter->debugfs = dir; - debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); - debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); - debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); - + if (adapter->native->sl_ops->debugfs_add_adapter_sl_regs) + adapter->native->sl_ops->debugfs_add_adapter_sl_regs(adapter, dir); return 0; } @@ -80,6 +90,14 @@ void cxl_debugfs_adapter_remove(struct cxl *adapter) debugfs_remove_recursive(adapter->debugfs); } +void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir) +{ + debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); + debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); +} + int cxl_debugfs_afu_add(struct cxl_afu *afu) { struct dentry *dir; @@ -94,18 +112,15 @@ int cxl_debugfs_afu_add(struct cxl_afu *afu) return PTR_ERR(dir); afu->debugfs = dir; - debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); - debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); - debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); - debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); + if (afu->adapter->native->sl_ops->debugfs_add_afu_sl_regs) + afu->adapter->native->sl_ops->debugfs_add_afu_sl_regs(afu, dir); return 0; } diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index eec468f..5fb9894 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -293,6 +293,17 @@ int 
afu_mmap(struct file *file, struct vm_area_struct *vm) return cxl_context_iomap(ctx, vm); } +static inline bool ctx_event_pending(struct cxl_context *ctx) +{ + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err) + return true; + + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) + return true; + + return false; +} + unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) { struct cxl_context *ctx = file->private_data; @@ -305,8 +316,7 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) pr_devel("afu_poll wait done pe: %i\n", ctx->pe); spin_lock_irqsave(&ctx->lock, flags); - if (ctx->pending_irq || ctx->pending_fault || - ctx->pending_afu_err) + if (ctx_event_pending(ctx)) mask |= POLLIN | POLLRDNORM; else if (ctx->status == CLOSED) /* Only error on closed when there are no futher events pending @@ -319,16 +329,46 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) return mask; } -static inline int ctx_event_pending(struct cxl_context *ctx) +static ssize_t afu_driver_event_copy(struct cxl_context *ctx, + char __user *buf, + struct cxl_event *event, + struct cxl_event_afu_driver_reserved *pl) { - return (ctx->pending_irq || ctx->pending_fault || - ctx->pending_afu_err || (ctx->status == CLOSED)); + /* Check event */ + if (!pl) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Check event size */ + event->header.size += pl->data_size; + if (event->header.size > CXL_READ_MIN_SIZE) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Copy event header */ + if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + /* Copy event data */ + buf += sizeof(struct cxl_event_header); + if (copy_to_user(buf, &pl->data, pl->data_size)) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */ + return event->header.size; } ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off) { struct cxl_context *ctx = file->private_data; + struct cxl_event_afu_driver_reserved *pl = NULL; struct cxl_event event; unsigned long flags; int rc; @@ -344,7 +384,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, for (;;) { prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); - if (ctx_event_pending(ctx)) + if (ctx_event_pending(ctx) || (ctx->status == CLOSED)) break; if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { @@ -374,7 +414,12 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, memset(&event, 0, sizeof(event)); event.header.process_element = ctx->pe; event.header.size = sizeof(struct cxl_event_header); - if (ctx->pending_irq) { + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) { + pr_devel("afu_read delivering AFU driver specific event\n"); + pl = ctx->afu_driver_ops->fetch_event(ctx); + atomic_dec(&ctx->afu_driver_events); + event.header.type = CXL_EVENT_AFU_DRIVER; + } else if (ctx->pending_irq) { pr_devel("afu_read delivering AFU interrupt\n"); event.header.size += sizeof(struct cxl_event_afu_interrupt); event.header.type = CXL_EVENT_AFU_INTERRUPT; @@ -404,6 +449,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, spin_unlock_irqrestore(&ctx->lock, flags); + if (event.header.type == CXL_EVENT_AFU_DRIVER) + return afu_driver_event_copy(ctx, buf, &event, pl); + 
if (copy_to_user(buf, &event, event.header.size)) return -EFAULT; return event.header.size; @@ -558,7 +606,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 2); + BUILD_BUG_ON(CXL_API_VERSION != 3); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c index 68dd0b7..c63d61e 100644 --- a/drivers/misc/cxl/flash.c +++ b/drivers/misc/cxl/flash.c @@ -24,8 +24,8 @@ struct ai_header { }; static struct semaphore sem; -unsigned long *buffer[CXL_AI_MAX_ENTRIES]; -struct sg_list *le; +static unsigned long *buffer[CXL_AI_MAX_ENTRIES]; +static struct sg_list *le; static u64 continue_token; static unsigned int transfer; diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index bc8d0b9..9aa58a7 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -196,15 +196,18 @@ static irqreturn_t guest_slice_irq_err(int irq, void *data) { struct cxl_afu *afu = data; int rc; - u64 serr; + u64 serr, afu_error, dsisr; - WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); if (rc) { dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); return IRQ_HANDLED; } - dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); if (rc) @@ -1052,16 +1055,18 @@ static void free_adapter(struct cxl *adapter) struct irq_avail *cur; int i; - if (adapter->guest->irq_avail) { - for (i = 0; i < adapter->guest->irq_nranges; i++) { - cur = &adapter->guest->irq_avail[i]; - kfree(cur->bitmap); + if (adapter->guest) { + if (adapter->guest->irq_avail) { + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); } - kfree(adapter->guest->irq_avail); + kfree(adapter->guest->status); + kfree(adapter->guest); } - kfree(adapter->guest->status); cxl_remove_adapter_nr(adapter); - kfree(adapter->guest); kfree(adapter); } @@ -1182,6 +1187,7 @@ const struct cxl_backend_ops cxl_guest_ops = { .ack_irq = guest_ack_irq, .attach_process = guest_attach_process, .detach_process = guest_detach_process, + .update_ivtes = NULL, .support_attributes = guest_support_attributes, .link_ok = guest_link_ok, .release_afu = guest_release_afu, diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 8def455..dec60f5 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -260,9 +260,6 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) else alloc_count = count + 1; - /* Initialize the list head to hold irq names */ - INIT_LIST_HEAD(&ctx->irq_names); - if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, alloc_count))) return rc; @@ -374,3 +371,32 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) ctx->irq_count = 0; } + +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr) +{ + dev_crit(&afu->dev, + "PSL Slice error received. 
Check AFU for root cause.\n"); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + if (serr & CXL_PSL_SERR_An_afuto) + dev_crit(&afu->dev, "AFU MMIO Timeout\n"); + if (serr & CXL_PSL_SERR_An_afudis) + dev_crit(&afu->dev, + "MMIO targeted Accelerator that was not enabled\n"); + if (serr & CXL_PSL_SERR_An_afuov) + dev_crit(&afu->dev, "AFU CTAG Overflow\n"); + if (serr & CXL_PSL_SERR_An_badsrc) + dev_crit(&afu->dev, "Bad Interrupt Source\n"); + if (serr & CXL_PSL_SERR_An_badctx) + dev_crit(&afu->dev, "Bad Context Handle\n"); + if (serr & CXL_PSL_SERR_An_llcmdis) + dev_crit(&afu->dev, "LLCMD to Disabled AFU\n"); + if (serr & CXL_PSL_SERR_An_llcmdto) + dev_crit(&afu->dev, "LLCMD Timeout to AFU\n"); + if (serr & CXL_PSL_SERR_An_afupar) + dev_crit(&afu->dev, "AFU MMIO Parity Error\n"); + if (serr & CXL_PSL_SERR_An_afudup) + dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n"); + if (serr & CXL_PSL_SERR_An_AE) + dev_crit(&afu->dev, + "AFU asserted JDONE with JERROR in AFU Directed Mode\n"); +} diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index ae68c32..d9be23b2 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -110,6 +110,11 @@ static inline void cxl_slbia_core(struct mm_struct *mm) static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, + .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, + .cxl_pci_disable_device = _cxl_pci_disable_device, + .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, + .cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs, + .cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs, .owner = THIS_MODULE, }; diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 55d8a14..3bcdaee 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -21,10 +21,10 @@ #include "cxl.h" #include "trace.h" -static int afu_control(struct cxl_afu *afu, u64 command, +static int afu_control(struct cxl_afu *afu, u64 command, u64 clear, u64 result, u64 mask, bool enabled) { - u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + u64 AFU_Cntl; unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); int rc = 0; @@ -33,7 +33,8 @@ static int afu_control(struct cxl_afu *afu, u64 command, trace_cxl_afu_ctrl(afu, command); - cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command); + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + cxl_p2n_write(afu, CXL_AFU_Cntl_An, (AFU_Cntl & ~clear) | command); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); while ((AFU_Cntl & mask) != result) { @@ -54,6 +55,16 @@ static int afu_control(struct cxl_afu *afu, u64 command, cpu_relax(); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); }; + + if (AFU_Cntl & CXL_AFU_Cntl_An_RA) { + /* + * Workaround for a bug in the XSL used in the Mellanox CX4 + * that fails to clear the RA bit after an AFU reset, + * preventing subsequent AFU resets from working. 
+ */ + cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl & ~CXL_AFU_Cntl_An_RA); + } + pr_devel("AFU command complete: %llx\n", command); afu->enabled = enabled; out: @@ -67,7 +78,7 @@ static int afu_enable(struct cxl_afu *afu) { pr_devel("AFU enable request\n"); - return afu_control(afu, CXL_AFU_Cntl_An_E, + return afu_control(afu, CXL_AFU_Cntl_An_E, 0, CXL_AFU_Cntl_An_ES_Enabled, CXL_AFU_Cntl_An_ES_MASK, true); } @@ -76,7 +87,8 @@ int cxl_afu_disable(struct cxl_afu *afu) { pr_devel("AFU disable request\n"); - return afu_control(afu, 0, CXL_AFU_Cntl_An_ES_Disabled, + return afu_control(afu, 0, CXL_AFU_Cntl_An_E, + CXL_AFU_Cntl_An_ES_Disabled, CXL_AFU_Cntl_An_ES_MASK, false); } @@ -85,7 +97,7 @@ static int native_afu_reset(struct cxl_afu *afu) { pr_devel("AFU reset request\n"); - return afu_control(afu, CXL_AFU_Cntl_An_RA, + return afu_control(afu, CXL_AFU_Cntl_An_RA, 0, CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, false); @@ -189,7 +201,7 @@ int cxl_alloc_spa(struct cxl_afu *afu) unsigned spa_size; /* Work out how many pages to allocate */ - afu->native->spa_order = 0; + afu->native->spa_order = -1; do { afu->native->spa_order++; spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; @@ -430,7 +442,6 @@ static int remove_process_element(struct cxl_context *ctx) return rc; } - void cxl_assign_psn_space(struct cxl_context *ctx) { if (!ctx->afu->pp_size || ctx->master) { @@ -507,10 +518,39 @@ static u64 calculate_sr(struct cxl_context *ctx) return sr; } +static void update_ivtes_directed(struct cxl_context *ctx) +{ + bool need_update = (ctx->status == STARTED); + int r; + + if (need_update) { + WARN_ON(terminate_process_element(ctx)); + WARN_ON(remove_process_element(ctx)); + } + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } + + /* + * Theoretically we could use the update llcmd, instead of a + * terminate/remove/add (or if an atomic update was required we could + * do a suspend/update/resume), however it seems there might be issues + * with the update llcmd on some cards (including those using an XSL on + * an ASIC) so for now it's safest to go with the commands that are + * known to work. In the future if we come across a situation where the + * card may be performing transactions using the same PE while we are + * doing this update we might need to revisit this. + */ + if (need_update) + WARN_ON(add_process_element(ctx)); +} + static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) { u32 pid; - int r, result; + int result; cxl_assign_psn_space(ctx); @@ -545,10 +585,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) ctx->irqs.range[0] = 1; } - for (r = 0; r < CXL_IRQ_RANGES; r++) { - ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); - ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); - } + update_ivtes_directed(ctx); ctx->elem->common.amr = cpu_to_be64(amr); ctx->elem->common.wed = cpu_to_be64(wed); @@ -570,7 +607,33 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_sysfs_afu_m_remove(afu); cxl_chardev_afu_remove(afu); - cxl_ops->afu_reset(afu); + /* + * The CAIA section 2.2.1 indicates that the procedure for starting and + * stopping an AFU in AFU directed mode is AFU specific, which is not + * ideal since this code is generic and with one exception has no + * knowledge of the AFU. 
This is in contrast to the procedure for + * disabling a dedicated process AFU, which is documented to just + * require a reset. The architecture does indicate that both an AFU + * reset and an AFU disable should result in the AFU being disabled and + * we do both followed by a PSL purge for safety. + * + * Notably we used to have some issues with the disable sequence on PSL + * cards, which is why we ended up using this heavy weight procedure in + * the first place, however a bug was discovered that had rendered the + * disable operation ineffective, so it is conceivable that was the + * sole explanation for those difficulties. Careful regression testing + * is recommended if anyone attempts to remove or reorder these + * operations. + * + * The XSL on the Mellanox CX4 behaves a little differently from the + * PSL based cards and will time out an AFU reset if the AFU is still + * enabled. That card is special in that we do have a means to identify + * it from this code, so in that case we skip the reset and just use a + * disable/purge to avoid the timeout and corresponding noise in the + * kernel log. + */ + if (afu->adapter->native->sl_ops->needs_reset_before_disable) + cxl_ops->afu_reset(afu); cxl_afu_disable(afu); cxl_psl_purge(afu); @@ -600,6 +663,22 @@ static int activate_dedicated_process(struct cxl_afu *afu) return cxl_chardev_d_afu_add(afu); } +static void update_ivtes_dedicated(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, + (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | + (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | + (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | + ((u64)ctx->irqs.offset[3] & 0xffff)); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) + (((u64)ctx->irqs.range[0] & 0xffff) << 48) | + (((u64)ctx->irqs.range[1] & 0xffff) << 32) | + (((u64)ctx->irqs.range[2] & 0xffff) << 16) | + ((u64)ctx->irqs.range[3] & 0xffff)); +} + static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) { struct cxl_afu *afu = ctx->afu; @@ -618,16 +697,7 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) cxl_prefault(ctx, wed); - cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, - (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | - (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | - (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | - ((u64)ctx->irqs.offset[3] & 0xffff)); - cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) - (((u64)ctx->irqs.range[0] & 0xffff) << 48) | - (((u64)ctx->irqs.range[1] & 0xffff) << 32) | - (((u64)ctx->irqs.range[2] & 0xffff) << 16) | - ((u64)ctx->irqs.range[3] & 0xffff)); + update_ivtes_dedicated(ctx); cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); @@ -703,12 +773,37 @@ static int native_attach_process(struct cxl_context *ctx, bool kernel, static inline int detach_process_native_dedicated(struct cxl_context *ctx) { + /* + * The CAIA section 2.1.1 indicates that we need to do an AFU reset to + * stop the AFU in dedicated mode (we therefore do not make that + * optional like we do in the afu directed path). It does not indicate + * that we need to do an explicit disable (which should occur + * implicitly as part of the reset) or purge, but we do these as well + * to be on the safe side. 
+ * + * Notably we used to have some issues with the disable sequence + * (before the sequence was spelled out in the architecture) which is + * why we were so heavy weight in the first place, however a bug was + * discovered that had rendered the disable operation ineffective, so + * it is conceivable that was the sole explanation for those + * difficulties. Point is, we should be careful and do some regression + * testing if we ever attempt to remove any part of this procedure. + */ cxl_ops->afu_reset(ctx->afu); cxl_afu_disable(ctx->afu); cxl_psl_purge(ctx->afu); return 0; } +static void native_update_ivtes(struct cxl_context *ctx) +{ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return update_ivtes_directed(ctx); + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return update_ivtes_dedicated(ctx); + WARN(1, "native_update_ivtes: Bad mode\n"); +} + static inline int detach_process_native_afu_directed(struct cxl_context *ctx) { if (!ctx->pe_inserted) @@ -754,26 +849,38 @@ static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) return 0; } -static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, - u64 dsisr, u64 errstat) +void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx) { u64 fir1, fir2, fir_slice, serr, afu_debug; fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); - serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + cxl_afu_decode_psl_serr(ctx->afu, serr); + } dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); +} + +static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, + u64 dsisr, u64 errstat) +{ + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); - dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(ctx->afu->adapter); + if (ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers) + ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers(ctx); + + if (ctx->afu->adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + ctx->afu->adapter->native->sl_ops->debugfs_stop_trace(ctx->afu->adapter); + } return cxl_ops->ack_irq(ctx, 0, errstat); } @@ -849,41 +956,56 @@ void native_irq_wait(struct cxl_context *ctx) static irqreturn_t native_slice_irq_err(int irq, void *data) { struct cxl_afu *afu = data; - u64 fir_slice, errstat, serr, afu_debug; - - WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + u64 fir_slice, errstat, serr, afu_debug, afu_error, dsisr; + /* + * slice err interrupt is only used with full PSL (no XSL) + */ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); dev_crit(&afu->dev, 
"PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); return IRQ_HANDLED; } +void cxl_native_err_irq_dump_regs(struct cxl *adapter) +{ + u64 fir1, fir2; + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + + dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); +} + static irqreturn_t native_irq_err(int irq, void *data) { struct cxl *adapter = data; - u64 fir1, fir2, err_ivte; + u64 err_ivte; WARN(1, "CXL ERROR interrupt %i\n", irq); err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); - dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(adapter); - - fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + if (adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + adapter->native->sl_ops->debugfs_stop_trace(adapter); + } - dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); + if (adapter->native->sl_ops->err_irq_dump_registers) + adapter->native->sl_ops->err_irq_dump_registers(adapter); return IRQ_HANDLED; } @@ -1128,6 +1250,7 @@ const struct cxl_backend_ops cxl_native_ops = { .irq_wait = native_irq_wait, .attach_process = native_attach_process, .detach_process = native_detach_process, + .update_ivtes = native_update_ivtes, .support_attributes = native_support_attributes, .link_ok = cxl_adapter_link_ok, .release_afu = cxl_pci_release_afu, diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index a08fcc8..d152e2d 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -55,6 +55,8 @@ pci_read_config_byte(dev, vsec + 0xa, dest) #define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ pci_write_config_byte(dev, vsec + 0xa, val) +#define CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, vsec, val) \ + pci_bus_write_config_byte(bus, devfn, vsec + 0xa, val) #define CXL_VSEC_PROTOCOL_MASK 0xe0 #define CXL_VSEC_PROTOCOL_1024TB 0x80 #define CXL_VSEC_PROTOCOL_512TB 0x40 @@ -352,13 +354,10 @@ static u64 get_capp_unit_id(struct device_node *np) return 0; } -static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id) { struct device_node *np; const __be32 *prop; - u64 psl_dsnctl; - u64 chipid; - u64 capp_unit_id; if (!(np = pnv_pci_get_phb_node(dev))) return -ENODEV; @@ -367,14 +366,28 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev np = of_get_next_parent(np); if (!np) return -ENODEV; - chipid = be32_to_cpup(prop); - capp_unit_id = get_capp_unit_id(np); + *chipid = be32_to_cpup(prop); + *capp_unit_id = get_capp_unit_id(np); of_node_put(np); - if (!capp_unit_id) { + if (!*capp_unit_id) { pr_err("cxl: invalid capp unit id\n"); return -ENODEV; } + return 0; +} + +static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev) +{ + u64 psl_dsnctl; + u64 chipid; + u64 capp_unit_id; + int rc; + + rc = calc_capp_routing(dev, &chipid, &capp_unit_id); + if (rc) + return rc; + psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */ psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us 
*/ /* Tell PSL where to route data to */ @@ -393,8 +406,61 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev return 0; } +static int init_implementation_adapter_xsl_regs(struct cxl *adapter, struct pci_dev *dev) +{ + u64 xsl_dsnctl; + u64 chipid; + u64 capp_unit_id; + int rc; + + rc = calc_capp_routing(dev, &chipid, &capp_unit_id); + if (rc) + return rc; + + /* Tell XSL where to route data to */ + xsl_dsnctl = 0x0000600000000000ULL | (chipid << (63-5)); + xsl_dsnctl |= (capp_unit_id << (63-13)); + cxl_p1_write(adapter, CXL_XSL_DSNCTL, xsl_dsnctl); + + return 0; +} + +/* PSL & XSL */ +#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) #define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) -#define _2048_250MHZ_CYCLES 1 +/* For the PSL this is a multiple for 0 < n <= 7: */ +#define PSL_2048_250MHZ_CYCLES 1 + +static void write_timebase_ctrl_psl(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, + TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); +} + +/* XSL */ +#define TBSYNC_ENA (1ULL << 63) +/* For the XSL this is 2**n * 2000 clocks for 0 < n <= 6: */ +#define XSL_2000_CLOCKS 1 +#define XSL_4000_CLOCKS 2 +#define XSL_8000_CLOCKS 3 + +static void write_timebase_ctrl_xsl(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_XSL_TB_CTLSTAT, + TBSYNC_ENA | + TBSYNC_CAL(3) | + TBSYNC_CNT(XSL_4000_CLOCKS)); +} + +static u64 timebase_read_psl(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_PSL_Timebase); +} + +static u64 timebase_read_xsl(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_XSL_Timebase); +} static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) { @@ -421,8 +487,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) * Setup PSL Timebase Control and Status register * with the recommended Timebase Sync Count value */ - cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, - TBSYNC_CNT(2 * _2048_250MHZ_CYCLES)); + adapter->native->sl_ops->write_timebase_ctrl(adapter); /* Enable PSL Timebase */ cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); @@ -435,7 +500,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) dev_info(&dev->dev, "PSL timebase can't synchronize\n"); return; } - psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase); + psl_tb = adapter->native->sl_ops->timebase_read(adapter); delta = mftb() - psl_tb; if (delta < 0) delta = -delta; @@ -445,7 +510,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) return; } -static int init_implementation_afu_regs(struct cxl_afu *afu) +static int init_implementation_afu_psl_regs(struct cxl_afu *afu) { /* read/write masks for this slice */ cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); @@ -551,36 +616,234 @@ static int setup_cxl_bars(struct pci_dev *dev) return 0; } -/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ -static int switch_card_to_cxl(struct pci_dev *dev) -{ +#ifdef CONFIG_CXL_BIMODAL + +struct cxl_switch_work { + struct pci_dev *dev; + struct work_struct work; int vsec; + int mode; +}; + +static void switch_card_to_cxl(struct work_struct *work) +{ + struct cxl_switch_work *switch_work = + container_of(work, struct cxl_switch_work, work); + struct pci_dev *dev = switch_work->dev; + struct pci_bus *bus = dev->bus; + struct pci_controller *hose = pci_bus_to_host(bus); + struct pci_dev *bridge; + struct pnv_php_slot *php_slot; + unsigned int devfn; u8 val; int rc; - dev_info(&dev->dev, "switch card to CXL\n"); + dev_info(&bus->dev, "cxl: 
Preparing for mode switch...\n"); + bridge = list_first_entry_or_null(&hose->bus->devices, struct pci_dev, + bus_list); + if (!bridge) { + dev_WARN(&bus->dev, "cxl: Couldn't find root port!\n"); + goto err_dev_put; + } - if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + php_slot = pnv_php_find_slot(pci_device_to_OF_node(bridge)); + if (!php_slot) { + dev_err(&bus->dev, "cxl: Failed to find slot hotplug " + "information. You may need to upgrade " + "skiboot. Aborting.\n"); + goto err_dev_put; + } + + rc = CXL_READ_VSEC_MODE_CONTROL(dev, switch_work->vsec, &val); + if (rc) { + dev_err(&bus->dev, "cxl: Failed to read CAPI mode control: %i\n", rc); + goto err_dev_put; + } + devfn = dev->devfn; + + /* Release the reference obtained in cxl_check_and_switch_mode() */ + pci_dev_put(dev); + + dev_dbg(&bus->dev, "cxl: Removing PCI devices from kernel\n"); + pci_lock_rescan_remove(); + pci_hp_remove_devices(bridge->subordinate); + pci_unlock_rescan_remove(); + + /* Switch the CXL protocol on the card */ + if (switch_work->mode == CXL_BIMODE_CXL) { + dev_info(&bus->dev, "cxl: Switching card to CXL mode\n"); + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + rc = pnv_cxl_enable_phb_kernel_api(hose, true); + if (rc) { + dev_err(&bus->dev, "cxl: Failed to enable kernel API" + " on real PHB, aborting\n"); + goto err_free_work; + } + } else { + dev_WARN(&bus->dev, "cxl: Switching card to PCI mode not supported!\n"); + goto err_free_work; + } + + rc = CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, switch_work->vsec, val); + if (rc) { + dev_err(&bus->dev, "cxl: Failed to configure CXL protocol: %i\n", rc); + goto err_free_work; + } + + /* + * The CAIA spec (v1.1, Section 10.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching PCIe config + * space. + */ + msleep(100); + + /* + * Hot reset to cause the card to come back in cxl mode. An + * OPAL_RESET_PCI_LINK would be sufficient, but currently lacks support + * in skiboot, so we use a hot reset instead. + * + * We call pci_set_pcie_reset_state() on the bridge, as a CAPI card is + * guaranteed to sit directly under the root port, and setting the reset + * state on a device directly under the root port is equivalent to doing + * it on the root port itself.
+ */ + dev_info(&bus->dev, "cxl: Configuration write complete, resetting card\n"); + pci_set_pcie_reset_state(bridge, pcie_hot_reset); + pci_set_pcie_reset_state(bridge, pcie_deassert_reset); + + dev_dbg(&bus->dev, "cxl: Offlining slot\n"); + rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_OFFLINE); + if (rc) { + dev_err(&bus->dev, "cxl: OPAL offlining call failed: %i\n", rc); + goto err_free_work; + } + + dev_dbg(&bus->dev, "cxl: Onlining and probing slot\n"); + rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_ONLINE); + if (rc) { + dev_err(&bus->dev, "cxl: OPAL onlining call failed: %i\n", rc); + goto err_free_work; + } + + pci_lock_rescan_remove(); + pci_hp_add_devices(bridge->subordinate); + pci_unlock_rescan_remove(); + + dev_info(&bus->dev, "cxl: CAPI mode switch completed\n"); + kfree(switch_work); + return; + +err_dev_put: + /* Release the reference obtained in cxl_check_and_switch_mode() */ + pci_dev_put(dev); +err_free_work: + kfree(switch_work); +} + +int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec) +{ + struct cxl_switch_work *work; + u8 val; + int rc; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) return -ENODEV; + + if (!vsec) { + vsec = find_cxl_vsec(dev); + if (!vsec) { + dev_info(&dev->dev, "CXL VSEC not found\n"); + return -ENODEV; + } } - if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { - dev_err(&dev->dev, "failed to read current mode control: %i", rc); + rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); + if (rc) { + dev_err(&dev->dev, "Failed to read current mode control: %i", rc); return rc; } - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; - if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { - dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); - return rc; + + if (mode == CXL_BIMODE_PCI) { + if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { + dev_info(&dev->dev, "Card is already in PCI mode\n"); + return 0; + } + /* + * TODO: Before it's safe to switch the card back to PCI mode + * we need to disable the CAPP and make sure any cachelines the + * card holds have been flushed out. Needs skiboot support. + */ + dev_WARN(&dev->dev, "CXL mode switch to PCI unsupported!\n"); + return -EIO; } + + if (val & CXL_VSEC_PROTOCOL_ENABLE) { + dev_info(&dev->dev, "Card is already in CXL mode\n"); + return 0; + } + + dev_info(&dev->dev, "Card is in PCI mode, scheduling kernel thread " + "to switch to CXL mode\n"); + + work = kmalloc(sizeof(struct cxl_switch_work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + pci_dev_get(dev); + work->dev = dev; + work->vsec = vsec; + work->mode = mode; + INIT_WORK(&work->work, switch_card_to_cxl); + + schedule_work(&work->work); + /* - * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states - * we must wait 100ms after this mode switch before touching - * PCIe config space. + * We return a failure now to abort the driver init. Once the + * link has been cycled and the card is in cxl mode we will + * come back (possibly using the generic cxl driver), but + * return success as the card should then be in cxl mode. + * + * TODO: What if the card comes back in PCI mode even after + * the switch? Don't want to spin endlessly. 
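For callers, the contract sketched by the comment above is: 0 means the card is already in the requested mode, -EBUSY means the switch has been scheduled and the device will reappear after the link is cycled, and other negative values are hard failures. A hedged sketch of a caller (example_probe is hypothetical, not part of this patch):

static int example_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
        /* vsec = 0 asks the API to locate the CXL VSEC itself */
        int rc = cxl_check_and_switch_mode(dev, CXL_BIMODE_CXL, 0);

        if (rc == -EBUSY)
                return rc;      /* switch scheduled; we will be probed again */
        if (rc)
                return rc;      /* no VSEC, or the switch could not be started */

        /* Card is already in CXL mode; continue with normal init. */
        return 0;
}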
*/ - msleep(100); + return -EBUSY; +} +EXPORT_SYMBOL_GPL(cxl_check_and_switch_mode); + +#endif /* CONFIG_CXL_BIMODAL */ + +static int setup_cxl_protocol_area(struct pci_dev *dev) +{ + u8 val; + int rc; + int vsec = find_cxl_vsec(dev); + + if (!vsec) { + dev_info(&dev->dev, "CXL VSEC not found\n"); + return -ENODEV; + } + + rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); + if (rc) { + dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc); + return rc; + } + + if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { + dev_err(&dev->dev, "Card not in CAPI mode!\n"); + return -EIO; + } + + if ((val & CXL_VSEC_PROTOCOL_MASK) != CXL_VSEC_PROTOCOL_256TB) { + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB; + rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val); + if (rc) { + dev_err(&dev->dev, "Failed to set CXL protocol area: %i\n", rc); + return rc; + } + } return 0; } @@ -712,6 +975,21 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) } } + if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) { + /* + * We could also check this for the dedicated process model + * since the architecture indicates it should be set to 1, but + * in that case we ignore the value and I'd rather not risk + * breaking any existing dedicated process AFUs that left it as + * 0 (not that I'm aware of any). It is clearly an error for an + * AFU directed AFU to set this to 0, and would have previously + * triggered a bug resulting in the maximum not being enforced + * at all since idr_alloc treats 0 as no maximum. + */ + dev_err(&afu->dev, "AFU does not support any processes\n"); + return -EINVAL; + } + return 0; } @@ -753,11 +1031,13 @@ static int sanitise_afu_regs(struct cxl_afu *afu) else cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); } - reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); - if (reg) { - if (reg & ~0xffff) - dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); - cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + if (afu->adapter->native->sl_ops->register_serr_irq) { + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0xffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } } reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); if (reg) { @@ -835,11 +1115,13 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = cxl_afu_descriptor_looks_ok(afu))) goto err1; - if ((rc = init_implementation_afu_regs(afu))) - goto err1; + if (adapter->native->sl_ops->afu_regs_init) + if ((rc = adapter->native->sl_ops->afu_regs_init(afu))) + goto err1; - if ((rc = cxl_native_register_serr_irq(afu))) - goto err1; + if (adapter->native->sl_ops->register_serr_irq) + if ((rc = adapter->native->sl_ops->register_serr_irq(afu))) + goto err1; if ((rc = cxl_native_register_psl_irq(afu))) goto err2; @@ -847,7 +1129,8 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc return 0; err2: - cxl_native_release_serr_irq(afu); + if (adapter->native->sl_ops->release_serr_irq) + adapter->native->sl_ops->release_serr_irq(afu); err1: pci_unmap_slice_regs(afu); return rc; @@ -856,7 +1139,8 @@ err1: static void pci_deconfigure_afu(struct cxl_afu *afu) { cxl_native_release_psl_irq(afu); - cxl_native_release_serr_irq(afu); + if (afu->adapter->native->sl_ops->release_serr_irq) + afu->adapter->native->sl_ops->release_serr_irq(afu); pci_unmap_slice_regs(afu); } @@ -1165,7 +1449,7 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev) if ((rc = setup_cxl_bars(dev))) return rc; - if ((rc = switch_card_to_cxl(dev))) + if ((rc = setup_cxl_protocol_area(dev))) return rc; if ((rc = cxl_update_image_control(adapter))) @@ -1177,10 +1461,13 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = sanitise_adapter_regs(adapter))) goto err; - if ((rc = init_implementation_adapter_regs(adapter, dev))) + if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) goto err; - if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) + /* Required for devices using CAPP DMA mode, harmless for others */ + pci_set_master(dev); + + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) goto err; /* If recovery happened, the last step is to turn on snooping. @@ -1212,6 +1499,43 @@ static void cxl_deconfigure_adapter(struct cxl *adapter) pci_disable_device(pdev); } +static const struct cxl_service_layer_ops psl_ops = { + .adapter_regs_init = init_implementation_adapter_psl_regs, + .afu_regs_init = init_implementation_afu_psl_regs, + .register_serr_irq = cxl_native_register_serr_irq, + .release_serr_irq = cxl_native_release_serr_irq, + .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_psl_regs, + .debugfs_add_afu_sl_regs = cxl_debugfs_add_afu_psl_regs, + .psl_irq_dump_registers = cxl_native_psl_irq_dump_regs, + .err_irq_dump_registers = cxl_native_err_irq_dump_regs, + .debugfs_stop_trace = cxl_stop_trace, + .write_timebase_ctrl = write_timebase_ctrl_psl, + .timebase_read = timebase_read_psl, + .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, + .needs_reset_before_disable = true, +}; + +static const struct cxl_service_layer_ops xsl_ops = { + .adapter_regs_init = init_implementation_adapter_xsl_regs, + .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_xsl_regs, + .write_timebase_ctrl = write_timebase_ctrl_xsl, + .timebase_read = timebase_read_xsl, + .capi_mode = OPAL_PHB_CAPI_MODE_DMA, + .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */ +}; + +static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) +{ + if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { + dev_info(&adapter->dev, "Device uses an XSL\n"); + adapter->native->sl_ops = &xsl_ops; + } else { + dev_info(&adapter->dev, "Device uses a PSL\n"); + adapter->native->sl_ops = &psl_ops; + } +} + + static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) { struct cxl *adapter; @@ -1227,6 +1551,8 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) goto err_release; } + set_sl_ops(adapter, dev); + /* Set defaults for parameters which need to persist over * configure/reconfigure */ @@ -1280,6 +1606,67 @@ static void cxl_pci_remove_adapter(struct cxl *adapter) device_unregister(&adapter->dev); } +#define CXL_MAX_PCIEX_PARENT 2 + +static int cxl_slot_is_switched(struct pci_dev *dev) +{ + struct device_node *np; + int depth = 0; + const __be32 *prop; + + if (!(np = pci_device_to_OF_node(dev))) { + pr_err("cxl: np = NULL\n"); + return -ENODEV; + } + of_node_get(np); + while (np) { + np = of_get_next_parent(np); + prop = of_get_property(np, "device_type", NULL); + if (!prop || strcmp((char *)prop, "pciex")) + break; + depth++; + } + of_node_put(np); + return (depth > CXL_MAX_PCIEX_PARENT); +} + +bool cxl_slot_is_supported(struct pci_dev *dev, int flags) +{ + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return false; + + if ((flags & CXL_SLOT_FLAG_DMA) && (!pvr_version_is(PVR_POWER8NVL))) { + /* + * CAPP DMA mode is technically supported on regular P8, but + * will EEH if the 
card attempts to access memory < 4GB, which + * we cannot realistically avoid. We might be able to work + * around the issue, but until then return unsupported: + */ + return false; + } + + if (cxl_slot_is_switched(dev)) + return false; + + /* + * XXX: This gets a little tricky on regular P8 (not POWER8NVL) since + * the CAPP can be connected to PHB 0, 1 or 2 on a first come first + * served basis, which is racy to check from here. If we need to + * support this in future we might need to consider having this + * function effectively reserve it ahead of time. + * + * Currently, the only user of this API is the Mellanox CX4, which is + * only supported on P8NVL due to the above mentioned limitation of + * CAPP DMA mode and therefore does not need to worry about this. If the + * issue with CAPP DMA mode is later worked around on P8 we might need + * to revisit this. + */ + + return true; +} +EXPORT_SYMBOL_GPL(cxl_slot_is_supported); + + static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct cxl *adapter; @@ -1291,6 +1678,11 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) return -ENODEV; } + if (cxl_slot_is_switched(dev)) { + dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n"); + return -ENODEV; + } + if (cxl_verbose) dump_cxl_config_space(dev); @@ -1311,6 +1703,9 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } + if (pnv_pci_on_cxl_phb(dev) && adapter->slices >= 1) + pnv_cxl_phb_set_peer_afu(dev, adapter->afu[0]); + return 0; } @@ -1381,6 +1776,9 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, */ for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + /* Only participate in EEH if we are on a virtual PHB */ + if (afu->phb == NULL) + return PCI_ERS_RESULT_NONE; cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; diff --git a/drivers/misc/cxl/phb.c b/drivers/misc/cxl/phb.c new file mode 100644 index 0000000..0935d44 --- /dev/null +++ b/drivers/misc/cxl/phb.c @@ -0,0 +1,44 @@ +/* + * Copyright 2014-2016 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/pci.h> +#include "cxl.h" + +bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) +{ + struct cxl_context *ctx; + + /* + * Allocate a context to do cxl things to. This is used for interrupts + * in the peer model using a real phb, and if we eventually do DMA ops + * in the virtual phb, we'll need a default context to attach them to. 
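Composing this with cxl_check_and_switch_mode() from earlier in this file gives the intended call sequence; a sketch (example_request_cxl is illustrative, not part of this patch):

static int example_request_cxl(struct pci_dev *dev)
{
        /* Refuse up front if the slot can never support CAPP DMA mode */
        if (!cxl_slot_is_supported(dev, CXL_SLOT_FLAG_DMA))
                return -ENODEV;

        /* May return -EBUSY while the asynchronous switch is in flight */
        return cxl_check_and_switch_mode(dev, CXL_BIMODE_CXL, 0);
}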
+ */ + ctx = cxl_dev_context_init(dev); + if (!ctx) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_ops->afu_check_and_enable(afu) == 0); +} +/* exported via cxl_base */ + +void _cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } +} +/* exported via cxl_base */ diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index cdc7723..dee8def 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -9,6 +9,7 @@ #include <linux/pci.h> #include <misc/cxl.h> +#include <asm/pnv-pci.h> #include "cxl.h" static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -44,7 +45,6 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *phb; struct cxl_afu *afu; - struct cxl_context *ctx; phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; @@ -57,30 +57,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) set_dma_ops(&dev->dev, &dma_direct_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); - /* - * Allocate a context to do cxl things too. If we eventually do real - * DMA ops, we'll need a default context to attach them to - */ - ctx = cxl_dev_context_init(dev); - if (!ctx) - return false; - dev->dev.archdata.cxl_ctx = ctx; - - return (cxl_ops->afu_check_and_enable(afu) == 0); -} - -static void cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_context *ctx = cxl_get_context(dev); - - if (ctx) { - if (ctx->status == STARTED) { - dev_err(&dev->dev, "Default context started\n"); - return; - } - dev->dev.archdata.cxl_ctx = NULL; - cxl_release_context(ctx); - } + return _cxl_pci_associate_default_context(dev, afu); } static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, @@ -197,8 +174,8 @@ static struct pci_controller_ops cxl_pci_controller_ops = { .probe_mode = cxl_pci_probe_mode, .enable_device_hook = cxl_pci_enable_device_hook, - .disable_device = cxl_pci_disable_device, - .release_device = cxl_pci_disable_device, + .disable_device = _cxl_pci_disable_device, + .release_device = _cxl_pci_disable_device, .window_alignment = cxl_pci_window_alignment, .reset_secondary_bus = cxl_pci_reset_secondary_bus, .setup_msi_irqs = cxl_setup_msi_irqs, @@ -208,20 +185,30 @@ static struct pci_controller_ops cxl_pci_controller_ops = int cxl_pci_vphb_add(struct cxl_afu *afu) { - struct pci_dev *phys_dev; - struct pci_controller *phb, *phys_phb; + struct pci_controller *phb; struct device_node *vphb_dn; struct device *parent; - if (cpu_has_feature(CPU_FTR_HVMODE)) { - phys_dev = to_pci_dev(afu->adapter->dev.parent); - phys_phb = pci_bus_to_host(phys_dev->bus); - vphb_dn = phys_phb->dn; - parent = &phys_dev->dev; - } else { - vphb_dn = afu->adapter->dev.parent->of_node; - parent = afu->adapter->dev.parent; - } + /* + * If there are no AFU configuration records we won't have anything to + * expose under the vPHB, so skip creating one, returning success since + * this is still a valid case. This will also opt us out of EEH + * handling since we won't have anything special to do if there are no + * kernel drivers attached to the vPHB, and EEH handling is not yet + * supported in the peer model. + */ + if (!afu->crs_num) + return 0; + + /* The parent device is the adapter. Reuse the device node of + * the adapter. 
+ * We don't seem to care what device node is used for the vPHB, + * but tools such as lsvpd walk up the device parents looking + * for a valid location code, so we might as well show devices + * attached to the adapter as being located on that adapter. + */ + parent = afu->adapter->dev.parent; + vphb_dn = parent->of_node; /* Alloc and setup PHB data structure */ phb = pcibios_alloc_controller(vphb_dn); @@ -272,13 +259,18 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) pcibios_free_controller(phb); } +static bool _cxl_pci_is_vphb_device(struct pci_controller *phb) +{ + return (phb->ops == &cxl_pcie_pci_ops); +} + bool cxl_pci_is_vphb_device(struct pci_dev *dev) { struct pci_controller *phb; phb = pci_bus_to_host(dev->bus); - return (phb->ops == &cxl_pcie_pci_ops); + return _cxl_pci_is_vphb_device(phb); } struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) @@ -287,7 +279,13 @@ phb = pci_bus_to_host(dev->bus); - return (struct cxl_afu *)phb->private_data; + if (_cxl_pci_is_vphb_device(phb)) + return (struct cxl_afu *)phb->private_data; + + if (pnv_pci_on_cxl_phb(dev)) + return pnv_cxl_phb_to_afu(phb); + + return ERR_PTR(-ENODEV); } EXPORT_SYMBOL_GPL(cxl_pci_to_afu); diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c index d5b870b..f5396f2 100644 --- a/drivers/mtd/devices/powernv_flash.c +++ b/drivers/mtd/devices/powernv_flash.c @@ -95,7 +95,7 @@ static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op, return -EIO; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc == OPAL_SUCCESS) { rc = 0; if (retlen) diff --git a/drivers/of/base.c b/drivers/of/base.c index a4b6087..7792266 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -502,6 +502,28 @@ int of_device_is_compatible(const struct device_node *device, } EXPORT_SYMBOL(of_device_is_compatible); +/** + * of_device_compatible_match - Check a device against an array of compatible strings + * @device: device node to check + * @compat: NULL-terminated array of compatible strings + * + * Returns the best match score, or 0 if the device is not compatible + * with any of the entries. + */ +int of_device_compatible_match(struct device_node *device, + const char *const *compat) +{ + unsigned int tmp, score = 0; + + if (!compat) + return 0; + + while (*compat) { + tmp = of_device_is_compatible(device, *compat); + if (tmp > score) + score = tmp; + compat++; + } + + return score; +} + /** * of_machine_is_compatible - Test root of device tree for a given compatible value * @compat: compatible string to look for in root node's compatible property. diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index df8caec..aadce45 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -113,6 +113,19 @@ config HOTPLUG_PCI_SHPC When in doubt, say N. +config HOTPLUG_PCI_POWERNV + tristate "PowerPC PowerNV PCI Hotplug driver" + depends on PPC_POWERNV && EEH + select OF_DYNAMIC + help + Say Y here if you run a PowerPC PowerNV platform that supports + PCI hotplug. + + To compile this driver as a module, choose M here: the + module will be called pnv-php. + + When in doubt, say N.
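A short usage sketch for the of_device_compatible_match() helper added above; the compatible strings and function name are invented for the example:

static const char * const example_compat[] = {
        "ibm,example-phb-v2",
        "ibm,example-phb",
        NULL
};

static bool example_node_matches(struct device_node *np)
{
        /* Non-zero means at least one entry matched; larger is better */
        return of_device_compatible_match(np, example_compat) > 0;
}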
+ config HOTPLUG_PCI_RPA tristate "RPA PCI Hotplug driver" depends on PPC_PSERIES && EEH diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index b616e75..e33cdda 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o @@ -50,6 +51,8 @@ ibmphp-objs := ibmphp_core.o \ acpiphp-objs := acpiphp_core.o \ acpiphp_glue.o +pnv-php-objs := pnv_php.o + rpaphp-objs := rpaphp_core.o \ rpaphp_pci.o \ rpaphp_slot.o diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c new file mode 100644 index 0000000..e6245b0 --- /dev/null +++ b/drivers/pci/hotplug/pnv_php.c @@ -0,0 +1,711 @@ +/* + * PCI Hotplug Driver for PowerPC PowerNV platform. + * + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/pci_hotplug.h> + +#include <asm/opal.h> +#include <asm/pnv-pci.h> +#include <asm/ppc-pci.h> + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver" + +static LIST_HEAD(pnv_php_slot_list); +static DEFINE_SPINLOCK(pnv_php_lock); + +static void pnv_php_register(struct device_node *dn); +static void pnv_php_unregister_one(struct device_node *dn); +static void pnv_php_unregister(struct device_node *dn); + +static void pnv_php_free_slot(struct kref *kref) +{ + struct pnv_php_slot *php_slot = container_of(kref, + struct pnv_php_slot, kref); + + WARN_ON(!list_empty(&php_slot->children)); + kfree(php_slot->name); + kfree(php_slot); +} + +static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot) +{ + + if (WARN_ON(!php_slot)) + return; + + kref_put(&php_slot->kref, pnv_php_free_slot); +} + +static struct pnv_php_slot *pnv_php_match(struct device_node *dn, + struct pnv_php_slot *php_slot) +{ + struct pnv_php_slot *target, *tmp; + + if (php_slot->dn == dn) { + kref_get(&php_slot->kref); + return php_slot; + } + + list_for_each_entry(tmp, &php_slot->children, link) { + target = pnv_php_match(dn, tmp); + if (target) + return target; + } + + return NULL; +} + +struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn) +{ + struct pnv_php_slot *php_slot, *tmp; + unsigned long flags; + + spin_lock_irqsave(&pnv_php_lock, flags); + list_for_each_entry(tmp, &pnv_php_slot_list, link) { + php_slot = pnv_php_match(dn, tmp); + if (php_slot) { + spin_unlock_irqrestore(&pnv_php_lock, flags); + return php_slot; + } + } + spin_unlock_irqrestore(&pnv_php_lock, flags); + + return NULL; +} +EXPORT_SYMBOL_GPL(pnv_php_find_slot); + +/* + * Remove pdn for all children of the indicated device node. + * The function should remove pdn in a depth-first manner. 
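The refcounting discipline above is easy to miss: pnv_php_match() takes a kref on a hit, so every successful pnv_php_find_slot() must be balanced with pnv_php_put_slot(). A minimal sketch of a well-behaved caller (example_use_slot is illustrative, not part of this patch):

static void example_use_slot(struct device_node *dn)
{
        struct pnv_php_slot *slot = pnv_php_find_slot(dn);

        if (!slot)
                return;

        /* ... use the slot; the held kref keeps it alive ... */

        pnv_php_put_slot(slot); /* balance the reference from the lookup */
}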
+ */ +static void pnv_php_rmv_pdns(struct device_node *dn) +{ + struct device_node *child; + + for_each_child_of_node(dn, child) { + pnv_php_rmv_pdns(child); + + pci_remove_device_node_info(child); + } +} + +/* + * Detach all child nodes of the indicated device node. The + * function handles the device nodes in a depth-first manner. + * + * We should not invoke of_node_release() as the memory for an + * individual device node is part of a larger memory block. The + * large block is allocated from memblock (system bootup) or by + * kmalloc() when unflattening the device tree via OF changeset. + * We cannot free a large block that came from memblock; in the + * kmalloc() case, it should be released all at once. + */ +static void pnv_php_detach_device_nodes(struct device_node *parent) +{ + struct device_node *dn; + int refcount; + + for_each_child_of_node(parent, dn) { + pnv_php_detach_device_nodes(dn); + + of_node_put(dn); + refcount = atomic_read(&dn->kobj.kref.refcount); + if (unlikely(refcount != 1)) + pr_warn("Invalid refcount %d on <%s>\n", + refcount, of_node_full_name(dn)); + + of_detach_node(dn); + } +} + +static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot) +{ + pnv_php_rmv_pdns(php_slot->dn); + + /* + * Decrease the refcount if the device nodes were created + * through an OF changeset before detaching them. + */ + if (php_slot->fdt) + of_changeset_destroy(&php_slot->ocs); + pnv_php_detach_device_nodes(php_slot->dn); + + if (php_slot->fdt) { + kfree(php_slot->dt); + kfree(php_slot->fdt); + php_slot->dt = NULL; + php_slot->dn->child = NULL; + php_slot->fdt = NULL; + } +} + +/* + * As the nodes in an OF changeset are applied in reverse order, we + * need to reverse the nodes in advance so that we end up with the + * correct node order after the changeset is applied. + */ +static void pnv_php_reverse_nodes(struct device_node *parent) +{ + struct device_node *child, *next; + + /* Depth first */ + for_each_child_of_node(parent, child) + pnv_php_reverse_nodes(child); + + /* Reverse the nodes in the child list */ + child = parent->child; + parent->child = NULL; + while (child) { + next = child->sibling; + + child->sibling = parent->child; + parent->child = child; + child = next; + } +} + +static int pnv_php_populate_changeset(struct of_changeset *ocs, + struct device_node *dn) +{ + struct device_node *child; + int ret = 0; + + for_each_child_of_node(dn, child) { + ret = of_changeset_attach_node(ocs, child); + if (unlikely(ret)) + break; + + ret = pnv_php_populate_changeset(ocs, child); + if (unlikely(ret)) + break; + } + + return ret; +} + +static void *pnv_php_add_one_pdn(struct device_node *dn, void *data) +{ + struct pci_controller *hose = (struct pci_controller *)data; + struct pci_dn *pdn; + + pdn = pci_add_device_node_info(hose, dn); + if (unlikely(!pdn)) + return ERR_PTR(-ENOMEM); + + return NULL; +} + +static void pnv_php_add_pdns(struct pnv_php_slot *slot) +{ + struct pci_controller *hose = pci_bus_to_host(slot->bus); + + pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose); +} + +static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) +{ + void *fdt, *fdt1, *dt; + int ret; + + /* We don't know the FDT blob size in advance, so fetch it into a + * maximally-sized chunk and then copy it into another chunk that + * fits the real size. + */ + fdt1 = kzalloc(0x10000, GFP_KERNEL); + if (unlikely(!fdt1)) { + ret = -ENOMEM; + dev_warn(&php_slot->pdev->dev, "Cannot alloc FDT blob\n"); + goto out; + } + + ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d getting FDT blob\n", + ret); + goto free_fdt1; + } + + fdt = kzalloc(fdt_totalsize(fdt1), GFP_KERNEL); + if (unlikely(!fdt)) { + ret = -ENOMEM; + dev_warn(&php_slot->pdev->dev, "Cannot alloc %d bytes of memory\n", + fdt_totalsize(fdt1)); + goto free_fdt1; + } + + /* Unflatten device tree blob */ + memcpy(fdt, fdt1, fdt_totalsize(fdt1)); + dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL); + if (unlikely(!dt)) { + ret = -EINVAL; + dev_warn(&php_slot->pdev->dev, "Cannot unflatten FDT\n"); + goto free_fdt; + } + + /* Initialize and apply the changeset */ + of_changeset_init(&php_slot->ocs); + pnv_php_reverse_nodes(php_slot->dn); + ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn); + if (unlikely(ret)) { + pnv_php_reverse_nodes(php_slot->dn); + dev_warn(&php_slot->pdev->dev, "Error %d populating changeset\n", + ret); + goto free_dt; + } + + php_slot->dn->child = NULL; + ret = of_changeset_apply(&php_slot->ocs); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d applying changeset\n", + ret); + goto destroy_changeset; + } + + /* Add device node firmware data */ + pnv_php_add_pdns(php_slot); + php_slot->fdt = fdt; + php_slot->dt = dt; + kfree(fdt1); + goto out; + +destroy_changeset: + of_changeset_destroy(&php_slot->ocs); +free_dt: + kfree(dt); + php_slot->dn->child = NULL; +free_fdt: + kfree(fdt); +free_fdt1: + kfree(fdt1); +out: + return ret; +}
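Stripped of the FDT handling, pnv_php_add_devtree() above builds on the standard OF changeset pattern. A minimal sketch under the assumption of a single already-unflattened node new_dn (both the node and the abbreviated error handling are illustrative):

static int example_attach_node(struct device_node *new_dn)
{
        struct of_changeset ocs;
        int ret;

        of_changeset_init(&ocs);
        ret = of_changeset_attach_node(&ocs, new_dn);
        if (!ret)
                ret = of_changeset_apply(&ocs); /* nodes go live here */
        if (ret)
                of_changeset_destroy(&ocs);     /* drop references on failure */
        return ret;
}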
+ +int pnv_php_set_slot_power_state(struct hotplug_slot *slot, + uint8_t state) +{ + struct pnv_php_slot *php_slot = slot->private; + struct opal_msg msg; + int ret; + + ret = pnv_pci_set_power_state(php_slot->id, state, &msg); + if (likely(ret > 0)) { + if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle || + be64_to_cpu(msg.params[2]) != state || + be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { + dev_warn(&php_slot->pdev->dev, "Wrong msg (%lld, %lld, %lld)\n", + be64_to_cpu(msg.params[1]), + be64_to_cpu(msg.params[2]), + be64_to_cpu(msg.params[3])); + return -ENOMSG; + } + } else if (unlikely(ret < 0)) { + dev_warn(&php_slot->pdev->dev, "Error %d powering %s\n", + ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); + return ret; + } + + if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE) + pnv_php_rmv_devtree(php_slot); + else + ret = pnv_php_add_devtree(php_slot); + + return ret; +} +EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state); + +static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) +{ + struct pnv_php_slot *php_slot = slot->private; + uint8_t power_state = OPAL_PCI_SLOT_POWER_ON; + int ret; + + /* + * Retrieve the power status from firmware. If we fail to + * get it, the power status falls back to on. + */ + ret = pnv_pci_get_power_state(php_slot->id, &power_state); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d getting power status\n", + ret); + } else { + *state = power_state; + slot->info->power_status = power_state; + } + + return 0; +} + +static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) +{ + struct pnv_php_slot *php_slot = slot->private; + uint8_t presence = OPAL_PCI_SLOT_EMPTY; + int ret; + + /* + * Retrieve the presence status from firmware. If we can't + * get it, the status falls back to empty. + */ + ret = pnv_pci_get_presence_state(php_slot->id, &presence); + if (likely(ret >= 0)) { + *state = presence; + slot->info->adapter_status = presence; + ret = 0; + } else { + dev_warn(&php_slot->pdev->dev, "Error %d getting presence\n", + ret); + } + + return ret; +} + +static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) +{ + /* FIXME: Make it real once firmware supports it */ + slot->info->attention_status = state; + + return 0; +} + +static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) +{ + struct hotplug_slot *slot = &php_slot->slot; + uint8_t presence = OPAL_PCI_SLOT_EMPTY; + uint8_t power_status = OPAL_PCI_SLOT_POWER_ON; + int ret; + + /* Check if the slot has been configured */ + if (php_slot->state != PNV_PHP_STATE_REGISTERED) + return 0; + + /* Retrieve slot presence status */ + ret = pnv_php_get_adapter_state(slot, &presence); + if (unlikely(ret)) + return ret; + + /* Proceed straight to scanning if there is nothing behind the slot */ + if (presence == OPAL_PCI_SLOT_EMPTY) + goto scan; + + /* + * If the power supply to the slot is off, we can't detect + * the adapter presence state. That means we have to turn the + * slot on before probing its presence state. + * + * The first time through, we don't change the power state, in + * order to speed up system boot, on the assumption that the + * firmware supplies a consistent slot power status: an empty + * slot always has its power off and a non-empty slot has its + * power on. + */ + if (!php_slot->power_state_check) { + php_slot->power_state_check = true; + + ret = pnv_php_get_power_state(slot, &power_status); + if (unlikely(ret)) + return ret; + + if (power_status != OPAL_PCI_SLOT_POWER_ON) + return 0; + } + + /* Check the power status. Scan the slot if it is already on */ + ret = pnv_php_get_power_state(slot, &power_status); + if (unlikely(ret)) + return ret; + + if (power_status == OPAL_PCI_SLOT_POWER_ON) + goto scan; + + /* Power is off, turn it on and then scan the slot */ + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); + if (unlikely(ret)) + return ret; + +scan: + if (presence == OPAL_PCI_SLOT_PRESENT) { + if (rescan) { + pci_lock_rescan_remove(); + pci_hp_add_devices(php_slot->bus); + pci_unlock_rescan_remove(); + } + + /* Rescan for child hotpluggable slots */ + php_slot->state = PNV_PHP_STATE_POPULATED; + if (rescan) + pnv_php_register(php_slot->dn); + } else { + php_slot->state = PNV_PHP_STATE_POPULATED; + } + + return 0; +} + +static int pnv_php_enable_slot(struct hotplug_slot *slot) +{ + struct pnv_php_slot *php_slot = container_of(slot, + struct pnv_php_slot, slot); + + return pnv_php_enable(php_slot, true); +} + +static int pnv_php_disable_slot(struct hotplug_slot *slot) +{ + struct pnv_php_slot *php_slot = slot->private; + int ret; + + if (php_slot->state != PNV_PHP_STATE_POPULATED) + return 0; + + /* Remove all devices behind the slot */ + pci_lock_rescan_remove(); + pci_hp_remove_devices(php_slot->bus); + pci_unlock_rescan_remove(); + + /* Detach the child hotpluggable slots */ + pnv_php_unregister(php_slot->dn); + + /* Notify firmware and remove device nodes */ + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF); + + php_slot->state = PNV_PHP_STATE_REGISTERED; + return ret; +} + +static struct hotplug_slot_ops php_slot_ops = { + .get_power_status = pnv_php_get_power_state, + .get_adapter_status = pnv_php_get_adapter_state, + .set_attention_status = pnv_php_set_attention_state, + .enable_slot = pnv_php_enable_slot, + .disable_slot = pnv_php_disable_slot, +}; + +static void
pnv_php_release(struct hotplug_slot *slot) +{ + struct pnv_php_slot *php_slot = slot->private; + unsigned long flags; + + /* Remove from global or child list */ + spin_lock_irqsave(&pnv_php_lock, flags); + list_del(&php_slot->link); + spin_unlock_irqrestore(&pnv_php_lock, flags); + + /* Detach from parent */ + pnv_php_put_slot(php_slot); + pnv_php_put_slot(php_slot->parent); +} + +static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn) +{ + struct pnv_php_slot *php_slot; + struct pci_bus *bus; + const char *label; + uint64_t id; + + label = of_get_property(dn, "ibm,slot-label", NULL); + if (unlikely(!label)) + return NULL; + + if (pnv_pci_get_slot_id(dn, &id)) + return NULL; + + bus = pci_find_bus_by_node(dn); + if (unlikely(!bus)) + return NULL; + + php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL); + if (unlikely(!php_slot)) + return NULL; + + php_slot->name = kstrdup(label, GFP_KERNEL); + if (unlikely(!php_slot->name)) { + kfree(php_slot); + return NULL; + } + + if (likely(dn->child && PCI_DN(dn->child))) + php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn); + else + php_slot->slot_no = -1; /* Placeholder slot */ + + kref_init(&php_slot->kref); + php_slot->state = PNV_PHP_STATE_INITIALIZED; + php_slot->dn = dn; + php_slot->pdev = bus->self; + php_slot->bus = bus; + php_slot->id = id; + php_slot->power_state_check = false; + php_slot->slot.ops = &php_slot_ops; + php_slot->slot.info = &php_slot->slot_info; + php_slot->slot.release = pnv_php_release; + php_slot->slot.private = php_slot; + + INIT_LIST_HEAD(&php_slot->children); + INIT_LIST_HEAD(&php_slot->link); + + return php_slot; +} + +static int pnv_php_register_slot(struct pnv_php_slot *php_slot) +{ + struct pnv_php_slot *parent; + struct device_node *dn = php_slot->dn; + unsigned long flags; + int ret; + + /* Check if the slot is registered or not */ + parent = pnv_php_find_slot(php_slot->dn); + if (unlikely(parent)) { + pnv_php_put_slot(parent); + return -EEXIST; + } + + /* Register PCI slot */ + ret = pci_hp_register(&php_slot->slot, php_slot->bus, + php_slot->slot_no, php_slot->name); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d registering slot\n", + ret); + return ret; + } + + /* Attach to the parent's child list or global list */ + while ((dn = of_get_parent(dn))) { + if (!PCI_DN(dn)) { + of_node_put(dn); + break; + } + + parent = pnv_php_find_slot(dn); + if (parent) { + of_node_put(dn); + break; + } + + of_node_put(dn); + } + + spin_lock_irqsave(&pnv_php_lock, flags); + php_slot->parent = parent; + if (parent) + list_add_tail(&php_slot->link, &parent->children); + else + list_add_tail(&php_slot->link, &pnv_php_slot_list); + spin_unlock_irqrestore(&pnv_php_lock, flags); + + php_slot->state = PNV_PHP_STATE_REGISTERED; + return 0; +} + +static int pnv_php_register_one(struct device_node *dn) +{ + struct pnv_php_slot *php_slot; + const __be32 *prop32; + int ret; + + /* Check if it's hotpluggable slot */ + prop32 = of_get_property(dn, "ibm,slot-pluggable", NULL); + if (!prop32 || !of_read_number(prop32, 1)) + return -ENXIO; + + prop32 = of_get_property(dn, "ibm,reset-by-firmware", NULL); + if (!prop32 || !of_read_number(prop32, 1)) + return -ENXIO; + + php_slot = pnv_php_alloc_slot(dn); + if (unlikely(!php_slot)) + return -ENODEV; + + ret = pnv_php_register_slot(php_slot); + if (unlikely(ret)) + goto free_slot; + + ret = pnv_php_enable(php_slot, false); + if (unlikely(ret)) + goto unregister_slot; + + return 0; + +unregister_slot: + pnv_php_unregister_one(php_slot->dn); +free_slot: + 
pnv_php_put_slot(php_slot); + return ret; +} + +static void pnv_php_register(struct device_node *dn) +{ + struct device_node *child; + + /* + * The parent slots should be registered before their + * child slots. + */ + for_each_child_of_node(dn, child) { + pnv_php_register_one(child); + pnv_php_register(child); + } +} + +static void pnv_php_unregister_one(struct device_node *dn) +{ + struct pnv_php_slot *php_slot; + + php_slot = pnv_php_find_slot(dn); + if (!php_slot) + return; + + php_slot->state = PNV_PHP_STATE_OFFLINE; + pnv_php_put_slot(php_slot); + pci_hp_deregister(&php_slot->slot); +} + +static void pnv_php_unregister(struct device_node *dn) +{ + struct device_node *child; + + /* The child slots should go before their parent slots */ + for_each_child_of_node(dn, child) { + pnv_php_unregister(child); + pnv_php_unregister_one(child); + } +} + +static int __init pnv_php_init(void) +{ + struct device_node *dn; + + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); + for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") + pnv_php_register(dn); + + return 0; +} + +static void __exit pnv_php_exit(void) +{ + struct device_node *dn; + + for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") + pnv_php_unregister(dn); +} + +module_init(pnv_php_init); +module_exit(pnv_php_exit); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index 6937c72..388c4d8 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -117,8 +117,10 @@ EXPORT_SYMBOL_GPL(rpaphp_deregister_slot); int rpaphp_register_slot(struct slot *slot) { struct hotplug_slot *php_slot = slot->hotplug_slot; + struct device_node *child; + u32 my_index; int retval; - int slotno; + int slotno = -1; dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", __func__, slot->dn->full_name, slot->index, slot->name, @@ -130,10 +132,15 @@ int rpaphp_register_slot(struct slot *slot) return -EAGAIN; } - if (slot->dn->child) - slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); - else - slotno = -1; + for_each_child_of_node(slot->dn, child) { + retval = of_property_read_u32(child, "ibm,my-drc-index", &my_index); + if (!retval && my_index == slot->index) { + slotno = PCI_SLOT(PCI_DN(child)->devfn); + of_node_put(child); + break; + } + } + retval = pci_hp_register(php_slot, slot->bus, slotno, slot->name); if (retval) { err("pci_hp_register failed with error %d\n", retval); diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 55641a3..d678c46 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -695,11 +695,16 @@ static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type) pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl); } +void __weak pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ +} + void pci_setup_bridge(struct pci_bus *bus) { unsigned long type = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; + pcibios_setup_bridge(bus, type); __pci_setup_bridge(bus, type); }
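The pcibios_setup_bridge() hook added in the setup-bus.c hunk above is a standard __weak override point: a platform provides a non-weak definition and it runs before the generic window programming. A sketch of what an override looks like (the body is illustrative; the real powernv consumer is outside this diff):

/* Platform definition; overrides the empty __weak stub at link time */
void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type)
{
        /*
         * Called from pci_setup_bridge() before __pci_setup_bridge()
         * writes the standard I/O and memory window registers, so any
         * platform-specific bridge setup happens first.
         */
}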
diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index 9c18d6f..ea20f62 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -134,7 +134,7 @@ static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) goto exit; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) { rc = -EIO; goto exit; @@ -181,7 +181,7 @@ static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) goto exit; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) rc = -EIO; diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h index f54bbd5..e43545c 100644 --- a/drivers/scsi/cxlflash/main.h +++ b/drivers/scsi/cxlflash/main.h @@ -102,8 +102,4 @@ struct asyc_intr_info { #define SCAN_HOST 0x04 }; -#ifndef CONFIG_CXL_EEH -#define cxl_perst_reloads_same_image(_a, _b) do { } while (0) -#endif - #endif /* _CXLFLASH_MAIN_H */ diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h index 9131019..798c48d 100644 --- a/drivers/tty/hvc/hvc_console.h +++ b/drivers/tty/hvc/hvc_console.h @@ -60,6 +60,7 @@ struct hvc_struct { struct winsize ws; struct work_struct tty_resize; struct list_head next; + unsigned long flags; }; /* implemented by a low level driver */ diff --git a/drivers/tty/hvc/hvc_irq.c b/drivers/tty/hvc/hvc_irq.c index c9adb05..bc7a968 100644 --- a/drivers/tty/hvc/hvc_irq.c +++ b/drivers/tty/hvc/hvc_irq.c @@ -14,6 +14,11 @@ static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) /* if hvc_poll requests a repoll, then kick the hvcd thread */ if (hvc_poll(dev_instance)) hvc_kick(); + + /* + * We're safe to always return IRQ_HANDLED as the hvcd thread will + * iterate through each hvc_struct. + */ return IRQ_HANDLED; } @@ -28,8 +33,8 @@ int notifier_add_irq(struct hvc_struct *hp, int irq) hp->irq_requested = 0; return 0; } - rc = request_irq(irq, hvc_handle_interrupt, 0, - "hvc_console", hp); + rc = request_irq(irq, hvc_handle_interrupt, hp->flags, + "hvc_console", hp); if (!rc) hp->irq_requested = 1; return rc; diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 47b54c6..5107993 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -214,7 +214,13 @@ static int hvc_opal_probe(struct platform_device *dev) dev->dev.of_node->full_name, boot ? " (boot console)" : ""); - irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT)); + irq = irq_of_parse_and_map(dev->dev.of_node, 0); + if (!irq) { + pr_info("hvc%d: No interrupts property, using OPAL event\n", + termno); + irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT)); + } + if (!irq) { pr_err("hvc_opal: Unable to map interrupt for device %s\n", dev->dev.of_node->full_name); @@ -224,6 +230,9 @@ hp = hvc_alloc(termno, irq, ops, MAX_VIO_PUT_CHARS); if (IS_ERR(hp)) return PTR_ERR(hp); + + /* hvc consoles on powernv may need to share a single irq */ + hp->flags = IRQF_SHARED; dev_set_drvdata(&dev->dev, hp); return 0;