diff options
253 files changed, 7079 insertions, 4863 deletions
@@ -96,3 +96,6 @@ x509.genkey # Kconfig presets all.config + +# Kdevelop4 +*.kdev4 @@ -1734,14 +1734,14 @@ S: Chapel Hill, North Carolina 27514-4818 S: USA N: Dave Jones -E: davej@redhat.com +E: davej@codemonkey.org.uk W: http://www.codemonkey.org.uk D: Assorted VIA x86 support. D: 2.5 AGPGART overhaul. D: CPUFREQ maintenance. -D: Fedora kernel maintenance. +D: Fedora kernel maintenance (2003-2014). +D: 'Trinity' and similar fuzz testing work. D: Misc/Other. -S: 314 Littleton Rd, Westford, MA 01886, USA N: Martin Josfsson E: gandalf@wlug.westbo.se diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt b/Documentation/devicetree/bindings/i2c/i2c-opal.txt new file mode 100644 index 0000000..12bc614 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt @@ -0,0 +1,37 @@ +Device-tree bindings for I2C OPAL driver +---------------------------------------- + +Most of the device node and properties layout is specific to the firmware and +used by the firmware itself for configuring the port. From the Linux +perspective, the properties of use are "ibm,port-name" and "ibm,opal-id". + +Required properties: + +- reg: Port-id within a given master +- compatible: must be "ibm,opal-i2c" +- ibm,opal-id: Refers to a specific bus and is used to identify it when calling + the relevant OPAL functions. +- bus-frequency: Operating frequency of the i2c bus (in Hz). Informational for + Linux, used by the FW though. + +Optional properties: +- ibm,port-name: Firmware provides this name that uniquely identifies the i2c + port. + +The node contains a number of other properties that are used by the FW itself +and depend on the specific hardware implementation. The example below depicts +a P8 on-chip bus. 
+ +Example: + +i2c-bus@0 { + reg = <0x0>; + bus-frequency = <0x61a80>; + compatible = "ibm,power8-i2c-port", "ibm,opal-i2c"; + ibm,opal-id = <0x1>; + ibm,port-name = "p8_00000000_e1p0"; + #address-cells = <0x1>; + phandle = <0x10000006>; + #size-cells = <0x0>; + linux,phandle = <0x10000006>; +}; diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt index 4472ed2..818518a 100644 --- a/Documentation/x86/intel_mpx.txt +++ b/Documentation/x86/intel_mpx.txt @@ -7,11 +7,15 @@ that can be used in conjunction with compiler changes to check memory references, for those references whose compile-time normal intentions are usurped at runtime due to buffer overflow or underflow. +You can tell if your CPU supports MPX by looking in /proc/cpuinfo: + + cat /proc/cpuinfo | grep ' mpx ' + For more information, please refer to Intel(R) Architecture Instruction Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection Extensions. -Note: Currently no hardware with MPX ISA is available but it is always +Note: As of December 2014, no hardware with MPX is available but it is possible to use SDE (Intel(R) Software Development Emulator) instead, which can be downloaded from http://software.intel.com/en-us/articles/intel-software-development-emulator @@ -30,9 +34,15 @@ is how we expect the compiler, application and kernel to work together. instrumentation as well as some setup code called early after the app starts. New instruction prefixes are noops for old CPUs. 2) That setup code allocates (virtual) space for the "bounds directory", - points the "bndcfgu" register to the directory and notifies the kernel - (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) that the app will be using - MPX. + points the "bndcfgu" register to the directory (must also set the valid + bit) and notifies the kernel (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) + that the app will be using MPX. 
The app must be careful not to access + the bounds tables between the time when it populates "bndcfgu" and + when it calls the prctl(). This might be hard to guarantee if the app + is compiled with MPX. You can add "__attribute__((bnd_legacy))" to + the function to disable MPX instrumentation to help guarantee this. + Also be careful not to call out to any other code which might be + MPX-instrumented. 3) The kernel detects that the CPU has MPX, allows the new prctl() to succeed, and notes the location of the bounds directory. Userspace is expected to keep the bounds directory at that locationWe note it @@ -481,9 +481,10 @@ asm-generic: # of make so .config is not included in this case either (for *config). version_h := include/generated/uapi/linux/version.h +old_version_h := include/linux/version.h no-dot-config-targets := clean mrproper distclean \ - cscope gtags TAGS tags help %docs check% coccicheck \ + cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers_% archheaders archscripts \ kernelversion %src-pkg @@ -1005,6 +1006,7 @@ endef $(version_h): $(srctree)/Makefile FORCE $(call filechk,version.h) + $(Q)rm -f $(old_version_h) include/generated/utsrelease.h: include/config/kernel.release FORCE $(call filechk,utsrelease.h) @@ -1036,8 +1038,6 @@ firmware_install: FORCE #Default location for installed headers export INSTALL_HDR_PATH = $(objtree)/usr -hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj - # If we do an all arch process set dst to asm-$(hdr-arch) hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm) @@ -1175,7 +1175,7 @@ MRPROPER_FILES += .config .config.old .version .old_version $(version_h) \ Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \ signing_key.priv signing_key.x509 x509.genkey \ extra_certificates signing_key.x509.keyid \ - signing_key.x509.signer include/linux/version.h + signing_key.x509.signer # clean - Delete most, but leave enough to build external modules # @@ 
-1235,7 +1235,7 @@ rpm: include/config/kernel.release FORCE # --------------------------------------------------------------------------- boards := $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*_defconfig) -boards := $(notdir $(boards)) +boards := $(sort $(notdir $(boards))) board-dirs := $(dir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*/*_defconfig)) board-dirs := $(sort $(notdir $(board-dirs:/=))) @@ -1326,7 +1326,7 @@ help-board-dirs := $(addprefix help-,$(board-dirs)) help-boards: $(help-board-dirs) -boards-per-dir = $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig)) +boards-per-dir = $(sort $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig))) $(help-board-dirs): help-%: @echo 'Architecture specific targets ($(SRCARCH) $*):' @@ -1581,11 +1581,6 @@ ifneq ($(cmd_files),) include $(cmd_files) endif -# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir -# Usage: -# $(Q)$(MAKE) $(clean)=dir -clean := -f $(srctree)/scripts/Makefile.clean obj - endif # skip-makefile PHONY += FORCE diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c index b0a608d..b964c66 100644 --- a/arch/cris/arch-v10/lib/usercopy.c +++ b/arch/cris/arch-v10/lib/usercopy.c @@ -30,8 +30,7 @@ /* Copy to userspace. This is based on the memcpy used for kernel-to-kernel copying; see "string.c". */ -unsigned long -__copy_user (void __user *pdst, const void *psrc, unsigned long pn) +unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -187,13 +186,14 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) return retn; } +EXPORT_SYMBOL(__copy_user); /* Copy from user to kernel, zeroing the bytes that were inaccessible in userland. The return-value is the number of bytes that were inaccessible. 
*/ -unsigned long -__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) +unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, + unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -369,11 +369,10 @@ copy_exception_bytes: return retn + n; } +EXPORT_SYMBOL(__copy_user_zeroing); /* Zero userspace. */ - -unsigned long -__do_clear_user (void __user *pto, unsigned long pn) +unsigned long __do_clear_user(void __user *pto, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -521,3 +520,4 @@ __do_clear_user (void __user *pto, unsigned long pn) return retn; } +EXPORT_SYMBOL(__do_clear_user); diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 15a9ed1..4fc16b4 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -108,6 +108,7 @@ config ETRAX_AXISFLASHMAP select MTD_JEDECPROBE select MTD_BLOCK select MTD_COMPLEX_MAPPINGS + select MTD_MTDRAM help This option enables MTD mapping of flash devices. Needed to use flash memories. If unsure, say Y. @@ -358,13 +359,6 @@ config ETRAX_SPI_MMC default MMC select SPI select MMC_SPI - select ETRAX_SPI_MMC_BOARD - -# For the parts that can't be a module (due to restrictions in -# framework elsewhere). 
-config ETRAX_SPI_MMC_BOARD - boolean - default n # While the board info is MMC_SPI only, the drivers are written to be # independent of MMC_SPI, so we'll keep SPI non-dependent on the diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile index 39aa3c1..15fbfef 100644 --- a/arch/cris/arch-v32/drivers/Makefile +++ b/arch/cris/arch-v32/drivers/Makefile @@ -10,4 +10,3 @@ obj-$(CONFIG_ETRAX_IOP_FW_LOAD) += iop_fw_load.o obj-$(CONFIG_ETRAX_I2C) += i2c.o obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL) += sync_serial.o obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_ETRAX_SPI_MMC_BOARD) += board_mmcspi.o diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h index c073cf4..d9cc856 100644 --- a/arch/cris/arch-v32/drivers/i2c.h +++ b/arch/cris/arch-v32/drivers/i2c.h @@ -2,7 +2,6 @@ #include <linux/init.h> /* High level I2C actions */ -int __init i2c_init(void); int i2c_write(unsigned char theSlave, void *data, size_t nbytes); int i2c_read(unsigned char theSlave, void *data, size_t nbytes); int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue); diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 5a14913..08a313f 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1,8 +1,7 @@ /* - * Simple synchronous serial port driver for ETRAX FS and Artpec-3. - * - * Copyright (c) 2005 Axis Communications AB + * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3. 
* + * Copyright (c) 2005, 2008 Axis Communications AB * Author: Mikael Starvik * */ @@ -16,16 +15,17 @@ #include <linux/mutex.h> #include <linux/interrupt.h> #include <linux/poll.h> -#include <linux/init.h> -#include <linux/timer.h> -#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/device.h> #include <linux/wait.h> #include <asm/io.h> -#include <dma.h> +#include <mach/dma.h> #include <pinmux.h> #include <hwregs/reg_rdwr.h> #include <hwregs/sser_defs.h> +#include <hwregs/timer_defs.h> #include <hwregs/dma_defs.h> #include <hwregs/dma.h> #include <hwregs/intr_vect_defs.h> @@ -59,22 +59,23 @@ /* the rest of the data pointed out by Descr1 and set readp to the start */ /* of Descr2 */ -#define SYNC_SERIAL_MAJOR 125 - /* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */ /* words can be handled */ -#define IN_BUFFER_SIZE 12288 -#define IN_DESCR_SIZE 256 -#define NBR_IN_DESCR (IN_BUFFER_SIZE/IN_DESCR_SIZE) +#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE +#define NBR_IN_DESCR (8*6) +#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR) -#define OUT_BUFFER_SIZE 1024*8 #define NBR_OUT_DESCR 8 +#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR) #define DEFAULT_FRAME_RATE 0 #define DEFAULT_WORD_RATE 7 +/* To be removed when we move to pure udev. */ +#define SYNC_SERIAL_MAJOR 125 + /* NOTE: Enabling some debug will likely cause overrun or underrun, - * especially if manual mode is use. + * especially if manual mode is used. */ #define DEBUG(x) #define DEBUGREAD(x) @@ -85,11 +86,28 @@ #define DEBUGTRDMA(x) #define DEBUGOUTBUF(x) -typedef struct sync_port -{ - reg_scope_instances regi_sser; - reg_scope_instances regi_dmain; - reg_scope_instances regi_dmaout; +enum syncser_irq_setup { + no_irq_setup = 0, + dma_irq_setup = 1, + manual_irq_setup = 2, +}; + +struct sync_port { + unsigned long regi_sser; + unsigned long regi_dmain; + unsigned long regi_dmaout; + + /* Interrupt vectors. 
*/ + unsigned long dma_in_intr_vect; /* Used for DMA in. */ + unsigned long dma_out_intr_vect; /* Used for DMA out. */ + unsigned long syncser_intr_vect; /* Used when no DMA. */ + + /* DMA number for in and out. */ + unsigned int dma_in_nbr; + unsigned int dma_out_nbr; + + /* DMA owner. */ + enum dma_owner req_dma; char started; /* 1 if port has been started */ char port_nbr; /* Port 0 or 1 */ @@ -99,22 +117,29 @@ typedef struct sync_port char use_dma; /* 1 if port uses dma */ char tr_running; - char init_irqs; + enum syncser_irq_setup init_irqs; int output; int input; /* Next byte to be read by application */ - volatile unsigned char *volatile readp; + unsigned char *readp; /* Next byte to be written by etrax */ - volatile unsigned char *volatile writep; + unsigned char *writep; unsigned int in_buffer_size; + unsigned int in_buffer_len; unsigned int inbufchunk; - unsigned char out_buffer[OUT_BUFFER_SIZE] __attribute__ ((aligned(32))); - unsigned char in_buffer[IN_BUFFER_SIZE]__attribute__ ((aligned(32))); - unsigned char flip[IN_BUFFER_SIZE] __attribute__ ((aligned(32))); - struct dma_descr_data* next_rx_desc; - struct dma_descr_data* prev_rx_desc; + /* Data buffers for in and output. 
*/ + unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32); + unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32); + unsigned char flip[IN_BUFFER_SIZE] __aligned(32); + struct timespec timestamp[NBR_IN_DESCR]; + struct dma_descr_data *next_rx_desc; + struct dma_descr_data *prev_rx_desc; + + struct timeval last_timestamp; + int read_ts_idx; + int write_ts_idx; /* Pointer to the first available descriptor in the ring, * unless active_tr_descr == catch_tr_descr and a dma @@ -135,114 +160,138 @@ typedef struct sync_port /* Number of bytes currently locked for being read by DMA */ int out_buf_count; - dma_descr_data in_descr[NBR_IN_DESCR] __attribute__ ((__aligned__(16))); - dma_descr_context in_context __attribute__ ((__aligned__(32))); - dma_descr_data out_descr[NBR_OUT_DESCR] - __attribute__ ((__aligned__(16))); - dma_descr_context out_context __attribute__ ((__aligned__(32))); + dma_descr_context in_context __aligned(32); + dma_descr_context out_context __aligned(32); + dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16); + dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16); + wait_queue_head_t out_wait_q; wait_queue_head_t in_wait_q; spinlock_t lock; -} sync_port; +}; static DEFINE_MUTEX(sync_serial_mutex); static int etrax_sync_serial_init(void); static void initialize_port(int portnbr); static inline int sync_data_avail(struct sync_port *port); -static int sync_serial_open(struct inode *, struct file*); -static int sync_serial_release(struct inode*, struct file*); +static int sync_serial_open(struct inode *, struct file *); +static int sync_serial_release(struct inode *, struct file *); static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); -static int sync_serial_ioctl(struct file *, - unsigned int cmd, unsigned long arg); -static ssize_t sync_serial_write(struct file * file, const char * buf, +static long sync_serial_ioctl(struct file *file, + unsigned int cmd, unsigned long arg); +static int sync_serial_ioctl_unlocked(struct file 
*file, + unsigned int cmd, unsigned long arg); +static ssize_t sync_serial_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos); -static ssize_t sync_serial_read(struct file *file, char *buf, +static ssize_t sync_serial_read(struct file *file, char __user *buf, size_t count, loff_t *ppos); -#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \ - defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \ - (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \ - defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)) +#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \ + defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \ + (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \ + defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))) #define SYNC_SER_DMA +#else +#define SYNC_SER_MANUAL #endif -static void send_word(sync_port* port); -static void start_dma_out(struct sync_port *port, const char *data, int count); -static void start_dma_in(sync_port* port); #ifdef SYNC_SER_DMA +static void start_dma_out(struct sync_port *port, const char *data, int count); +static void start_dma_in(struct sync_port *port); static irqreturn_t tr_interrupt(int irq, void *dev_id); static irqreturn_t rx_interrupt(int irq, void *dev_id); #endif - -#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \ - !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \ - (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \ - !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)) -#define SYNC_SER_MANUAL -#endif #ifdef SYNC_SER_MANUAL +static void send_word(struct sync_port *port); static irqreturn_t manual_interrupt(int irq, void *dev_id); #endif -#ifdef CONFIG_ETRAXFS /* ETRAX FS */ -#define OUT_DMA_NBR 4 -#define IN_DMA_NBR 5 -#define PINMUX_SSER pinmux_sser0 -#define SYNCSER_INST regi_sser0 -#define SYNCSER_INTR_VECT SSER0_INTR_VECT -#define OUT_DMA_INST regi_dma4 -#define IN_DMA_INST regi_dma5 -#define DMA_OUT_INTR_VECT DMA4_INTR_VECT -#define DMA_IN_INTR_VECT DMA5_INTR_VECT -#define 
REQ_DMA_SYNCSER dma_sser0 -#else /* Artpec-3 */ -#define OUT_DMA_NBR 6 -#define IN_DMA_NBR 7 -#define PINMUX_SSER pinmux_sser -#define SYNCSER_INST regi_sser -#define SYNCSER_INTR_VECT SSER_INTR_VECT -#define OUT_DMA_INST regi_dma6 -#define IN_DMA_INST regi_dma7 -#define DMA_OUT_INTR_VECT DMA6_INTR_VECT -#define DMA_IN_INTR_VECT DMA7_INTR_VECT -#define REQ_DMA_SYNCSER dma_sser +#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed +#define artpec_request_dma crisv32_request_dma +#define artpec_free_dma crisv32_free_dma + +#ifdef CONFIG_ETRAXFS +/* ETRAX FS */ +#define DMA_OUT_NBR0 SYNC_SER0_TX_DMA_NBR +#define DMA_IN_NBR0 SYNC_SER0_RX_DMA_NBR +#define DMA_OUT_NBR1 SYNC_SER1_TX_DMA_NBR +#define DMA_IN_NBR1 SYNC_SER1_RX_DMA_NBR +#define PINMUX_SSER0 pinmux_sser0 +#define PINMUX_SSER1 pinmux_sser1 +#define SYNCSER_INST0 regi_sser0 +#define SYNCSER_INST1 regi_sser1 +#define SYNCSER_INTR_VECT0 SSER0_INTR_VECT +#define SYNCSER_INTR_VECT1 SSER1_INTR_VECT +#define OUT_DMA_INST0 regi_dma4 +#define IN_DMA_INST0 regi_dma5 +#define DMA_OUT_INTR_VECT0 DMA4_INTR_VECT +#define DMA_OUT_INTR_VECT1 DMA7_INTR_VECT +#define DMA_IN_INTR_VECT0 DMA5_INTR_VECT +#define DMA_IN_INTR_VECT1 DMA6_INTR_VECT +#define REQ_DMA_SYNCSER0 dma_sser0 +#define REQ_DMA_SYNCSER1 dma_sser1 +#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA) +#define PORT1_DMA 1 +#else +#define PORT1_DMA 0 +#endif +#elif defined(CONFIG_CRIS_MACH_ARTPEC3) +/* ARTPEC-3 */ +#define DMA_OUT_NBR0 SYNC_SER_TX_DMA_NBR +#define DMA_IN_NBR0 SYNC_SER_RX_DMA_NBR +#define PINMUX_SSER0 pinmux_sser +#define SYNCSER_INST0 regi_sser +#define SYNCSER_INTR_VECT0 SSER_INTR_VECT +#define OUT_DMA_INST0 regi_dma6 +#define IN_DMA_INST0 regi_dma7 +#define DMA_OUT_INTR_VECT0 DMA6_INTR_VECT +#define DMA_IN_INTR_VECT0 DMA7_INTR_VECT +#define REQ_DMA_SYNCSER0 dma_sser +#define REQ_DMA_SYNCSER1 dma_sser #endif -/* The ports */ -static struct sync_port ports[]= -{ - { - .regi_sser = SYNCSER_INST, - .regi_dmaout = OUT_DMA_INST, - .regi_dmain = 
IN_DMA_INST, #if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA) - .use_dma = 1, +#define PORT0_DMA 1 #else - .use_dma = 0, +#define PORT0_DMA 0 #endif - } -#ifdef CONFIG_ETRAXFS - , +/* The ports */ +static struct sync_port ports[] = { { - .regi_sser = regi_sser1, - .regi_dmaout = regi_dma6, - .regi_dmain = regi_dma7, -#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA) - .use_dma = 1, -#else - .use_dma = 0, -#endif - } + .regi_sser = SYNCSER_INST0, + .regi_dmaout = OUT_DMA_INST0, + .regi_dmain = IN_DMA_INST0, + .use_dma = PORT0_DMA, + .dma_in_intr_vect = DMA_IN_INTR_VECT0, + .dma_out_intr_vect = DMA_OUT_INTR_VECT0, + .dma_in_nbr = DMA_IN_NBR0, + .dma_out_nbr = DMA_OUT_NBR0, + .req_dma = REQ_DMA_SYNCSER0, + .syncser_intr_vect = SYNCSER_INTR_VECT0, + }, +#ifdef CONFIG_ETRAXFS + { + .regi_sser = SYNCSER_INST1, + .regi_dmaout = regi_dma6, + .regi_dmain = regi_dma7, + .use_dma = PORT1_DMA, + .dma_in_intr_vect = DMA_IN_INTR_VECT1, + .dma_out_intr_vect = DMA_OUT_INTR_VECT1, + .dma_in_nbr = DMA_IN_NBR1, + .dma_out_nbr = DMA_OUT_NBR1, + .req_dma = REQ_DMA_SYNCSER1, + .syncser_intr_vect = SYNCSER_INTR_VECT1, + }, #endif }; #define NBR_PORTS ARRAY_SIZE(ports) -static const struct file_operations sync_serial_fops = { +static const struct file_operations syncser_fops = { .owner = THIS_MODULE, .write = sync_serial_write, .read = sync_serial_read, @@ -253,61 +302,40 @@ static const struct file_operations sync_serial_fops = { .llseek = noop_llseek, }; -static int __init etrax_sync_serial_init(void) -{ - ports[0].enabled = 0; -#ifdef CONFIG_ETRAXFS - ports[1].enabled = 0; -#endif - if (register_chrdev(SYNC_SERIAL_MAJOR, "sync serial", - &sync_serial_fops) < 0) { - printk(KERN_WARNING - "Unable to get major for synchronous serial port\n"); - return -EBUSY; - } - - /* Initialize Ports */ -#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) - if (crisv32_pinmux_alloc_fixed(PINMUX_SSER)) { - printk(KERN_WARNING - "Unable to alloc pins for synchronous serial port 0\n"); - return -EIO; - } 
- ports[0].enabled = 1; - initialize_port(0); -#endif - -#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) - if (crisv32_pinmux_alloc_fixed(pinmux_sser1)) { - printk(KERN_WARNING - "Unable to alloc pins for synchronous serial port 0\n"); - return -EIO; - } - ports[1].enabled = 1; - initialize_port(1); -#endif +static dev_t syncser_first; +static int minor_count = NBR_PORTS; +#define SYNCSER_NAME "syncser" +static struct cdev *syncser_cdev; +static struct class *syncser_class; -#ifdef CONFIG_ETRAXFS - printk(KERN_INFO "ETRAX FS synchronous serial port driver\n"); -#else - printk(KERN_INFO "Artpec-3 synchronous serial port driver\n"); -#endif - return 0; +static void sync_serial_start_port(struct sync_port *port) +{ + reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); + reg_sser_rw_tr_cfg tr_cfg = + REG_RD(sser, port->regi_sser, rw_tr_cfg); + reg_sser_rw_rec_cfg rec_cfg = + REG_RD(sser, port->regi_sser, rw_rec_cfg); + cfg.en = regk_sser_yes; + tr_cfg.tr_en = regk_sser_yes; + rec_cfg.rec_en = regk_sser_yes; + REG_WR(sser, port->regi_sser, rw_cfg, cfg); + REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg); + REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); + port->started = 1; } static void __init initialize_port(int portnbr) { - int __attribute__((unused)) i; struct sync_port *port = &ports[portnbr]; - reg_sser_rw_cfg cfg = {0}; - reg_sser_rw_frm_cfg frm_cfg = {0}; - reg_sser_rw_tr_cfg tr_cfg = {0}; - reg_sser_rw_rec_cfg rec_cfg = {0}; + reg_sser_rw_cfg cfg = { 0 }; + reg_sser_rw_frm_cfg frm_cfg = { 0 }; + reg_sser_rw_tr_cfg tr_cfg = { 0 }; + reg_sser_rw_rec_cfg rec_cfg = { 0 }; - DEBUG(printk(KERN_DEBUG "Init sync serial port %d\n", portnbr)); + DEBUG(pr_info("Init sync serial port %d\n", portnbr)); port->port_nbr = portnbr; - port->init_irqs = 1; + port->init_irqs = no_irq_setup; port->out_rd_ptr = port->out_buffer; port->out_buf_count = 0; @@ -318,10 +346,11 @@ static void __init initialize_port(int portnbr) port->readp = port->flip; port->writep = 
port->flip; port->in_buffer_size = IN_BUFFER_SIZE; + port->in_buffer_len = 0; port->inbufchunk = IN_DESCR_SIZE; - port->next_rx_desc = &port->in_descr[0]; - port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR-1]; - port->prev_rx_desc->eol = 1; + + port->read_ts_idx = 0; + port->write_ts_idx = 0; init_waitqueue_head(&port->out_wait_q); init_waitqueue_head(&port->in_wait_q); @@ -368,14 +397,18 @@ static void __init initialize_port(int portnbr) REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); #ifdef SYNC_SER_DMA - /* Setup the descriptor ring for dma out/transmit. */ - for (i = 0; i < NBR_OUT_DESCR; i++) { - port->out_descr[i].wait = 0; - port->out_descr[i].intr = 1; - port->out_descr[i].eol = 0; - port->out_descr[i].out_eop = 0; - port->out_descr[i].next = - (dma_descr_data *)virt_to_phys(&port->out_descr[i+1]); + { + int i; + /* Setup the descriptor ring for dma out/transmit: each entry's next field gets the physical address of the entry after it. */ + for (i = 0; i < NBR_OUT_DESCR; i++) { + dma_descr_data *descr = &port->out_descr[i]; + descr->wait = 0; + descr->intr = 1; + descr->eol = 0; + descr->out_eop = 0; + descr->next = + (dma_descr_data *)virt_to_phys(&port->out_descr[i+1]); + } } /* Create a ring from the list. 
*/ @@ -391,201 +424,116 @@ static void __init initialize_port(int portnbr) static inline int sync_data_avail(struct sync_port *port) { - int avail; - unsigned char *start; - unsigned char *end; - - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - /* 0123456789 0123456789 - * ----- - ----- - * ^rp ^wp ^wp ^rp - */ - - if (end >= start) - avail = end - start; - else - avail = port->in_buffer_size - (start - end); - return avail; -} - -static inline int sync_data_avail_to_end(struct sync_port *port) -{ - int avail; - unsigned char *start; - unsigned char *end; - - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - /* 0123456789 0123456789 - * ----- ----- - * ^rp ^wp ^wp ^rp - */ - - if (end >= start) - avail = end - start; - else - avail = port->flip + port->in_buffer_size - start; - return avail; + return port->in_buffer_len; } static int sync_serial_open(struct inode *inode, struct file *file) { + int ret = 0; int dev = iminor(inode); - int ret = -EBUSY; - sync_port *port; - reg_dma_rw_cfg cfg = {.en = regk_dma_yes}; - reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes}; + struct sync_port *port; +#ifdef SYNC_SER_DMA + reg_dma_rw_cfg cfg = { .en = regk_dma_yes }; + reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes }; +#endif - mutex_lock(&sync_serial_mutex); - DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev)); + DEBUG(pr_debug("Open sync serial port %d\n", dev)); - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev)); - ret = -ENODEV; - goto out; + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); + return -ENODEV; } port = &ports[dev]; /* Allow open this device twice (assuming one reader and one writer) */ - if (port->busy == 2) - { - DEBUG(printk(KERN_DEBUG "Device is busy.. 
\n")); - goto out; + if (port->busy == 2) { + DEBUG(pr_info("syncser%d is busy\n", dev)); + return -EBUSY; } + mutex_lock(&sync_serial_mutex); - if (port->init_irqs) { - if (port->use_dma) { - if (port == &ports[0]) { -#ifdef SYNC_SER_DMA - if (request_irq(DMA_OUT_INTR_VECT, - tr_interrupt, - 0, - "synchronous serial 0 dma tr", - &ports[0])) { - printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ"); - goto out; - } else if (request_irq(DMA_IN_INTR_VECT, - rx_interrupt, - 0, - "synchronous serial 1 dma rx", - &ports[0])) { - free_irq(DMA_OUT_INTR_VECT, &port[0]); - printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ"); - goto out; - } else if (crisv32_request_dma(OUT_DMA_NBR, - "synchronous serial 0 dma tr", - DMA_VERBOSE_ON_ERROR, - 0, - REQ_DMA_SYNCSER)) { - free_irq(DMA_OUT_INTR_VECT, &port[0]); - free_irq(DMA_IN_INTR_VECT, &port[0]); - printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel"); - goto out; - } else if (crisv32_request_dma(IN_DMA_NBR, - "synchronous serial 0 dma rec", - DMA_VERBOSE_ON_ERROR, - 0, - REQ_DMA_SYNCSER)) { - crisv32_free_dma(OUT_DMA_NBR); - free_irq(DMA_OUT_INTR_VECT, &port[0]); - free_irq(DMA_IN_INTR_VECT, &port[0]); - printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel"); - goto out; - } -#endif - } -#ifdef CONFIG_ETRAXFS - else if (port == &ports[1]) { + /* Clear any stale date left in the flip buffer */ + port->readp = port->writep = port->flip; + port->in_buffer_len = 0; + port->read_ts_idx = 0; + port->write_ts_idx = 0; + + if (port->init_irqs != no_irq_setup) { + /* Init only on first call. 
*/ + port->busy++; + mutex_unlock(&sync_serial_mutex); + return 0; + } + if (port->use_dma) { #ifdef SYNC_SER_DMA - if (request_irq(DMA6_INTR_VECT, - tr_interrupt, - 0, - "synchronous serial 1 dma tr", - &ports[1])) { - printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ"); - goto out; - } else if (request_irq(DMA7_INTR_VECT, - rx_interrupt, - 0, - "synchronous serial 1 dma rx", - &ports[1])) { - free_irq(DMA6_INTR_VECT, &ports[1]); - printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ"); - goto out; - } else if (crisv32_request_dma( - SYNC_SER1_TX_DMA_NBR, - "synchronous serial 1 dma tr", - DMA_VERBOSE_ON_ERROR, - 0, - dma_sser1)) { - free_irq(DMA6_INTR_VECT, &ports[1]); - free_irq(DMA7_INTR_VECT, &ports[1]); - printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel"); - goto out; - } else if (crisv32_request_dma( - SYNC_SER1_RX_DMA_NBR, - "synchronous serial 3 dma rec", - DMA_VERBOSE_ON_ERROR, - 0, - dma_sser1)) { - crisv32_free_dma(SYNC_SER1_TX_DMA_NBR); - free_irq(DMA6_INTR_VECT, &ports[1]); - free_irq(DMA7_INTR_VECT, &ports[1]); - printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel"); - goto out; - } -#endif - } + const char *tmp; + DEBUG(pr_info("Using DMA for syncser%d\n", dev)); + + tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx"; + if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0, + tmp, port)) { + pr_err("Can't alloc syncser%d TX IRQ", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + if (artpec_request_dma(port->dma_out_nbr, tmp, + DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) { + free_irq(port->dma_out_intr_vect, port); + pr_err("Can't alloc syncser%d TX DMA", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + tmp = dev == 0 ? 
"syncser0 rx" : "syncser1 rx"; + if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0, + tmp, port)) { + artpec_free_dma(port->dma_out_nbr); + free_irq(port->dma_out_intr_vect, port); + pr_err("Can't alloc syncser%d RX IRQ", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + if (artpec_request_dma(port->dma_in_nbr, tmp, + DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) { + artpec_free_dma(port->dma_out_nbr); + free_irq(port->dma_out_intr_vect, port); + free_irq(port->dma_in_intr_vect, port); + pr_err("Can't alloc syncser%d RX DMA", dev); + ret = -EBUSY; + goto unlock_and_exit; + } + /* Enable DMAs */ + REG_WR(dma, port->regi_dmain, rw_cfg, cfg); + REG_WR(dma, port->regi_dmaout, rw_cfg, cfg); + /* Enable DMA IRQs */ + REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask); + REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask); + /* Set up wordsize = 1 for DMAs. */ + DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1); + DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1); + + start_dma_in(port); + port->init_irqs = dma_irq_setup; #endif - /* Enable DMAs */ - REG_WR(dma, port->regi_dmain, rw_cfg, cfg); - REG_WR(dma, port->regi_dmaout, rw_cfg, cfg); - /* Enable DMA IRQs */ - REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask); - REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask); - /* Set up wordsize = 1 for DMAs. 
*/ - DMA_WR_CMD (port->regi_dmain, regk_dma_set_w_size1); - DMA_WR_CMD (port->regi_dmaout, regk_dma_set_w_size1); - - start_dma_in(port); - port->init_irqs = 0; - } else { /* !port->use_dma */ + } else { /* !port->use_dma */ #ifdef SYNC_SER_MANUAL - if (port == &ports[0]) { - if (request_irq(SYNCSER_INTR_VECT, - manual_interrupt, - 0, - "synchronous serial manual irq", - &ports[0])) { - printk("Can't allocate sync serial manual irq"); - goto out; - } - } -#ifdef CONFIG_ETRAXFS - else if (port == &ports[1]) { - if (request_irq(SSER1_INTR_VECT, - manual_interrupt, - 0, - "synchronous serial manual irq", - &ports[1])) { - printk(KERN_CRIT "Can't allocate sync serial manual irq"); - goto out; - } - } -#endif - port->init_irqs = 0; + const char *tmp = dev == 0 ? "syncser0 manual irq" : + "syncser1 manual irq"; + if (request_irq(port->syncser_intr_vect, manual_interrupt, + 0, tmp, port)) { + pr_err("Can't alloc syncser%d manual irq", + dev); + ret = -EBUSY; + goto unlock_and_exit; + } + port->init_irqs = manual_irq_setup; #else - panic("sync_serial: Manual mode not supported.\n"); + panic("sync_serial: Manual mode not supported\n"); #endif /* SYNC_SER_MANUAL */ - } - - } /* port->init_irqs */ - + } port->busy++; ret = 0; -out: + +unlock_and_exit: mutex_unlock(&sync_serial_mutex); return ret; } @@ -593,18 +541,17 @@ out: static int sync_serial_release(struct inode *inode, struct file *file) { int dev = iminor(inode); - sync_port *port; + struct sync_port *port; - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk("Invalid minor %d\n", dev)); + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); return -ENODEV; } port = &ports[dev]; if (port->busy) port->busy--; if (!port->busy) - /* XXX */ ; + /* XXX */; return 0; } @@ -612,21 +559,15 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait) { int dev = iminor(file_inode(file)); unsigned int mask = 0; - sync_port *port; - 
DEBUGPOLL( static unsigned int prev_mask = 0; ); + struct sync_port *port; + DEBUGPOLL( + static unsigned int prev_mask; + ); port = &ports[dev]; - if (!port->started) { - reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); - reg_sser_rw_rec_cfg rec_cfg = - REG_RD(sser, port->regi_sser, rw_rec_cfg); - cfg.en = regk_sser_yes; - rec_cfg.rec_en = port->input; - REG_WR(sser, port->regi_sser, rw_cfg, cfg); - REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); - port->started = 1; - } + if (!port->started) + sync_serial_start_port(port); poll_wait(file, &port->out_wait_q, wait); poll_wait(file, &port->in_wait_q, wait); @@ -645,33 +586,175 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait) if (port->input && sync_data_avail(port) >= port->inbufchunk) mask |= POLLIN | POLLRDNORM; - DEBUGPOLL(if (mask != prev_mask) - printk("sync_serial_poll: mask 0x%08X %s %s\n", mask, - mask&POLLOUT?"POLLOUT":"", mask&POLLIN?"POLLIN":""); - prev_mask = mask; - ); + DEBUGPOLL( + if (mask != prev_mask) + pr_info("sync_serial_poll: mask 0x%08X %s %s\n", + mask, + mask & POLLOUT ? "POLLOUT" : "", + mask & POLLIN ? 
"POLLIN" : ""); + prev_mask = mask; + ); return mask; } -static int sync_serial_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static ssize_t __sync_serial_read(struct file *file, + char __user *buf, + size_t count, + loff_t *ppos, + struct timespec *ts) +{ + unsigned long flags; + int dev = MINOR(file->f_dentry->d_inode->i_rdev); + int avail; + struct sync_port *port; + unsigned char *start; + unsigned char *end; + + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); + return -ENODEV; + } + port = &ports[dev]; + + if (!port->started) + sync_serial_start_port(port); + + /* Calculate number of available bytes */ + /* Save pointers to avoid that they are modified by interrupt */ + spin_lock_irqsave(&port->lock, flags); + start = port->readp; + end = port->writep; + spin_unlock_irqrestore(&port->lock, flags); + + while ((start == end) && !port->in_buffer_len) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + wait_event_interruptible(port->in_wait_q, + !(start == end && !port->full)); + + if (signal_pending(current)) + return -EINTR; + + spin_lock_irqsave(&port->lock, flags); + start = port->readp; + end = port->writep; + spin_unlock_irqrestore(&port->lock, flags); + } + + DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n", + dev, count, + start - port->flip, end - port->flip, + port->in_buffer_size)); + + /* Lazy read, never return wrapped data. */ + if (end > start) + avail = end - start; + else + avail = port->flip + port->in_buffer_size - start; + + count = count > avail ? avail : count; + if (copy_to_user(buf, start, count)) + return -EFAULT; + + /* If timestamp requested, find timestamp of first returned byte + * and copy it. + * N.B: Applications that request timstamps MUST read data in + * chunks that are multiples of IN_DESCR_SIZE. + * Otherwise the timestamps will not be aligned to the data read. 
+ */ + if (ts != NULL) { + int idx = port->read_ts_idx; + memcpy(ts, &port->timestamp[idx], sizeof(struct timespec)); + port->read_ts_idx += count / IN_DESCR_SIZE; + if (port->read_ts_idx >= NBR_IN_DESCR) + port->read_ts_idx = 0; + } + + spin_lock_irqsave(&port->lock, flags); + port->readp += count; + /* Check for wrap */ + if (port->readp >= port->flip + port->in_buffer_size) + port->readp = port->flip; + port->in_buffer_len -= count; + port->full = 0; + spin_unlock_irqrestore(&port->lock, flags); + + DEBUGREAD(pr_info("r %d\n", count)); + + return count; +} + +static ssize_t sync_serial_input(struct file *file, unsigned long arg) +{ + struct ssp_request req; + int count; + int ret; + + /* Copy the request structure from user-mode. */ + ret = copy_from_user(&req, (struct ssp_request __user *)arg, + sizeof(struct ssp_request)); + + if (ret) { + DEBUG(pr_info("sync_serial_input copy from user failed\n")); + return -EFAULT; + } + + /* To get the timestamps aligned, make sure that 'len' + * is a multiple of IN_DESCR_SIZE. + */ + if ((req.len % IN_DESCR_SIZE) != 0) { + DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n", + req.len, IN_DESCR_SIZE)); + return -EFAULT; + } + + /* Do the actual read. */ + /* Note that req.buf is actually a pointer to user space. */ + count = __sync_serial_read(file, req.buf, req.len, + NULL, &req.ts); + + if (count < 0) { + DEBUG(pr_info("sync_serial_input read failed\n")); + return count; + } + + /* Copy the request back to user-mode. */ + ret = copy_to_user((struct ssp_request __user *)arg, &req, + sizeof(struct ssp_request)); + + if (ret) { + DEBUG(pr_info("syncser input copy2user failed\n")); + return -EFAULT; + } + + /* Return the number of bytes read. 
*/ + return count; +} + + +static int sync_serial_ioctl_unlocked(struct file *file, + unsigned int cmd, unsigned long arg) { int return_val = 0; int dma_w_size = regk_dma_set_w_size1; int dev = iminor(file_inode(file)); - sync_port *port; + struct sync_port *port; reg_sser_rw_tr_cfg tr_cfg; reg_sser_rw_rec_cfg rec_cfg; reg_sser_rw_frm_cfg frm_cfg; reg_sser_rw_cfg gen_cfg; reg_sser_rw_intr_mask intr_mask; - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk("Invalid minor %d\n", dev)); + if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { + DEBUG(pr_info("Invalid minor %d\n", dev)); return -1; } - port = &ports[dev]; + + if (cmd == SSP_INPUT) + return sync_serial_input(file, arg); + + port = &ports[dev]; spin_lock_irq(&port->lock); tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); @@ -680,11 +763,9 @@ static int sync_serial_ioctl(struct file *file, gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg); intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask); - switch(cmd) - { + switch (cmd) { case SSP_SPEED: - if (GET_SPEED(arg) == CODEC) - { + if (GET_SPEED(arg) == CODEC) { unsigned int freq; gen_cfg.base_freq = regk_sser_f32; @@ -701,15 +782,25 @@ static int sync_serial_ioctl(struct file *file, case FREQ_256kHz: gen_cfg.clk_div = 125 * (1 << (freq - FREQ_256kHz)) - 1; - break; + break; case FREQ_512kHz: gen_cfg.clk_div = 62; - break; + break; case FREQ_1MHz: case FREQ_2MHz: case FREQ_4MHz: gen_cfg.clk_div = 8 * (1 << freq) - 1; - break; + break; + } + } else if (GET_SPEED(arg) == CODEC_f32768) { + gen_cfg.base_freq = regk_sser_f32_768; + switch (GET_FREQ(arg)) { + case FREQ_4096kHz: + gen_cfg.clk_div = 7; + break; + default: + spin_unlock_irq(&port->lock); + return -EINVAL; } } else { gen_cfg.base_freq = regk_sser_f29_493; @@ -767,62 +858,64 @@ static int sync_serial_ioctl(struct file *file, break; case SSP_MODE: - switch(arg) - { - case MASTER_OUTPUT: - port->output = 1; - port->input = 0; - frm_cfg.out_on = regk_sser_tr; - 
frm_cfg.frame_pin_dir = regk_sser_out; - gen_cfg.clk_dir = regk_sser_out; - break; - case SLAVE_OUTPUT: - port->output = 1; - port->input = 0; - frm_cfg.frame_pin_dir = regk_sser_in; - gen_cfg.clk_dir = regk_sser_in; - break; - case MASTER_INPUT: - port->output = 0; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_out; - frm_cfg.out_on = regk_sser_intern_tb; - gen_cfg.clk_dir = regk_sser_out; - break; - case SLAVE_INPUT: - port->output = 0; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_in; - gen_cfg.clk_dir = regk_sser_in; - break; - case MASTER_BIDIR: - port->output = 1; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_out; - frm_cfg.out_on = regk_sser_intern_tb; - gen_cfg.clk_dir = regk_sser_out; - break; - case SLAVE_BIDIR: - port->output = 1; - port->input = 1; - frm_cfg.frame_pin_dir = regk_sser_in; - gen_cfg.clk_dir = regk_sser_in; - break; - default: - spin_unlock_irq(&port->lock); - return -EINVAL; + switch (arg) { + case MASTER_OUTPUT: + port->output = 1; + port->input = 0; + frm_cfg.out_on = regk_sser_tr; + frm_cfg.frame_pin_dir = regk_sser_out; + gen_cfg.clk_dir = regk_sser_out; + break; + case SLAVE_OUTPUT: + port->output = 1; + port->input = 0; + frm_cfg.frame_pin_dir = regk_sser_in; + gen_cfg.clk_dir = regk_sser_in; + break; + case MASTER_INPUT: + port->output = 0; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_out; + frm_cfg.out_on = regk_sser_intern_tb; + gen_cfg.clk_dir = regk_sser_out; + break; + case SLAVE_INPUT: + port->output = 0; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_in; + gen_cfg.clk_dir = regk_sser_in; + break; + case MASTER_BIDIR: + port->output = 1; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_out; + frm_cfg.out_on = regk_sser_intern_tb; + gen_cfg.clk_dir = regk_sser_out; + break; + case SLAVE_BIDIR: + port->output = 1; + port->input = 1; + frm_cfg.frame_pin_dir = regk_sser_in; + gen_cfg.clk_dir = regk_sser_in; + break; + default: + spin_unlock_irq(&port->lock); + return -EINVAL; } - 
if (!port->use_dma || (arg == MASTER_OUTPUT || arg == SLAVE_OUTPUT)) + if (!port->use_dma || arg == MASTER_OUTPUT || + arg == SLAVE_OUTPUT) intr_mask.rdav = regk_sser_yes; break; case SSP_FRAME_SYNC: if (arg & NORMAL_SYNC) { frm_cfg.rec_delay = 1; frm_cfg.tr_delay = 1; - } - else if (arg & EARLY_SYNC) + } else if (arg & EARLY_SYNC) frm_cfg.rec_delay = frm_cfg.tr_delay = 0; - else if (arg & SECOND_WORD_SYNC) { + else if (arg & LATE_SYNC) { + frm_cfg.tr_delay = 2; + frm_cfg.rec_delay = 2; + } else if (arg & SECOND_WORD_SYNC) { frm_cfg.rec_delay = 7; frm_cfg.tr_delay = 1; } @@ -914,15 +1007,12 @@ static int sync_serial_ioctl(struct file *file, frm_cfg.type = regk_sser_level; frm_cfg.tr_delay = 1; frm_cfg.level = regk_sser_neg_lo; - if (arg & SPI_SLAVE) - { + if (arg & SPI_SLAVE) { rec_cfg.clk_pol = regk_sser_neg; gen_cfg.clk_dir = regk_sser_in; port->input = 1; port->output = 0; - } - else - { + } else { gen_cfg.out_clk_pol = regk_sser_pos; port->input = 0; port->output = 1; @@ -965,19 +1055,19 @@ static int sync_serial_ioctl(struct file *file, } static long sync_serial_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) + unsigned int cmd, unsigned long arg) { - long ret; + long ret; - mutex_lock(&sync_serial_mutex); - ret = sync_serial_ioctl_unlocked(file, cmd, arg); - mutex_unlock(&sync_serial_mutex); + mutex_lock(&sync_serial_mutex); + ret = sync_serial_ioctl_unlocked(file, cmd, arg); + mutex_unlock(&sync_serial_mutex); - return ret; + return ret; } /* NOTE: sync_serial_write does not support concurrency */ -static ssize_t sync_serial_write(struct file *file, const char *buf, +static ssize_t sync_serial_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { int dev = iminor(file_inode(file)); @@ -993,7 +1083,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, unsigned char *buf_stop_ptr; /* Last byte + 1 */ if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { - DEBUG(printk("Invalid minor %d\n", dev)); 
+ DEBUG(pr_info("Invalid minor %d\n", dev)); return -ENODEV; } port = &ports[dev]; @@ -1006,9 +1096,9 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, * |_________|___________________|________________________| * ^ rd_ptr ^ wr_ptr */ - DEBUGWRITE(printk(KERN_DEBUG "W d%d c %lu a: %p c: %p\n", - port->port_nbr, count, port->active_tr_descr, - port->catch_tr_descr)); + DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n", + port->port_nbr, count, port->active_tr_descr, + port->catch_tr_descr)); /* Read variables that may be updated by interrupts */ spin_lock_irqsave(&port->lock, flags); @@ -1020,7 +1110,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, if (port->tr_running && ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) || out_buf_count >= OUT_BUFFER_SIZE)) { - DEBUGWRITE(printk(KERN_DEBUG "sser%d full\n", dev)); + DEBUGWRITE(pr_info("sser%d full\n", dev)); return -EAGAIN; } @@ -1043,15 +1133,16 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, if (copy_from_user(wr_ptr, buf, trunc_count)) return -EFAULT; - DEBUGOUTBUF(printk(KERN_DEBUG "%-4d + %-4d = %-4d %p %p %p\n", - out_buf_count, trunc_count, - port->out_buf_count, port->out_buffer, - wr_ptr, buf_stop_ptr)); + DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d %p %p %p\n", + out_buf_count, trunc_count, + port->out_buf_count, port->out_buffer, + wr_ptr, buf_stop_ptr)); /* Make sure transmitter/receiver is running */ if (!port->started) { reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); - reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg); + reg_sser_rw_rec_cfg rec_cfg = + REG_RD(sser, port->regi_sser, rw_rec_cfg); cfg.en = regk_sser_yes; rec_cfg.rec_en = port->input; REG_WR(sser, port->regi_sser, rw_cfg, cfg); @@ -1068,8 +1159,11 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, spin_lock_irqsave(&port->lock, flags); port->out_buf_count += trunc_count; if (port->use_dma) { +#ifdef 
SYNC_SER_DMA start_dma_out(port, wr_ptr, trunc_count); +#endif } else if (!port->tr_running) { +#ifdef SYNC_SER_MANUAL reg_sser_rw_intr_mask intr_mask; intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask); /* Start sender by writing data */ @@ -1077,14 +1171,15 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, /* and enable transmitter ready IRQ */ intr_mask.trdy = 1; REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask); +#endif } spin_unlock_irqrestore(&port->lock, flags); /* Exit if non blocking */ if (file->f_flags & O_NONBLOCK) { - DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu %08x\n", - port->port_nbr, trunc_count, - REG_RD_INT(dma, port->regi_dmaout, r_intr))); + DEBUGWRITE(pr_info("w d%d c %u %08x\n", + port->port_nbr, trunc_count, + REG_RD_INT(dma, port->regi_dmaout, r_intr))); return trunc_count; } @@ -1094,105 +1189,32 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, if (signal_pending(current)) return -EINTR; - DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu\n", - port->port_nbr, trunc_count)); + DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count)); return trunc_count; } -static ssize_t sync_serial_read(struct file * file, char * buf, +static ssize_t sync_serial_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - int dev = iminor(file_inode(file)); - int avail; - sync_port *port; - unsigned char* start; - unsigned char* end; - unsigned long flags; - - if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) - { - DEBUG(printk("Invalid minor %d\n", dev)); - return -ENODEV; - } - port = &ports[dev]; - - DEBUGREAD(printk("R%d c %d ri %lu wi %lu /%lu\n", dev, count, port->readp - port->flip, port->writep - port->flip, port->in_buffer_size)); - - if (!port->started) - { - reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg); - reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); - reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg); - cfg.en = 
regk_sser_yes; - tr_cfg.tr_en = regk_sser_yes; - rec_cfg.rec_en = regk_sser_yes; - REG_WR(sser, port->regi_sser, rw_cfg, cfg); - REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg); - REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg); - port->started = 1; - } - - /* Calculate number of available bytes */ - /* Save pointers to avoid that they are modified by interrupt */ - spin_lock_irqsave(&port->lock, flags); - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - spin_unlock_irqrestore(&port->lock, flags); - while ((start == end) && !port->full) /* No data */ - { - DEBUGREAD(printk(KERN_DEBUG "&")); - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - - wait_event_interruptible(port->in_wait_q, - !(start == end && !port->full)); - if (signal_pending(current)) - return -EINTR; - - spin_lock_irqsave(&port->lock, flags); - start = (unsigned char*)port->readp; /* cast away volatile */ - end = (unsigned char*)port->writep; /* cast away volatile */ - spin_unlock_irqrestore(&port->lock, flags); - } - - /* Lazy read, never return wrapped data. */ - if (port->full) - avail = port->in_buffer_size; - else if (end > start) - avail = end - start; - else - avail = port->flip + port->in_buffer_size - start; - - count = count > avail ? avail : count; - if (copy_to_user(buf, start, count)) - return -EFAULT; - /* Disable interrupts while updating readp */ - spin_lock_irqsave(&port->lock, flags); - port->readp += count; - if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? 
*/ - port->readp = port->flip; - port->full = 0; - spin_unlock_irqrestore(&port->lock, flags); - DEBUGREAD(printk("r %d\n", count)); - return count; + return __sync_serial_read(file, buf, count, ppos, NULL); } -static void send_word(sync_port* port) +#ifdef SYNC_SER_MANUAL +static void send_word(struct sync_port *port) { reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); reg_sser_rw_tr_data tr_data = {0}; - switch(tr_cfg.sample_size) + switch (tr_cfg.sample_size) { + case 8: + port->out_buf_count--; + tr_data.data = *port->out_rd_ptr++; + REG_WR(sser, port->regi_sser, rw_tr_data, tr_data); + if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE) + port->out_rd_ptr = port->out_buffer; + break; + case 12: { - case 8: - port->out_buf_count--; - tr_data.data = *port->out_rd_ptr++; - REG_WR(sser, port->regi_sser, rw_tr_data, tr_data); - if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE) - port->out_rd_ptr = port->out_buffer; - break; - case 12: - { int data = (*port->out_rd_ptr++) << 8; data |= *port->out_rd_ptr++; port->out_buf_count -= 2; @@ -1200,8 +1222,8 @@ static void send_word(sync_port* port) REG_WR(sser, port->regi_sser, rw_tr_data, tr_data); if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE) port->out_rd_ptr = port->out_buffer; + break; } - break; case 16: port->out_buf_count -= 2; tr_data.data = *(unsigned short *)port->out_rd_ptr; @@ -1233,27 +1255,28 @@ static void send_word(sync_port* port) break; } } +#endif -static void start_dma_out(struct sync_port *port, - const char *data, int count) +#ifdef SYNC_SER_DMA +static void start_dma_out(struct sync_port *port, const char *data, int count) { - port->active_tr_descr->buf = (char *) virt_to_phys((char *) data); + port->active_tr_descr->buf = (char *)virt_to_phys((char *)data); port->active_tr_descr->after = port->active_tr_descr->buf + count; port->active_tr_descr->intr = 1; port->active_tr_descr->eol = 1; port->prev_tr_descr->eol = 0; - DEBUGTRDMA(printk(KERN_DEBUG 
"Inserting eolr:%p eol@:%p\n", + DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n", port->prev_tr_descr, port->active_tr_descr)); port->prev_tr_descr = port->active_tr_descr; - port->active_tr_descr = phys_to_virt((int) port->active_tr_descr->next); + port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next); if (!port->tr_running) { reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); - port->out_context.next = 0; + port->out_context.next = NULL; port->out_context.saved_data = (dma_descr_data *)virt_to_phys(port->prev_tr_descr); port->out_context.saved_data_buf = port->prev_tr_descr->buf; @@ -1263,57 +1286,58 @@ static void start_dma_out(struct sync_port *port, tr_cfg.tr_en = regk_sser_yes; REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg); - DEBUGTRDMA(printk(KERN_DEBUG "dma s\n");); + DEBUGTRDMA(pr_info(KERN_INFO "dma s\n");); } else { DMA_CONTINUE_DATA(port->regi_dmaout); - DEBUGTRDMA(printk(KERN_DEBUG "dma c\n");); + DEBUGTRDMA(pr_info("dma c\n");); } port->tr_running = 1; } -static void start_dma_in(sync_port *port) +static void start_dma_in(struct sync_port *port) { int i; char *buf; + unsigned long flags; + spin_lock_irqsave(&port->lock, flags); port->writep = port->flip; + spin_unlock_irqrestore(&port->lock, flags); - if (port->writep > port->flip + port->in_buffer_size) { - panic("Offset too large in sync serial driver\n"); - return; - } - buf = (char*)virt_to_phys(port->in_buffer); + buf = (char *)virt_to_phys(port->in_buffer); for (i = 0; i < NBR_IN_DESCR; i++) { port->in_descr[i].buf = buf; port->in_descr[i].after = buf + port->inbufchunk; port->in_descr[i].intr = 1; - port->in_descr[i].next = (dma_descr_data*)virt_to_phys(&port->in_descr[i+1]); + port->in_descr[i].next = + (dma_descr_data *)virt_to_phys(&port->in_descr[i+1]); port->in_descr[i].buf = buf; buf += port->inbufchunk; } /* Link the last descriptor to the first */ - port->in_descr[i-1].next = (dma_descr_data*)virt_to_phys(&port->in_descr[0]); + 
port->in_descr[i-1].next = + (dma_descr_data *)virt_to_phys(&port->in_descr[0]); port->in_descr[i-1].eol = regk_sser_yes; port->next_rx_desc = &port->in_descr[0]; port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1]; - port->in_context.saved_data = (dma_descr_data*)virt_to_phys(&port->in_descr[0]); + port->in_context.saved_data = + (dma_descr_data *)virt_to_phys(&port->in_descr[0]); port->in_context.saved_data_buf = port->in_descr[0].buf; DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context)); } -#ifdef SYNC_SER_DMA static irqreturn_t tr_interrupt(int irq, void *dev_id) { reg_dma_r_masked_intr masked; - reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes}; + reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes }; reg_dma_rw_stat stat; int i; int found = 0; int stop_sser = 0; for (i = 0; i < NBR_PORTS; i++) { - sync_port *port = &ports[i]; - if (!port->enabled || !port->use_dma) + struct sync_port *port = &ports[i]; + if (!port->enabled || !port->use_dma) continue; /* IRQ active for the port? */ @@ -1338,19 +1362,20 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) int sent; sent = port->catch_tr_descr->after - port->catch_tr_descr->buf; - DEBUGTXINT(printk(KERN_DEBUG "%-4d - %-4d = %-4d\t" - "in descr %p (ac: %p)\n", - port->out_buf_count, sent, - port->out_buf_count - sent, - port->catch_tr_descr, - port->active_tr_descr);); + DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t" + "in descr %p (ac: %p)\n", + port->out_buf_count, sent, + port->out_buf_count - sent, + port->catch_tr_descr, + port->active_tr_descr);); port->out_buf_count -= sent; port->catch_tr_descr = phys_to_virt((int) port->catch_tr_descr->next); port->out_rd_ptr = phys_to_virt((int) port->catch_tr_descr->buf); } else { - int i, sent; + reg_sser_rw_tr_cfg tr_cfg; + int j, sent; /* EOL handler. 
* Note that if an EOL was encountered during the irq * locked section of sync_ser_write the DMA will be @@ -1358,11 +1383,11 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) * The remaining descriptors will be traversed by * the descriptor interrupts as usual. */ - i = 0; + j = 0; while (!port->catch_tr_descr->eol) { sent = port->catch_tr_descr->after - port->catch_tr_descr->buf; - DEBUGOUTBUF(printk(KERN_DEBUG + DEBUGOUTBUF(pr_info( "traversing descr %p -%d (%d)\n", port->catch_tr_descr, sent, @@ -1370,16 +1395,15 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) port->out_buf_count -= sent; port->catch_tr_descr = phys_to_virt( (int)port->catch_tr_descr->next); - i++; - if (i >= NBR_OUT_DESCR) { + j++; + if (j >= NBR_OUT_DESCR) { /* TODO: Reset and recover */ panic("sync_serial: missing eol"); } } sent = port->catch_tr_descr->after - port->catch_tr_descr->buf; - DEBUGOUTBUF(printk(KERN_DEBUG - "eol at descr %p -%d (%d)\n", + DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n", port->catch_tr_descr, sent, port->out_buf_count)); @@ -1394,15 +1418,13 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) OUT_BUFFER_SIZE) port->out_rd_ptr = port->out_buffer; - reg_sser_rw_tr_cfg tr_cfg = - REG_RD(sser, port->regi_sser, rw_tr_cfg); - DEBUGTXINT(printk(KERN_DEBUG + tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg); + DEBUGTXINT(pr_info( "tr_int DMA stop %d, set catch @ %p\n", port->out_buf_count, port->active_tr_descr)); if (port->out_buf_count != 0) - printk(KERN_CRIT "sync_ser: buffer not " - "empty after eol.\n"); + pr_err("sync_ser: buf not empty after eol\n"); port->catch_tr_descr = port->active_tr_descr; port->tr_running = 0; tr_cfg.tr_en = regk_sser_no; @@ -1414,62 +1436,79 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id) return IRQ_RETVAL(found); } /* tr_interrupt */ + +static inline void handle_rx_packet(struct sync_port *port) +{ + int idx; + reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes }; + unsigned long flags; + + 
DEBUGRXINT(pr_info(KERN_INFO "!")); + spin_lock_irqsave(&port->lock, flags); + + /* If we overrun the user experience is crap regardless if we + * drop new or old data. Its much easier to get it right when + * dropping new data so lets do that. + */ + if ((port->writep + port->inbufchunk <= + port->flip + port->in_buffer_size) && + (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) { + memcpy(port->writep, + phys_to_virt((unsigned)port->next_rx_desc->buf), + port->inbufchunk); + port->writep += port->inbufchunk; + if (port->writep >= port->flip + port->in_buffer_size) + port->writep = port->flip; + + /* Timestamp the new data chunk. */ + if (port->write_ts_idx == NBR_IN_DESCR) + port->write_ts_idx = 0; + idx = port->write_ts_idx++; + do_posix_clock_monotonic_gettime(&port->timestamp[idx]); + port->in_buffer_len += port->inbufchunk; + } + spin_unlock_irqrestore(&port->lock, flags); + + port->next_rx_desc->eol = 1; + port->prev_rx_desc->eol = 0; + /* Cache bug workaround */ + flush_dma_descr(port->prev_rx_desc, 0); + port->prev_rx_desc = port->next_rx_desc; + port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next); + /* Cache bug workaround */ + flush_dma_descr(port->prev_rx_desc, 1); + /* wake up the waiting process */ + wake_up_interruptible(&port->in_wait_q); + DMA_CONTINUE(port->regi_dmain); + REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr); + +} + static irqreturn_t rx_interrupt(int irq, void *dev_id) { reg_dma_r_masked_intr masked; - reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes}; int i; int found = 0; - for (i = 0; i < NBR_PORTS; i++) - { - sync_port *port = &ports[i]; + DEBUG(pr_info("rx_interrupt\n")); + + for (i = 0; i < NBR_PORTS; i++) { + struct sync_port *port = &ports[i]; - if (!port->enabled || !port->use_dma ) + if (!port->enabled || !port->use_dma) continue; masked = REG_RD(dma, port->regi_dmain, r_masked_intr); - if (masked.data) /* Descriptor interrupt */ - { - found = 1; - while (REG_RD(dma, port->regi_dmain, 
rw_data) != - virt_to_phys(port->next_rx_desc)) { - DEBUGRXINT(printk(KERN_DEBUG "!")); - if (port->writep + port->inbufchunk > port->flip + port->in_buffer_size) { - int first_size = port->flip + port->in_buffer_size - port->writep; - memcpy((char*)port->writep, phys_to_virt((unsigned)port->next_rx_desc->buf), first_size); - memcpy(port->flip, phys_to_virt((unsigned)port->next_rx_desc->buf+first_size), port->inbufchunk - first_size); - port->writep = port->flip + port->inbufchunk - first_size; - } else { - memcpy((char*)port->writep, - phys_to_virt((unsigned)port->next_rx_desc->buf), - port->inbufchunk); - port->writep += port->inbufchunk; - if (port->writep >= port->flip + port->in_buffer_size) - port->writep = port->flip; - } - if (port->writep == port->readp) - { - port->full = 1; - } - - port->next_rx_desc->eol = 1; - port->prev_rx_desc->eol = 0; - /* Cache bug workaround */ - flush_dma_descr(port->prev_rx_desc, 0); - port->prev_rx_desc = port->next_rx_desc; - port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next); - /* Cache bug workaround */ - flush_dma_descr(port->prev_rx_desc, 1); - /* wake up the waiting process */ - wake_up_interruptible(&port->in_wait_q); - DMA_CONTINUE(port->regi_dmain); - REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr); + if (!masked.data) + continue; - } - } + /* Descriptor interrupt */ + found = 1; + while (REG_RD(dma, port->regi_dmain, rw_data) != + virt_to_phys(port->next_rx_desc)) + handle_rx_packet(port); } return IRQ_RETVAL(found); } /* rx_interrupt */ @@ -1478,75 +1517,83 @@ static irqreturn_t rx_interrupt(int irq, void *dev_id) #ifdef SYNC_SER_MANUAL static irqreturn_t manual_interrupt(int irq, void *dev_id) { + unsigned long flags; int i; int found = 0; reg_sser_r_masked_intr masked; - for (i = 0; i < NBR_PORTS; i++) - { - sync_port *port = &ports[i]; + for (i = 0; i < NBR_PORTS; i++) { + struct sync_port *port = &ports[i]; if (!port->enabled || port->use_dma) - { continue; - } masked = REG_RD(sser, 
port->regi_sser, r_masked_intr); - if (masked.rdav) /* Data received? */ - { - reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg); - reg_sser_r_rec_data data = REG_RD(sser, port->regi_sser, r_rec_data); + /* Data received? */ + if (masked.rdav) { + reg_sser_rw_rec_cfg rec_cfg = + REG_RD(sser, port->regi_sser, rw_rec_cfg); + reg_sser_r_rec_data data = REG_RD(sser, + port->regi_sser, r_rec_data); found = 1; /* Read data */ - switch(rec_cfg.sample_size) - { + spin_lock_irqsave(&port->lock, flags); + switch (rec_cfg.sample_size) { case 8: *port->writep++ = data.data & 0xff; break; case 12: *port->writep = (data.data & 0x0ff0) >> 4; *(port->writep + 1) = data.data & 0x0f; - port->writep+=2; + port->writep += 2; break; case 16: - *(unsigned short*)port->writep = data.data; - port->writep+=2; + *(unsigned short *)port->writep = data.data; + port->writep += 2; break; case 24: - *(unsigned int*)port->writep = data.data; - port->writep+=3; + *(unsigned int *)port->writep = data.data; + port->writep += 3; break; case 32: - *(unsigned int*)port->writep = data.data; - port->writep+=4; + *(unsigned int *)port->writep = data.data; + port->writep += 4; break; } - if (port->writep >= port->flip + port->in_buffer_size) /* Wrap? */ + /* Wrap? */ + if (port->writep >= port->flip + port->in_buffer_size) port->writep = port->flip; if (port->writep == port->readp) { - /* receive buffer overrun, discard oldest data - */ + /* Receive buf overrun, discard oldest data */ port->readp++; - if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */ + /* Wrap? */ + if (port->readp >= port->flip + + port->in_buffer_size) port->readp = port->flip; } + spin_unlock_irqrestore(&port->lock, flags); if (sync_data_avail(port) >= port->inbufchunk) - wake_up_interruptible(&port->in_wait_q); /* Wake up application */ + /* Wake up application */ + wake_up_interruptible(&port->in_wait_q); } - if (masked.trdy) /* Transmitter ready? */ - { + /* Transmitter ready? 
*/ + if (masked.trdy) { found = 1; - if (port->out_buf_count > 0) /* More data to send */ + /* More data to send */ + if (port->out_buf_count > 0) send_word(port); - else /* transmission finished */ - { + else { + /* Transmission finished */ reg_sser_rw_intr_mask intr_mask; - intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask); + intr_mask = REG_RD(sser, port->regi_sser, + rw_intr_mask); intr_mask.trdy = 0; - REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask); - wake_up_interruptible(&port->out_wait_q); /* Wake up application */ + REG_WR(sser, port->regi_sser, + rw_intr_mask, intr_mask); + /* Wake up application */ + wake_up_interruptible(&port->out_wait_q); } } } @@ -1554,4 +1601,109 @@ static irqreturn_t manual_interrupt(int irq, void *dev_id) } #endif +static int __init etrax_sync_serial_init(void) +{ +#if 1 + /* This code will be removed when we move to udev for all devices. */ + syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0); + if (register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME)) { + pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR); + return -1; + } +#else + /* Allocate dynamic major number. */ + if (alloc_chrdev_region(&syncser_first, 0, minor_count, SYNCSER_NAME)) { + pr_err("Failed to allocate character device region\n"); + return -1; + } +#endif + syncser_cdev = cdev_alloc(); + if (!syncser_cdev) { + pr_err("Failed to allocate cdev for syncser\n"); + unregister_chrdev_region(syncser_first, minor_count); + return -1; + } + cdev_init(syncser_cdev, &syncser_fops); + + /* Create a sysfs class for syncser */ + syncser_class = class_create(THIS_MODULE, "syncser_class"); + + /* Initialize Ports */ +#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) + if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) { + pr_warn("Unable to alloc pins for synchronous serial port 0\n"); + unregister_chrdev_region(syncser_first, minor_count); + return -EIO; + } + initialize_port(0); + ports[0].enabled = 1; + /* Register with sysfs so udev can pick it up. 
*/ + device_create(syncser_class, NULL, syncser_first, NULL, + "%s%d", SYNCSER_NAME, 0); +#endif + +#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) + if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) { + pr_warn("Unable to alloc pins for synchronous serial port 1\n"); + unregister_chrdev_region(syncser_first, minor_count); + class_destroy(syncser_class); + return -EIO; + } + initialize_port(1); + ports[1].enabled = 1; + /* Register with sysfs so udev can pick it up. */ + device_create(syncser_class, NULL, syncser_first, NULL, + "%s%d", SYNCSER_NAME, 0); +#endif + + /* Add it to system */ + if (cdev_add(syncser_cdev, syncser_first, minor_count) < 0) { + pr_err("Failed to add syncser as char device\n"); + device_destroy(syncser_class, syncser_first); + class_destroy(syncser_class); + cdev_del(syncser_cdev); + unregister_chrdev_region(syncser_first, minor_count); + return -1; + } + + + pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n", + SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first)); + + return 0; +} + +static void __exit etrax_sync_serial_exit(void) +{ + int i; + device_destroy(syncser_class, syncser_first); + class_destroy(syncser_class); + + if (syncser_cdev) { + cdev_del(syncser_cdev); + unregister_chrdev_region(syncser_first, minor_count); + } + for (i = 0; i < NBR_PORTS; i++) { + struct sync_port *port = &ports[i]; + if (port->init_irqs == dma_irq_setup) { + /* Free dma irqs and dma channels. */ +#ifdef SYNC_SER_DMA + artpec_free_dma(port->dma_in_nbr); + artpec_free_dma(port->dma_out_nbr); + free_irq(port->dma_out_intr_vect, port); + free_irq(port->dma_in_intr_vect, port); +#endif + } else if (port->init_irqs == manual_irq_setup) { + /* Free manual irq. 
*/ + free_irq(port->syncser_intr_vect, port); + } + } + + pr_info("ARTPEC synchronous serial port unregistered\n"); +} + module_init(etrax_sync_serial_init); +module_exit(etrax_sync_serial_exit); + +MODULE_LICENSE("GPL"); + diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c index 610909b..02e33eb 100644 --- a/arch/cris/arch-v32/kernel/debugport.c +++ b/arch/cris/arch-v32/kernel/debugport.c @@ -3,7 +3,9 @@ */ #include <linux/console.h> +#include <linux/kernel.h> #include <linux/init.h> +#include <linux/string.h> #include <hwregs/reg_rdwr.h> #include <hwregs/reg_map.h> #include <hwregs/ser_defs.h> @@ -65,6 +67,7 @@ struct dbg_port ports[] = }, #endif }; + static struct dbg_port *port = #if defined(CONFIG_ETRAX_DEBUG_PORT0) &ports[0]; @@ -97,14 +100,19 @@ static struct dbg_port *kgdb_port = #endif #endif -static void -start_port(struct dbg_port* p) +static void start_port(struct dbg_port *p) { - if (!p) - return; + /* Set up serial port registers */ + reg_ser_rw_tr_ctrl tr_ctrl = {0}; + reg_ser_rw_tr_dma_en tr_dma_en = {0}; - if (p->started) + reg_ser_rw_rec_ctrl rec_ctrl = {0}; + reg_ser_rw_tr_baud_div tr_baud_div = {0}; + reg_ser_rw_rec_baud_div rec_baud_div = {0}; + + if (!p || p->started) return; + p->started = 1; if (p->nbr == 1) @@ -118,36 +126,24 @@ start_port(struct dbg_port* p) crisv32_pinmux_alloc_fixed(pinmux_ser4); #endif - /* Set up serial port registers */ - reg_ser_rw_tr_ctrl tr_ctrl = {0}; - reg_ser_rw_tr_dma_en tr_dma_en = {0}; - - reg_ser_rw_rec_ctrl rec_ctrl = {0}; - reg_ser_rw_tr_baud_div tr_baud_div = {0}; - reg_ser_rw_rec_baud_div rec_baud_div = {0}; - tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493; tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no; tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8; tr_ctrl.en = rec_ctrl.en = 1; - if (p->parity == 'O') - { + if (p->parity == 'O') { tr_ctrl.par_en = regk_ser_yes; tr_ctrl.par = regk_ser_odd; rec_ctrl.par_en = regk_ser_yes; rec_ctrl.par = 
regk_ser_odd; - } - else if (p->parity == 'E') - { + } else if (p->parity == 'E') { tr_ctrl.par_en = regk_ser_yes; tr_ctrl.par = regk_ser_even; rec_ctrl.par_en = regk_ser_yes; rec_ctrl.par = regk_ser_odd; } - if (p->bits == 7) - { + if (p->bits == 7) { tr_ctrl.data_bits = regk_ser_bits7; rec_ctrl.data_bits = regk_ser_bits7; } @@ -161,8 +157,7 @@ start_port(struct dbg_port* p) #ifdef CONFIG_ETRAX_KGDB /* Use polling to get a single character from the kernel debug port */ -int -getDebugChar(void) +int getDebugChar(void) { reg_ser_rs_stat_din stat; reg_ser_rw_ack_intr ack_intr = { 0 }; @@ -179,8 +174,7 @@ getDebugChar(void) } /* Use polling to put a single character to the kernel debug port */ -void -putDebugChar(int val) +void putDebugChar(int val) { reg_ser_r_stat_din stat; do { @@ -190,12 +184,48 @@ putDebugChar(int val) } #endif /* CONFIG_ETRAX_KGDB */ +static void __init early_putch(int c) +{ + reg_ser_r_stat_din stat; + /* Wait until transmitter is ready and send. */ + do + stat = REG_RD(ser, port->instance, r_stat_din); + while (!stat.tr_rdy); + REG_WR_INT(ser, port->instance, rw_dout, c); +} + +static void __init +early_console_write(struct console *con, const char *s, unsigned n) +{ + extern void reset_watchdog(void); + int i; + + /* Send data. */ + for (i = 0; i < n; i++) { + /* TODO: the '\n' -> '\n\r' translation should be done at the + receiver. Remove it when the serial driver removes it. */ + if (s[i] == '\n') + early_putch('\r'); + early_putch(s[i]); + reset_watchdog(); + } +} + +static struct console early_console_dev __initdata = { + .name = "early", + .write = early_console_write, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1 +}; + /* Register console for printk's, etc. */ -int __init -init_etrax_debug(void) +int __init init_etrax_debug(void) { start_port(port); + /* Register an early console if a debug port was chosen. 
*/ + register_console(&early_console_dev); + #ifdef CONFIG_ETRAX_KGDB start_port(kgdb_port); #endif /* CONFIG_ETRAX_KGDB */ diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c index ee66866..eb74dab 100644 --- a/arch/cris/arch-v32/kernel/time.c +++ b/arch/cris/arch-v32/kernel/time.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/threads.h> #include <linux/cpufreq.h> +#include <linux/mm.h> #include <asm/types.h> #include <asm/signal.h> #include <asm/io.h> @@ -56,7 +57,6 @@ static int __init etrax_init_cont_rotime(void) } arch_initcall(etrax_init_cont_rotime); - unsigned long timer_regs[NR_CPUS] = { regi_timer0, @@ -68,9 +68,8 @@ unsigned long timer_regs[NR_CPUS] = extern int set_rtc_mmss(unsigned long nowtime); #ifdef CONFIG_CPU_FREQ -static int -cris_time_freq_notifier(struct notifier_block *nb, unsigned long val, - void *data); +static int cris_time_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data); static struct notifier_block cris_time_freq_notifier_block = { .notifier_call = cris_time_freq_notifier, @@ -87,7 +86,6 @@ unsigned long get_ns_in_jiffie(void) return ns; } - /* From timer MDS describing the hardware watchdog: * 4.3.1 Watchdog Operation * The watchdog timer is an 8-bit timer with a configurable start value. @@ -109,11 +107,18 @@ static short int watchdog_key = 42; /* arbitrary 7 bit number */ * is used though, so set this really low. */ #define WATCHDOG_MIN_FREE_PAGES 8 +/* for reliable NICE_DOGGY behaviour */ +static int bite_in_progress; + void reset_watchdog(void) { #if defined(CONFIG_ETRAX_WATCHDOG) reg_timer_rw_wd_ctrl wd_ctrl = { 0 }; +#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY) + if (unlikely(bite_in_progress)) + return; +#endif /* Only keep watchdog happy as long as we have memory left! 
*/ if(nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) { /* Reset the watchdog with the inverse of the old key */ @@ -148,7 +153,9 @@ void handle_watchdog_bite(struct pt_regs *regs) #if defined(CONFIG_ETRAX_WATCHDOG) extern int cause_of_death; + nmi_enter(); oops_in_progress = 1; + bite_in_progress = 1; printk(KERN_WARNING "Watchdog bite\n"); /* Check if forced restart or unexpected watchdog */ @@ -170,6 +177,7 @@ void handle_watchdog_bite(struct pt_regs *regs) printk(KERN_WARNING "Oops: bitten by watchdog\n"); show_registers(regs); oops_in_progress = 0; + printk("\n"); /* Flush mtdoops. */ #ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY reset_watchdog(); #endif @@ -202,7 +210,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id) /* Reset watchdog otherwise it resets us! */ reset_watchdog(); - /* Update statistics. */ + /* Update statistics. */ update_process_times(user_mode(regs)); cris_do_profile(regs); /* Save profiling information */ @@ -213,7 +221,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id) /* Call the real timer interrupt handler */ xtime_update(1); - return IRQ_HANDLED; + return IRQ_HANDLED; } /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */ @@ -293,14 +301,13 @@ void __init time_init(void) #ifdef CONFIG_CPU_FREQ cpufreq_register_notifier(&cris_time_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); + CPUFREQ_TRANSITION_NOTIFIER); #endif } #ifdef CONFIG_CPU_FREQ -static int -cris_time_freq_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static int cris_time_freq_notifier(struct notifier_block *nb, + unsigned long val, void *data) { struct cpufreq_freqs *freqs = data; if (val == CPUFREQ_POSTCHANGE) { diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c index 0b5b70d..f0f335d 100644 --- a/arch/cris/arch-v32/lib/usercopy.c +++ b/arch/cris/arch-v32/lib/usercopy.c @@ -26,8 +26,7 @@ /* Copy to userspace. 
This is based on the memcpy used for kernel-to-kernel copying; see "string.c". */ -unsigned long -__copy_user (void __user *pdst, const void *psrc, unsigned long pn) +unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -155,13 +154,13 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) return retn; } +EXPORT_SYMBOL(__copy_user); /* Copy from user to kernel, zeroing the bytes that were inaccessible in userland. The return-value is the number of bytes that were inaccessible. */ - -unsigned long -__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) +unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, + unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -321,11 +320,10 @@ copy_exception_bytes: return retn + n; } +EXPORT_SYMBOL(__copy_user_zeroing); /* Zero userspace. */ - -unsigned long -__do_clear_user (void __user *pto, unsigned long pn) +unsigned long __do_clear_user(void __user *pto, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. 
@@ -468,3 +466,4 @@ __do_clear_user (void __user *pto, unsigned long pn) return retn; } +EXPORT_SYMBOL(__do_clear_user); diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c index 38f29ee..05a0470 100644 --- a/arch/cris/arch-v32/mach-fs/pinmux.c +++ b/arch/cris/arch-v32/mach-fs/pinmux.c @@ -26,7 +26,29 @@ static DEFINE_SPINLOCK(pinmux_lock); static void crisv32_pinmux_set(int port); -int crisv32_pinmux_init(void) +static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin, + enum pin_mode mode) +{ + int i; + + for (i = first_pin; i <= last_pin; i++) { + if ((pins[port][i] != pinmux_none) + && (pins[port][i] != pinmux_gpio) + && (pins[port][i] != mode)) { +#ifdef DEBUG + panic("Pinmux alloc failed!\n"); +#endif + return -EPERM; + } + } + + for (i = first_pin; i <= last_pin; i++) + pins[port][i] = mode; + + crisv32_pinmux_set(port); +} + +static int crisv32_pinmux_init(void) { static int initialized; @@ -37,20 +59,20 @@ int crisv32_pinmux_init(void) pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 = pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes; REG_WR(pinmux, regi_pinmux, rw_pa, pa); - crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio); - crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio); - crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio); - crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio); + __crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio); } return 0; } -int -crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode) +int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, + enum pin_mode mode) { - int i; unsigned long flags; + int ret; crisv32_pinmux_init(); @@ -59,26 +81,11 @@ crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode 
mode) spin_lock_irqsave(&pinmux_lock, flags); - for (i = first_pin; i <= last_pin; i++) { - if ((pins[port][i] != pinmux_none) - && (pins[port][i] != pinmux_gpio) - && (pins[port][i] != mode)) { - spin_unlock_irqrestore(&pinmux_lock, flags); -#ifdef DEBUG - panic("Pinmux alloc failed!\n"); -#endif - return -EPERM; - } - } - - for (i = first_pin; i <= last_pin; i++) - pins[port][i] = mode; - - crisv32_pinmux_set(port); + ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode); spin_unlock_irqrestore(&pinmux_lock, flags); - return 0; + return ret; } int crisv32_pinmux_alloc_fixed(enum fixed_function function) @@ -98,58 +105,58 @@ int crisv32_pinmux_alloc_fixed(enum fixed_function function) switch (function) { case pinmux_ser1: - ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed); hwprot.ser1 = regk_pinmux_yes; break; case pinmux_ser2: - ret = crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed); hwprot.ser2 = regk_pinmux_yes; break; case pinmux_ser3: - ret = crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed); hwprot.ser3 = regk_pinmux_yes; break; case pinmux_sser0: - ret = crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); hwprot.sser0 = regk_pinmux_yes; break; case pinmux_sser1: - ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); hwprot.sser1 = regk_pinmux_yes; break; case pinmux_ata0: - ret = crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed); hwprot.ata0 
= regk_pinmux_yes; break; case pinmux_ata1: - ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed); hwprot.ata1 = regk_pinmux_yes; break; case pinmux_ata2: - ret = crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed); hwprot.ata2 = regk_pinmux_yes; break; case pinmux_ata3: - ret = crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed); hwprot.ata2 = regk_pinmux_yes; break; case pinmux_ata: - ret = crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed); - ret |= crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed); + ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed); hwprot.ata = regk_pinmux_yes; break; case pinmux_eth1: - ret = crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed); hwprot.eth1 = regk_pinmux_yes; hwprot.eth1_mgm = regk_pinmux_yes; break; case pinmux_timer: - ret = crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); + ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed); hwprot.timer = regk_pinmux_yes; spin_unlock_irqrestore(&pinmux_lock, flags); return ret; @@ -188,9 +195,19 @@ void crisv32_pinmux_set(int port) #endif } -int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) +static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) { int i; + + for (i = first_pin; i <= last_pin; i++) + pins[port][i] = pinmux_none; + + crisv32_pinmux_set(port); + return 0; +} + +int 
crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) +{ unsigned long flags; crisv32_pinmux_init(); @@ -199,11 +216,7 @@ int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin) return -EINVAL; spin_lock_irqsave(&pinmux_lock, flags); - - for (i = first_pin; i <= last_pin; i++) - pins[port][i] = pinmux_none; - - crisv32_pinmux_set(port); + __crisv32_pinmux_dealloc(port, first_pin, last_pin); spin_unlock_irqrestore(&pinmux_lock, flags); return 0; @@ -226,58 +239,58 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function) switch (function) { case pinmux_ser1: - ret = crisv32_pinmux_dealloc(PORT_C, 4, 7); + ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7); hwprot.ser1 = regk_pinmux_no; break; case pinmux_ser2: - ret = crisv32_pinmux_dealloc(PORT_C, 8, 11); + ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11); hwprot.ser2 = regk_pinmux_no; break; case pinmux_ser3: - ret = crisv32_pinmux_dealloc(PORT_C, 12, 15); + ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15); hwprot.ser3 = regk_pinmux_no; break; case pinmux_sser0: - ret = crisv32_pinmux_dealloc(PORT_C, 0, 3); - ret |= crisv32_pinmux_dealloc(PORT_C, 16, 16); + ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3); + ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16); hwprot.sser0 = regk_pinmux_no; break; case pinmux_sser1: - ret = crisv32_pinmux_dealloc(PORT_D, 0, 4); + ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4); hwprot.sser1 = regk_pinmux_no; break; case pinmux_ata0: - ret = crisv32_pinmux_dealloc(PORT_D, 5, 7); - ret |= crisv32_pinmux_dealloc(PORT_D, 15, 17); + ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7); + ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17); hwprot.ata0 = regk_pinmux_no; break; case pinmux_ata1: - ret = crisv32_pinmux_dealloc(PORT_D, 0, 4); - ret |= crisv32_pinmux_dealloc(PORT_E, 17, 17); + ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4); + ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17); hwprot.ata1 = regk_pinmux_no; break; case pinmux_ata2: - ret = crisv32_pinmux_dealloc(PORT_C, 11, 15); - ret |= 
crisv32_pinmux_dealloc(PORT_E, 3, 3); + ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15); + ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3); hwprot.ata2 = regk_pinmux_no; break; case pinmux_ata3: - ret = crisv32_pinmux_dealloc(PORT_C, 8, 10); - ret |= crisv32_pinmux_dealloc(PORT_C, 0, 2); + ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10); + ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2); hwprot.ata2 = regk_pinmux_no; break; case pinmux_ata: - ret = crisv32_pinmux_dealloc(PORT_B, 0, 15); - ret |= crisv32_pinmux_dealloc(PORT_D, 8, 15); + ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15); + ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15); hwprot.ata = regk_pinmux_no; break; case pinmux_eth1: - ret = crisv32_pinmux_dealloc(PORT_E, 0, 17); + ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17); hwprot.eth1 = regk_pinmux_no; hwprot.eth1_mgm = regk_pinmux_no; break; case pinmux_timer: - ret = crisv32_pinmux_dealloc(PORT_C, 16, 16); + ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16); hwprot.timer = regk_pinmux_no; spin_unlock_irqrestore(&pinmux_lock, flags); return ret; @@ -293,7 +306,8 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function) return ret; } -void crisv32_pinmux_dump(void) +#ifdef DEBUG +static void crisv32_pinmux_dump(void) { int i, j; @@ -305,5 +319,5 @@ void crisv32_pinmux_dump(void) printk(KERN_DEBUG " Pin %d = %d\n", j, pins[i][j]); } } - +#endif __initcall(crisv32_pinmux_init); diff --git a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h index c2b3036..09bf0c9 100644 --- a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h +++ b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h @@ -28,11 +28,9 @@ enum fixed_function { pinmux_timer }; -int crisv32_pinmux_init(void); int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode); int crisv32_pinmux_alloc_fixed(enum fixed_function function); int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin); int crisv32_pinmux_dealloc_fixed(enum 
fixed_function function); -void crisv32_pinmux_dump(void); #endif diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index d5f1248..889f2de 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,8 +1,4 @@ -header-y += arch-v10/ -header-y += arch-v32/ - - generic-y += barrier.h generic-y += clkdev.h generic-y += cputime.h diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 7d47b36..01f66b8 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -1,8 +1,8 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -header-y += arch-v10/ -header-y += arch-v32/ +header-y += ../arch-v10/arch/ +header-y += ../arch-v32/arch/ header-y += auxvec.h header-y += bitsperlong.h header-y += byteorder.h diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c index 5868cee..3908b94 100644 --- a/arch/cris/kernel/crisksyms.c +++ b/arch/cris/kernel/crisksyms.c @@ -47,16 +47,16 @@ EXPORT_SYMBOL(__negdi2); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); -/* Userspace access functions */ -EXPORT_SYMBOL(__copy_user_zeroing); -EXPORT_SYMBOL(__copy_user); - #undef memcpy #undef memset extern void * memset(void *, int, __kernel_size_t); extern void * memcpy(void *, const void *, __kernel_size_t); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); +#ifdef CONFIG_ETRAX_ARCH_V32 +#undef strcmp +EXPORT_SYMBOL(strcmp); +#endif #ifdef CONFIG_ETRAX_FAST_TIMER /* Fast timer functions */ @@ -66,3 +66,4 @@ EXPORT_SYMBOL(del_fast_timer); EXPORT_SYMBOL(schedule_usleep); #endif EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_from_user); diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index 0ffda73..da4c724 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -14,6 +14,10 @@ #include <linux/init.h> #include <linux/module.h> +#include <linux/utsname.h> +#ifdef CONFIG_KALLSYMS +#include <linux/kallsyms.h> +#endif 
#include <asm/pgtable.h> #include <asm/uaccess.h> @@ -34,25 +38,24 @@ static int kstack_depth_to_print = 24; void (*nmi_handler)(struct pt_regs *); -void -show_trace(unsigned long *stack) +void show_trace(unsigned long *stack) { unsigned long addr, module_start, module_end; extern char _stext, _etext; int i; - printk("\nCall Trace: "); + pr_err("\nCall Trace: "); i = 1; module_start = VMALLOC_START; module_end = VMALLOC_END; - while (((long)stack & (THREAD_SIZE-1)) != 0) { + while (((long)stack & (THREAD_SIZE - 1)) != 0) { if (__get_user(addr, stack)) { /* This message matches "failing address" marked s390 in ksymoops, so lines containing it will not be filtered out by ksymoops. */ - printk("Failing address 0x%lx\n", (unsigned long)stack); + pr_err("Failing address 0x%lx\n", (unsigned long)stack); break; } stack++; @@ -68,10 +71,14 @@ show_trace(unsigned long *stack) if (((addr >= (unsigned long)&_stext) && (addr <= (unsigned long)&_etext)) || ((addr >= module_start) && (addr <= module_end))) { +#ifdef CONFIG_KALLSYMS + print_ip_sym(addr); +#else if (i && ((i % 8) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); + pr_err("\n "); + pr_err("[<%08lx>] ", addr); i++; +#endif } } } @@ -111,21 +118,21 @@ show_stack(struct task_struct *task, unsigned long *sp) stack = sp; - printk("\nStack from %08lx:\n ", (unsigned long)stack); + pr_err("\nStack from %08lx:\n ", (unsigned long)stack); for (i = 0; i < kstack_depth_to_print; i++) { if (((long)stack & (THREAD_SIZE-1)) == 0) break; if (i && ((i % 8) == 0)) - printk("\n "); + pr_err("\n "); if (__get_user(addr, stack)) { /* This message matches "failing address" marked s390 in ksymoops, so lines containing it will not be filtered out by ksymoops. 
*/ - printk("Failing address 0x%lx\n", (unsigned long)stack); + pr_err("Failing address 0x%lx\n", (unsigned long)stack); break; } stack++; - printk("%08lx ", addr); + pr_err("%08lx ", addr); } show_trace(sp); } @@ -139,33 +146,32 @@ show_stack(void) unsigned long *sp = (unsigned long *)rdusp(); int i; - printk("Stack dump [0x%08lx]:\n", (unsigned long)sp); + pr_err("Stack dump [0x%08lx]:\n", (unsigned long)sp); for (i = 0; i < 16; i++) - printk("sp + %d: 0x%08lx\n", i*4, sp[i]); + pr_err("sp + %d: 0x%08lx\n", i*4, sp[i]); return 0; } #endif -void -set_nmi_handler(void (*handler)(struct pt_regs *)) +void set_nmi_handler(void (*handler)(struct pt_regs *)) { nmi_handler = handler; arch_enable_nmi(); } #ifdef CONFIG_DEBUG_NMI_OOPS -void -oops_nmi_handler(struct pt_regs *regs) +void oops_nmi_handler(struct pt_regs *regs) { stop_watchdog(); oops_in_progress = 1; - printk("NMI!\n"); + pr_err("NMI!\n"); show_registers(regs); oops_in_progress = 0; + oops_exit(); + pr_err("\n"); /* Flush mtdoops. */ } -static int __init -oops_nmi_register(void) +static int __init oops_nmi_register(void) { set_nmi_handler(oops_nmi_handler); return 0; @@ -180,8 +186,7 @@ __initcall(oops_nmi_register); * similar to an Oops dump, and if the kernel is configured to be a nice * doggy, then halt instead of reboot. */ -void -watchdog_bite_hook(struct pt_regs *regs) +void watchdog_bite_hook(struct pt_regs *regs) { #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY local_irq_disable(); @@ -196,8 +201,7 @@ watchdog_bite_hook(struct pt_regs *regs) } /* This is normally the Oops function. 
*/ -void -die_if_kernel(const char *str, struct pt_regs *regs, long err) +void die_if_kernel(const char *str, struct pt_regs *regs, long err) { if (user_mode(regs)) return; @@ -211,13 +215,17 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) stop_watchdog(); #endif + oops_enter(); handle_BUG(regs); - printk("%s: %04lx\n", str, err & 0xffff); + pr_err("Linux %s %s\n", utsname()->release, utsname()->version); + pr_err("%s: %04lx\n", str, err & 0xffff); show_registers(regs); + oops_exit(); oops_in_progress = 0; + pr_err("\n"); /* Flush mtdoops. */ #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY reset_watchdog(); @@ -225,8 +233,7 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) do_exit(SIGSEGV); } -void __init -trap_init(void) +void __init trap_init(void) { /* Nothing needs to be done */ } diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index c81af5b..1e7fd45 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -11,13 +11,15 @@ #include <linux/gfp.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/proc_fs.h> +#include <linux/kcore.h> #include <asm/tlb.h> #include <asm/sections.h> unsigned long empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); -void __init -mem_init(void) +void __init mem_init(void) { BUG_ON(!mem_map); @@ -31,10 +33,36 @@ mem_init(void) mem_init_print_info(NULL); } -/* free the pages occupied by initialization code */ +/* Free a range of init pages. Virtual addresses. */ -void -free_initmem(void) +void free_init_pages(const char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + free_page(addr); + totalram_pages++; + } + + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + +/* Free the pages occupied by initialization code. 
*/ + +void free_initmem(void) { free_initmem_default(-1); } + +/* Free the pages occupied by initrd code. */ + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", + start, + end); +} +#endif diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c index f9ca44b..80fdb99 100644 --- a/arch/cris/mm/ioremap.c +++ b/arch/cris/mm/ioremap.c @@ -76,10 +76,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l * Must be freed with iounmap. */ -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) { return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0); } +EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h index 2635117..69952c1 100644 --- a/arch/hexagon/include/asm/cache.h +++ b/arch/hexagon/include/asm/cache.h @@ -1,7 +1,7 @@ /* * Cache definitions for the Hexagon architecture * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. + * Copyright (c) 2010-2011,2014 The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -25,6 +25,8 @@ #define L1_CACHE_SHIFT (5) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES + #define __cacheline_aligned __aligned(L1_CACHE_BYTES) #define ____cacheline_aligned __aligned(L1_CACHE_BYTES) diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h index 49e0896..b86f9f3 100644 --- a/arch/hexagon/include/asm/cacheflush.h +++ b/arch/hexagon/include/asm/cacheflush.h @@ -21,10 +21,7 @@ #ifndef _ASM_CACHEFLUSH_H #define _ASM_CACHEFLUSH_H -#include <linux/cache.h> -#include <linux/mm.h> -#include <asm/string.h> -#include <asm-generic/cacheflush.h> +#include <linux/mm_types.h> /* Cache flushing: * @@ -41,6 +38,20 @@ #define LINESIZE 32 #define LINEBITS 5 +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + /* * Flush Dcache range through current map. */ @@ -49,7 +60,6 @@ extern void flush_dcache_range(unsigned long start, unsigned long end); /* * Flush Icache range through current map. 
*/ -#undef flush_icache_range extern void flush_icache_range(unsigned long start, unsigned long end); /* @@ -79,19 +89,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, /* generic_ptrace_pokedata doesn't wind up here, does it? */ } -#undef copy_to_user_page -static inline void copy_to_user_page(struct vm_area_struct *vma, - struct page *page, - unsigned long vaddr, - void *dst, void *src, int len) -{ - memcpy(dst, src, len); - if (vma->vm_flags & VM_EXEC) { - flush_icache_range((unsigned long) dst, - (unsigned long) dst + len); - } -} +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, void *src, int len); +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end); extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end); diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index 7029899..66f5e9a 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -24,14 +24,9 @@ #ifdef __KERNEL__ #include <linux/types.h> -#include <linux/delay.h> -#include <linux/vmalloc.h> -#include <asm/string.h> -#include <asm/mem-layout.h> #include <asm/iomap.h> #include <asm/page.h> #include <asm/cacheflush.h> -#include <asm/tlbflush.h> /* * We don't have PCI yet. 
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index 0e7c1db..6981949 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -19,6 +19,7 @@ */ #include <linux/init.h> +#include <linux/delay.h> #include <linux/bootmem.h> #include <linux/mmzone.h> #include <linux/mm.h> diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 7858663..110dab1 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -1,7 +1,7 @@ /* * Kernel traps/events for Hexagon processor * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -423,7 +423,7 @@ void do_trap0(struct pt_regs *regs) */ info.si_code = TRAP_BRKPT; info.si_addr = (void __user *) pt_elr(regs); - send_sig_info(SIGTRAP, &info, current); + force_sig_info(SIGTRAP, &info, current); } else { #ifdef CONFIG_KGDB kgdb_handle_exception(pt_cause(regs), SIGTRAP, diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 44d8c47..5f268c1 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -1,7 +1,7 @@ /* * Linker script for Hexagon kernel * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -59,7 +59,7 @@ SECTIONS INIT_DATA_SECTION(PAGE_SIZE) _sdata = .; - RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE) + RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE) RO_DATA_SECTION(PAGE_SIZE) _edata = .; diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c index 0c76c80..a7c6d82 100644 --- a/arch/hexagon/mm/cache.c +++ b/arch/hexagon/mm/cache.c @@ -127,3 +127,13 @@ void flush_cache_all_hexagon(void) local_irq_restore(flags); mb(); } + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, void *src, int len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) { + flush_icache_range((unsigned long) dst, + (unsigned long) dst + len); + } +} diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c index 5905fd5..d27d672 100644 --- a/arch/hexagon/mm/ioremap.c +++ b/arch/hexagon/mm/ioremap.c @@ -20,6 +20,7 @@ #include <linux/io.h> #include <linux/vmalloc.h> +#include <linux/mm.h> void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) { diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 14aa1c5..0ec484d 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -35,8 +35,8 @@ extern void *per_cpu_init(void); /* * Be extremely careful when taking the address of this variable! Due to virtual - * remapping, it is different from the canonical address returned by __get_cpu_var(var)! - * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly + * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)! + * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly * more efficient. 
*/ #define __ia64_per_cpu_var(var) (*({ \ diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h new file mode 100644 index 0000000..d2f99ca --- /dev/null +++ b/arch/powerpc/include/asm/cpuidle.h @@ -0,0 +1,20 @@ +#ifndef _ASM_POWERPC_CPUIDLE_H +#define _ASM_POWERPC_CPUIDLE_H + +#ifdef CONFIG_PPC_POWERNV +/* Used in powernv idle state management */ +#define PNV_THREAD_RUNNING 0 +#define PNV_THREAD_NAP 1 +#define PNV_THREAD_SLEEP 2 +#define PNV_THREAD_WINKLE 3 +#define PNV_CORE_IDLE_LOCK_BIT 0x100 +#define PNV_CORE_IDLE_THREAD_BITS 0x0FF + +#ifndef __ASSEMBLY__ +extern u32 pnv_fastsleep_workaround_at_entry[]; +extern u32 pnv_fastsleep_workaround_at_exit[]; +#endif + +#endif + +#endif diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5cd8d2f..eb95b67 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -56,6 +56,14 @@ struct opal_sg_list { #define OPAL_HARDWARE_FROZEN -13 #define OPAL_WRONG_STATE -14 #define OPAL_ASYNC_COMPLETION -15 +#define OPAL_I2C_TIMEOUT -17 +#define OPAL_I2C_INVALID_CMD -18 +#define OPAL_I2C_LBUS_PARITY -19 +#define OPAL_I2C_BKEND_OVERRUN -20 +#define OPAL_I2C_BKEND_ACCESS -21 +#define OPAL_I2C_ARBT_LOST -22 +#define OPAL_I2C_NACK_RCVD -23 +#define OPAL_I2C_STOP_ERR -24 /* API Tokens (in r0) */ #define OPAL_INVALID_CALL -1 @@ -152,12 +160,25 @@ struct opal_sg_list { #define OPAL_PCI_ERR_INJECT 96 #define OPAL_PCI_EEH_FREEZE_SET 97 #define OPAL_HANDLE_HMI 98 +#define OPAL_CONFIG_CPU_IDLE_STATE 99 +#define OPAL_SLW_SET_REG 100 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 #define OPAL_WRITE_TPO 103 #define OPAL_READ_TPO 104 #define OPAL_IPMI_SEND 107 #define OPAL_IPMI_RECV 108 +#define OPAL_I2C_REQUEST 109 + +/* Device tree flags */ + +/* Flags set in power-mgmt nodes in device tree if + * respective idle states are supported in the platform. 
+ */ +#define OPAL_PM_NAP_ENABLED 0x00010000 +#define OPAL_PM_SLEEP_ENABLED 0x00020000 +#define OPAL_PM_WINKLE_ENABLED 0x00040000 +#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 #ifndef __ASSEMBLY__ @@ -712,6 +733,24 @@ typedef struct oppanel_line { uint64_t line_len; } oppanel_line_t; +/* OPAL I2C request */ +struct opal_i2c_request { + uint8_t type; +#define OPAL_I2C_RAW_READ 0 +#define OPAL_I2C_RAW_WRITE 1 +#define OPAL_I2C_SM_READ 2 +#define OPAL_I2C_SM_WRITE 3 + uint8_t flags; +#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */ + uint8_t subaddr_sz; /* Max 4 */ + uint8_t reserved; + __be16 addr; /* 7 or 10 bit address */ + __be16 reserved2; + __be32 subaddr; /* Sub-address if any */ + __be32 size; /* Data size */ + __be64 buffer_ra; /* Buffer real address */ +}; + /* /sys/firmware/opal */ extern struct kobject *opal_kobj; @@ -876,11 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); +int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t msg_len); int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t *msg_len); +int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, + struct opal_i2c_request *oreq); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 24a386c..e5f22c6 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -152,6 +152,16 @@ struct paca_struct { u64 tm_scratch; /* TM scratch area for reclaim */ #endif +#ifdef CONFIG_PPC_POWERNV + /* Per-core mask tracking idle threads and a 
lock bit-[L][TTTTTTTT] */ + u32 *core_idle_state_ptr; + u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */ + /* Mask to indicate thread id in core */ + u8 thread_mask; + /* Mask to denote subcore sibling threads */ + u8 subcore_sibling_mask; +#endif + #ifdef CONFIG_PPC_BOOK3S_64 /* Exclusive emergency stack pointer for machine check exception. */ void *mc_emergency_sp; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1a52877..03cd858 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -194,6 +194,7 @@ #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 +#define PPC_INST_WINKLE 0x4c0003e4 /* A2 specific instructions */ #define PPC_INST_ERATWE 0x7c0001a6 @@ -375,6 +376,7 @@ #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) +#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) /* BHRB instructions */ #define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 29c3798..bf117d8 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -452,7 +452,8 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern unsigned long power7_nap(int check_irq); -extern void power7_sleep(void); +extern unsigned long power7_sleep(void); +extern unsigned long power7_winkle(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c998279..1c874fb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -118,8 +118,10 @@ #define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) #ifdef __BIG_ENDIAN__ #define MSR_ __MSR 
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) #else #define MSR_ (__MSR | MSR_LE) +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE) #endif #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) @@ -371,6 +373,7 @@ #define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ #define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */ #define SPRN_PPR 0x380 /* SMT Thread status Register */ +#define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ #define SPRN_DER 0x095 /* Debug Enable Regsiter */ @@ -728,6 +731,7 @@ #define SPRN_BESCR 806 /* Branch event status and control register */ #define BESCR_GE 0x8000000000000000ULL /* Global Enable */ #define SPRN_WORT 895 /* Workload optimization register - thread */ +#define SPRN_WORC 863 /* Workload optimization register - core */ #define SPRN_PMC1 787 #define SPRN_PMC2 788 diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 6240698..ff21b7a 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task, static inline int syscall_get_arch(void) { - return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; + int arch = is_32bit_task() ? 
AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; +#ifdef __LITTLE_ENDIAN__ + arch |= __AUDIT_ARCH_LE; +#endif + return arch; } #endif /* _ASM_SYSCALL_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9485b43..a0c071d 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -284,7 +284,7 @@ do { \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) #endif /* __powerpc64__ */ @@ -297,7 +297,7 @@ do { \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -308,7 +308,7 @@ do { \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 24d78e1..e624f96 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -726,5 +726,16 @@ int main(void) arch.timing_last_enter.tv32.tbl)); #endif +#ifdef CONFIG_PPC_POWERNV + DEFINE(PACA_CORE_IDLE_STATE_PTR, + offsetof(struct paca_struct, core_idle_state_ptr)); + DEFINE(PACA_THREAD_IDLE_STATE, + offsetof(struct paca_struct, thread_idle_state)); + DEFINE(PACA_THREAD_MASK, + offsetof(struct paca_struct, thread_mask)); + DEFINE(PACA_SUBCORE_SIBLING_MASK, + offsetof(struct paca_struct, subcore_sibling_mask)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index db08382..c2df815 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ 
b/arch/powerpc/kernel/exceptions-64s.S @@ -15,6 +15,7 @@ #include <asm/hw_irq.h> #include <asm/exception-64s.h> #include <asm/ptrace.h> +#include <asm/cpuidle.h> /* * We layout physical memory as follows: @@ -101,23 +102,34 @@ system_reset_pSeries: #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + * waking up from nap/sleep/winkle. */ mfspr r13,SPRN_SRR1 rlwinm. r13,r13,47-31,30,31 beq 9f - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal, we could try to use the - * PIR to locate a PACA, then use an emergency stack etc... - * OPAL v3 based powernv platforms have new idle states - * which fall in this catagory. + cmpwi cr3,r13,2 + + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. */ - bgt cr1,8f GET_PACA(r13) + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,8f /* Either sleep or Winkle */ + + /* Waking up from nap should not cause hypervisor state loss */ + bgt cr3,. 
+ + /* Waking up from nap */ + li r0,PNV_THREAD_RUNNING + stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE li r0,KVM_HWTHREAD_IN_KERNEL @@ -133,7 +145,7 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 - beq cr1,2f + beq cr3,2f b power7_wakeup_noloss 2: b power7_wakeup_loss @@ -1382,6 +1394,7 @@ machine_check_handle_early: MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) + li r3,PNV_THREAD_NAP b power7_enter_nap_mode 4: #endif diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 18c0687..05adc8b 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -18,9 +18,25 @@ #include <asm/hw_irq.h> #include <asm/kvm_book3s_asm.h> #include <asm/opal.h> +#include <asm/cpuidle.h> +#include <asm/mmu-hash64.h> #undef DEBUG +/* + * Use unused space in the interrupt stack to save and restore + * registers for winkle support. + */ +#define _SDR1 GPR3 +#define _RPR GPR4 +#define _SPURR GPR5 +#define _PURR GPR6 +#define _TSCR GPR7 +#define _DSCR GPR8 +#define _AMOR GPR9 +#define _WORT GPR10 +#define _WORC GPR11 + /* Idle state entry routines */ #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ @@ -37,8 +53,7 @@ /* * Pass requested state in r3: - * 0 - nap - * 1 - sleep + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE * * To check IRQ_HAPPENED in r4 * 0 - don't check @@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -_GLOBAL(power7_enter_nap_mode) + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. 
+ */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + + .globl power7_enter_nap_mode +power7_enter_nap_mode: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP stb r4,HSTATE_HWTHREAD_STATE(r13) #endif - cmpwi cr0,r3,1 - beq 2f + stb r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr3,r3,PNV_THREAD_SLEEP + bge cr3,2f IDLE_STATE_ENTER_SEQ(PPC_NAP) /* No return */ -2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP) - /* No return */ +2: + /* Sleep or winkle */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop1: + lwarx r15,0,r14 + andc r15,r15,r7 /* Clear thread bit */ + + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + +/* + * If cr0 = 0, then current thread is the last thread of the core entering + * sleep. Last thread needs to execute the hardware bug workaround code if + * required by the platform. + * Make the workaround call unconditionally here. The below branch call is + * patched out when the idle states are discovered if the platform does not + * require it. + */ +.global pnv_fastsleep_workaround_at_entry +pnv_fastsleep_workaround_at_entry: + beq fastsleep_workaround_at_entry + + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + +common_enter: /* common code for all the threads entering sleep or winkle */ + bgt cr3,enter_winkle + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + +fastsleep_workaround_at_entry: + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. 
r15,0,r14 + bne- lwarx_loop1 + isync + + /* Fast sleep workaround */ + li r3,1 + li r4,1 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + + /* Clear Lock bit */ + li r0,0 + lwsync + stw r0,0(r14) + b common_enter + +enter_winkle: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ @@ -125,48 +227,21 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 - li r3,0 + li r3,PNV_THREAD_NAP b power7_powersave_common /* No return */ _GLOBAL(power7_sleep) - li r3,1 + li r3,PNV_THREAD_SLEEP li r4,1 b power7_powersave_common /* No return */ -/* - * Make opal call in realmode. This is a generic function to be called - * from realmode from reset vector. It handles endianess. 
- * - * r13 - paca pointer - * r1 - stack pointer - * r3 - opal token - */ -opal_call_realmode: - mflr r12 - std r12,_LINK(r1) - ld r2,PACATOC(r13) - /* Set opal return address */ - LOAD_REG_ADDR(r0,return_from_opal_call) - mtlr r0 - /* Handle endian-ness */ - li r0,MSR_LE - mfmsr r12 - andc r12,r12,r0 - mtspr SPRN_HSRR1,r12 - mr r0,r3 /* Move opal token to r0 */ - LOAD_REG_ADDR(r11,opal) - ld r12,8(r11) - ld r2,0(r11) - mtspr SPRN_HSRR0,r12 - hrfid - -return_from_opal_call: - FIXUP_ENDIAN - ld r0,_LINK(r1) - mtlr r0 - blr +_GLOBAL(power7_winkle) + li r3,3 + li r4,1 + b power7_powersave_common + /* No return */ #define CHECK_HMI_INTERRUPT \ mfspr r0,SPRN_SRR1; \ @@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ bl opal_call_realmode; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; @@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) + /* + * Before entering any idle state, the NVGPRs are saved in the stack + * and they are restored before switching to the process context. Hence + * until they are restored, they are free to be used. + * + * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode + * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the + * wakeup reason if we branch to kvm_start_guest. + */ + mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop2: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + /* + * Lock bit is set in one of the 2 cases- + * a. In the sleep/winkle enter path, the last thread is executing + * fastsleep workaround code. + * b. 
In the wake up path, another thread is executing fastsleep + * workaround undo code or resyncing timebase or restoring context + * In either case loop until the lock bit is cleared. + */ + bne core_idle_lock_held + + cmpwi cr2,r15,0 + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi cr1,r4,0 /* Check if first in subcore */ + + /* + * At this stage + * cr1 - 0b0100 if first thread to wakeup in subcore + * cr2 - 0b0100 if first thread to wakeup in core + * cr3- 0b0010 if waking up from sleep or winkle + * cr4 - 0b0100 if waking up from winkle + */ + + or r15,r15,r7 /* Set thread bit */ + + beq cr1,first_thread_in_subcore + + /* Not first thread in subcore to wake up */ + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + b common_exit + +core_idle_lock_held: + HMT_LOW +core_idle_lock_loop: + lwz r15,0(14) + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_loop + HMT_MEDIUM + b lwarx_loop2 + +first_thread_in_subcore: + /* First thread in subcore to wakeup */ + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + + /* + * If waking up from sleep, subcore state is not lost. Hence + * skip subcore state restore + */ + bne cr4,subcore_state_restored + + /* Restore per-subcore state */ + ld r4,_SDR1(r1) + mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 + +subcore_state_restored: + /* + * Check if the thread is also the first thread in the core. If not, + * skip to clear_lock. + */ + bne cr2,clear_lock + +first_thread_in_core: + + /* + * First thread in the core waking up from fastsleep. It needs to + * call the fastsleep workaround code if the platform requires it. + * Call it unconditionally here. The below branch instruction will + * be patched out when the idle states are discovered if platform + * does not require workaround. 
+ */ +.global pnv_fastsleep_workaround_at_exit +pnv_fastsleep_workaround_at_exit: + b fastsleep_workaround_at_exit + +timebase_resync: + /* Do timebase resync if we are waking up from sleep. Use cr3 value + * set in exceptions-64s.S */ + ble cr3,clear_lock /* Time base re-sync */ - li r3,OPAL_RESYNC_TIMEBASE + li r0,OPAL_RESYNC_TIMEBASE bl opal_call_realmode; - /* TODO: Check r3 for failure */ + /* + * If waking up from sleep, per core state is not lost, skip to + * clear_lock. + */ + bne cr4,clear_lock + + /* Restore per core state */ + ld r4,_TSCR(r1) + mtspr SPRN_TSCR,r4 + ld r4,_WORC(r1) + mtspr SPRN_WORC,r4 + +clear_lock: + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + lwsync + stw r15,0(r14) + +common_exit: + /* + * Common to all threads. + * + * If waking up from sleep, hypervisor state is not lost. Hence + * skip hypervisor state restore. + */ + bne cr4,hypervisor_state_restored + + /* Waking up from winkle */ + + /* Restore per thread state */ + bl __restore_cpu_power8 + + /* Restore SLB from PACA */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + ld r4,_SPURR(r1) + mtspr SPRN_SPURR,r4 + ld r4,_PURR(r1) + mtspr SPRN_PURR,r4 + ld r4,_DSCR(r1) + mtspr SPRN_DSCR,r4 + ld r4,_WORT(r1) + mtspr SPRN_WORT,r4 + +hypervisor_state_restored: + + li r5,PNV_THREAD_RUNNING + stb r5,PACA_THREAD_IDLE_STATE(r13) + + mtspr SPRN_SRR1,r16 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. 
testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 6f + b kvm_start_guest +6: +#endif + REST_NVGPRS(r1) REST_GPR(2, r1) ld r3,_CCR(r1) @@ -212,6 +461,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) mtspr SPRN_SRR0,r5 rfid +fastsleep_workaround_at_exit: + li r3,1 + li r4,0 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + b timebase_resync + /* * R3 here contains the value that will be returned to the caller * of power7_nap. diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8b2d2dc..8ec017c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -700,7 +700,6 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); preempt_disable(); - cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) smp_ops->setup_cpu(cpu); @@ -739,6 +738,14 @@ void start_secondary(void *unused) notify_cpu_starting(cpu); set_cpu_online(cpu, true); + /* + * CPU must be marked active and online before we signal back to the + * master, because the scheduler needs to see the cpu_online and + * cpu_active bits set. 
+ */ + smp_wmb(); + cpu_callin_map[cpu] = 1; + local_irq_enable(); cpu_startup_entry(CPUHP_ONLINE); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index dba3408..f162d0b 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev, \ } \ ret = sprintf(buf, _fmt, _expr); \ e_free: \ - kfree(page); \ + kmem_cache_free(hv_page_cache, page); \ return ret; \ } \ static DEVICE_ATTR_RO(_name) @@ -217,11 +217,14 @@ static bool is_physical_domain(int domain) domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; } +DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096); +DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096); + static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret = -ENOMEM; + unsigned long ret; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); - request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!request_buffer) - goto out; + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); - result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!result_buffer) - goto out_free_request_buffer; + memset(request_buffer, 0, 4096); + memset(result_buffer, 0, 4096); *request_buffer = (struct reqb) { .buf = { @@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, domain, offset, ix, lpar, ret, ret, result_buffer->buf.detailed_rc, result_buffer->buf.failing_request_ix); - goto out_free_result_buffer; + goto out; } *res = be64_to_cpu(result_buffer->result); -out_free_result_buffer: - kfree(result_buffer); -out_free_request_buffer: - kfree(request_buffer); out: return ret; } diff 
--git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 0a299be..54eca8b 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -158,6 +158,43 @@ opal_tracepoint_return: blr #endif +/* + * Make opal call in realmode. This is a generic function to be called + * from realmode. It handles endianness. + * + * r13 - paca pointer + * r1 - stack pointer + * r0 - opal token + */ +_GLOBAL(opal_call_realmode) + mflr r12 + std r12,PPC_LR_STKOFF(r1) + ld r2,PACATOC(r13) + /* Set opal return address */ + LOAD_REG_ADDR(r12,return_from_opal_call) + mtlr r12 + + mfmsr r12 +#ifdef __LITTLE_ENDIAN__ + /* Handle endian-ness */ + li r11,MSR_LE + andc r12,r12,r11 +#endif + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +return_from_opal_call: +#ifdef __LITTLE_ENDIAN__ + FIXUP_ENDIAN +#endif + ld r12,PPC_LR_STKOFF(r1) + mtlr r12 + blr + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -247,6 +284,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); @@ -254,3 +292,4 @@ OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); +OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cb0b6de..f10b9ec 100644 --- 
a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -9,8 +9,9 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG +#define pr_fmt(fmt) "opal: " fmt +#include <linux/printk.h> #include <linux/types.h> #include <linux/of.h> #include <linux/of_fdt.h> @@ -625,6 +626,39 @@ static int opal_sysfs_init(void) return 0; } +static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, bin_attr->private, + bin_attr->size); +} + +static BIN_ATTR_RO(symbol_map, 0); + +static void opal_export_symmap(void) +{ + const __be64 *syms; + unsigned int size; + struct device_node *fw; + int rc; + + fw = of_find_node_by_path("/ibm,opal/firmware"); + if (!fw) + return; + syms = of_get_property(fw, "symbol-map", &size); + if (!syms || size != 2 * sizeof(__be64)) + return; + + /* Setup attributes */ + bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0])); + bin_attr_symbol_map.size = be64_to_cpu(syms[1]); + + rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); +} + static void __init opal_dump_region_init(void) { void *addr; @@ -653,6 +687,14 @@ static void opal_ipmi_init(struct device_node *opal_node) of_platform_device_create(np, NULL, NULL); } +static void opal_i2c_create_devs(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "ibm,opal-i2c") + of_platform_device_create(np, NULL, NULL); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -679,6 +721,9 @@ static int __init opal_init(void) of_node_put(consoles); } + /* Create i2c platform devices */ + opal_i2c_create_devs(); + /* Find all OPAL interrupts and request them */ irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); pr_debug("opal: Found %d interrupts reserved for OPAL\n", @@ -702,6 +747,8 @@ static int 
__init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { + /* Export symbol map to userspace */ + opal_export_symmap(); /* Setup dump region interface */ opal_dump_region_init(); /* Setup error log interface */ @@ -824,3 +871,4 @@ EXPORT_SYMBOL_GPL(opal_rtc_read); EXPORT_SYMBOL_GPL(opal_rtc_write); EXPORT_SYMBOL_GPL(opal_tpo_read); EXPORT_SYMBOL_GPL(opal_tpo_write); +EXPORT_SYMBOL_GPL(opal_i2c_request); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 6c8e2d1..604c48e 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) } #endif +extern u32 pnv_get_supported_cpuidle_states(void); + extern void pnv_lpc_init(void); bool cpu_core_split_required(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 30b1c3e..b700a32 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -36,8 +36,12 @@ #include <asm/opal.h> #include <asm/kexec.h> #include <asm/smp.h> +#include <asm/cputhreads.h> +#include <asm/cpuidle.h> +#include <asm/code-patching.h> #include "powernv.h" +#include "subcore.h" static void __init pnv_setup_arch(void) { @@ -288,6 +292,168 @@ static void __init pnv_setup_machdep_rtas(void) } #endif /* CONFIG_PPC_POWERNV_RTAS */ +static u32 supported_cpuidle_states; + +int pnv_save_sprs_for_winkle(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * all cpus at boot. Get these reg values of current cpu and use the + * same accross all cpus. 
+ */ + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + + for_each_possible_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + + /* + * HSPRG0 is used to store the cpu's pointer to paca. Hence last + * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 + * with 63rd bit set, so that when a thread wakes up at 0x100 we + * can use this bit to distinguish between fastsleep and + * deep winkle. + */ + hsprg0_val |= 1; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + + return 0; +} + +static void pnv_alloc_idle_core_states(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + u32 *core_idle_state; + + /* + * core_idle_state - First 8 bits track the idle state of each thread + * of the core. The 8th bit is the lock bit. Initially all thread bits + * are set. They are cleared when the thread enters deep idle state + * like sleep and winkle. Initially the lock bit is cleared. + * The lock bit has 2 purposes + * a. While the first thread is restoring core state, it prevents + * other threads in the core from switching to process context. + * b. 
While the last thread in the core is saving the core state, it + * prevents a different thread from waking up. + */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + const __be32 *idle_state_flags; + u32 len_flags, flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return 0; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + return 0; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return 0; + } + + idle_state_flags = of_get_property(power_mgt, + "ibm,cpu-idle-state-flags", &len_flags); + if (!idle_state_flags) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return 0; + } + + dt_idle_states = len_flags / sizeof(u32); + + for (i = 0; i < dt_idle_states; i++) { + flags = be32_to_cpu(idle_state_flags[i]); + supported_cpuidle_states |= flags; + } + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } + pnv_alloc_idle_core_states(); + return 0; +} + 
+subsys_initcall(pnv_init_idle_states); + static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b716f66..fc34025 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -150,6 +150,7 @@ static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; unsigned long srr1; + u32 idle_states; /* Standard hot unplug procedure */ local_irq_disable(); @@ -160,13 +161,23 @@ static void pnv_smp_cpu_kill_self(void) generic_set_cpu_dead(cpu); smp_wmb(); + idle_states = pnv_get_supported_cpuidle_states(); /* We don't want to take decrementer interrupts while we are offline, * so clear LPCR:PECE1. We keep PECE2 enabled. */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { + ppc64_runlatch_off(); - srr1 = power7_nap(1); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) + srr1 = power7_winkle(); + else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + srr1 = power7_sleep(); + else + srr1 = power7_nap(1); + ppc64_runlatch_on(); /* @@ -198,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void) #endif /* CONFIG_HOTPLUG_CPU */ +static int pnv_cpu_bootable(unsigned int nr) +{ + /* + * Starting with POWER8, the subcore logic relies on all threads of a + * core being booted so that they can participate in split mode + * switches. So on those machines we ignore the smt_enabled_at_boot + * setting (smt-enabled on the kernel command line). 
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return 1; + + return smp_generic_cpu_bootable(nr); +} + static struct smp_ops_t pnv_smp_ops = { .message_pass = smp_muxed_ipi_message_pass, .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ .probe = xics_smp_probe, .kick_cpu = pnv_smp_kick_cpu, .setup_cpu = pnv_smp_setup_cpu, - .cpu_bootable = smp_generic_cpu_bootable, + .cpu_bootable = pnv_cpu_bootable, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = pnv_smp_cpu_disable, .cpu_die = generic_cpu_die, diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index c87f96b..f60f80a 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -160,6 +160,18 @@ static void wait_for_sync_step(int step) mb(); } +static void update_hid_in_slw(u64 hid0) +{ + u64 idle_states = pnv_get_supported_cpuidle_states(); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) { + /* OPAL call to patch slw with the new HID0 value */ + u64 cpu_pir = hard_smp_processor_id(); + + opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0); + } +} + static void unsplit_core(void) { u64 hid0, mask; @@ -179,6 +191,7 @@ static void unsplit_core(void) hid0 = mfspr(SPRN_HID0); hid0 &= ~HID0_POWER8_DYNLPARDIS; mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); while (mfspr(SPRN_HID0) & mask) cpu_relax(); @@ -215,6 +228,7 @@ static void split_core(int new_mode) hid0 = mfspr(SPRN_HID0); hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); /* Wait for it to happen */ while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) @@ -251,6 +265,25 @@ bool cpu_core_split_required(void) return true; } +void update_subcore_sibling_mask(void) +{ + int cpu; + /* + * sibling mask for the first cpu. Left shift this by required bits + * to get sibling mask for the rest of the cpus. 
+ */ + int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1; + + for_each_possible_cpu(cpu) { + int tid = cpu_thread_in_core(cpu); + int offset = (tid / threads_per_subcore) * threads_per_subcore; + int mask = sibling_mask_first_cpu << offset; + + paca[cpu].subcore_sibling_mask = mask; + + } +} + static int cpu_update_split_mode(void *data) { int cpu, new_mode = *(int *)data; @@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data) /* Make the new mode public */ subcores_per_core = new_mode; threads_per_subcore = threads_per_core / subcores_per_core; + update_subcore_sibling_mask(); /* Make sure the new mode is written before we exit */ mb(); diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h index 148abc9..84e02ae 100644 --- a/arch/powerpc/platforms/powernv/subcore.h +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -14,5 +14,12 @@ #define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */ #ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP void split_core_secondary_loop(u8 *state); -#endif +extern void update_subcore_sibling_mask(void); +#else +static inline void update_subcore_sibling_mask(void) { }; +#endif /* CONFIG_SMP */ + +#endif /* __ASSEMBLY__ */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d69f1cd..ba397bd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -249,10 +249,6 @@ config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI -config X86_INTEL_MPX - def_bool y - depends on CPU_SUP_INTEL - config X86_32_SMP def_bool y depends on X86_32 && SMP @@ -887,11 +883,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI + select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ config X86_IO_APIC - def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI - select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC + depends on 
X86_LOCAL_APIC select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS @@ -1594,6 +1590,32 @@ config X86_SMAP If unsure, say Y. +config X86_INTEL_MPX + prompt "Intel MPX (Memory Protection Extensions)" + def_bool n + depends on CPU_SUP_INTEL + ---help--- + MPX provides hardware features that can be used in + conjunction with compiler-instrumented code to check + memory references. It is designed to detect buffer + overflow or underflow bugs. + + This option enables running applications which are + instrumented or otherwise use MPX. It does not use MPX + itself inside the kernel or to protect the kernel + against bad memory references. + + Enabling this option will make the kernel larger: + ~8k of kernel text and 36 bytes of data on a 64-bit + defconfig. It adds a long to the 'mm_struct' which + will increase the kernel memory overhead of each + process and adds some branches to paths used during + exec() and munmap(). + + For details, see Documentation/x86/intel_mpx.txt + + If unsure, say N. 
+ config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4615906..9662290 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,30 +94,7 @@ extern void trace_call_function_single_interrupt(void); #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #endif /* CONFIG_TRACING */ -/* IOAPIC */ -#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) -extern unsigned long io_apic_irqs; - -extern void setup_IO_APIC(void); -extern void disable_IO_APIC(void); - -struct io_apic_irq_attr { - int ioapic; - int ioapic_pin; - int trigger; - int polarity; -}; - -static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, - int ioapic, int ioapic_pin, - int trigger, int polarity) -{ - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; -} - +#ifdef CONFIG_IRQ_REMAP /* Intel specific interrupt remapping information */ struct irq_2_iommu { struct intel_iommu *iommu; @@ -131,14 +108,12 @@ struct irq_2_irte { u16 devid; /* Device ID for IRTE table */ u16 index; /* Index into IRTE table*/ }; +#endif /* CONFIG_IRQ_REMAP */ + +#ifdef CONFIG_X86_LOCAL_APIC +struct irq_data; -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. 
- */ struct irq_cfg { - struct irq_pin_list *irq_2_pin; cpumask_var_t domain; cpumask_var_t old_domain; u8 vector; @@ -150,18 +125,39 @@ struct irq_cfg { struct irq_2_irte irq_2_irte; }; #endif + union { +#ifdef CONFIG_X86_IO_APIC + struct { + struct list_head irq_2_pin; + }; +#endif + }; }; +extern struct irq_cfg *irq_cfg(unsigned int irq); +extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); +extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void clear_irq_vector(int irq, struct irq_cfg *cfg); +extern void setup_vector_irq(int cpu); +#ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); +extern void irq_complete_move(struct irq_cfg *cfg); +#else +static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void irq_complete_move(struct irq_cfg *c) { } +#endif -struct irq_data; -int __ioapic_set_affinity(struct irq_data *, const struct cpumask *, - unsigned int *dest_id); -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); -extern void setup_ioapic_dest(void); - -extern void enable_IO_APIC(void); +extern int apic_retrigger_irq(struct irq_data *data); +extern void apic_ack_edge(struct irq_data *data); +extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id); +#else /* CONFIG_X86_LOCAL_APIC */ +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +#endif /* CONFIG_X86_LOCAL_APIC */ /* Statistics */ extern atomic_t irq_err_count; @@ -185,7 +181,8 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif -extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); +extern void (*__initconst 
interrupt[FIRST_SYSTEM_VECTOR + - FIRST_EXTERNAL_VECTOR])(void); #ifdef CONFIG_TRACING #define trace_interrupt interrupt #endif @@ -195,17 +192,6 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void setup_vector_irq(int cpu); - -#ifdef CONFIG_X86_IO_APIC -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); -extern void __setup_vector_irq(int cpu); -#else -static inline void lock_vector_lock(void) {} -static inline void unlock_vector_lock(void) {} -static inline void __setup_vector_irq(int cpu) {} -#endif #endif /* !ASSEMBLY_ */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 1733ab4..bf006cc 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -132,6 +132,10 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; +extern unsigned long io_apic_irqs; + +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) + /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. 
@@ -139,18 +143,15 @@ extern int noioapicreroute; #define io_apic_assign_pci_irqs \ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -struct io_apic_irq_attr; struct irq_cfg; extern void ioapic_insert_resources(void); +extern int arch_early_ioapic_init(void); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, unsigned int, int, struct io_apic_irq_attr *); extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); -extern void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id); extern void native_eoi_ioapic_pin(int apic, int pin, int vector); extern int save_ioapic_entries(void); @@ -160,6 +161,13 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); +struct io_apic_irq_attr { + int ioapic; + int ioapic_pin; + int trigger; + int polarity; +}; + enum ioapic_domain_type { IOAPIC_DOMAIN_INVALID, IOAPIC_DOMAIN_LEGACY, @@ -188,8 +196,10 @@ extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); extern void mp_unmap_irq(int irq); -extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, - struct ioapic_domain_cfg *cfg); +extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg); +extern int mp_unregister_ioapic(u32 gsi_base); +extern int mp_ioapic_registered(u32 gsi_base); extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); @@ -227,19 +237,25 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned extern void io_apic_eoi(unsigned int apic, unsigned int vector); -extern bool mp_should_keep_irq(struct device *dev); - +extern void setup_IO_APIC(void); +extern void 
enable_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void setup_ioapic_dest(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); +extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ +#define IO_APIC_IRQ(x) 0 #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop static inline void ioapic_insert_resources(void) { } +static inline int arch_early_ioapic_init(void) { return 0; } +static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } static inline void mp_unmap_irq(int irq) { } -static inline bool mp_should_keep_irq(struct device *dev) { return 1; } static inline int save_ioapic_entries(void) { @@ -262,7 +278,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL -#define native_compose_msi_msg NULL #define native_eoi_ioapic_pin NULL #endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5702d7e..666c89e 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -126,6 +126,12 @@ #define NR_VECTORS 256 +#ifdef CONFIG_X86_LOCAL_APIC +#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#else +#define FIRST_SYSTEM_VECTOR NR_VECTORS +#endif + #define FPU_IRQ 13 #define FIRST_VM86_IRQ 3 diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 0892ea0..4e370a5 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -96,12 +96,15 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. 
*/ struct msi_desc; +void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, + unsigned int dest, struct msi_msg *msg, u8 hpet_id); int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, unsigned int irq_base, unsigned int irq_offset); #else +#define native_compose_msi_msg NULL #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index fa1195d..164e3f8 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); +extern bool mp_should_keep_irq(struct device *dev); + struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb..6e1aaf7 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. 
+ */ unsigned int lm:1; #endif }; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a142e77..4433a4b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -76,6 +76,19 @@ int acpi_fix_pin2_polarity __initdata; static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #endif +/* + * Locks related to IOAPIC hotplug + * Hotplug side: + * ->device_hotplug_lock + * ->acpi_ioapic_lock + * ->ioapic_lock + * Interrupt mapping side: + * ->acpi_ioapic_lock + * ->ioapic_mutex + * ->ioapic_lock + */ +static DEFINE_MUTEX(acpi_ioapic_lock); + /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -395,10 +408,6 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); - trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; node = dev ? 
dev_to_node(dev) : NUMA_NO_NODE; @@ -411,7 +420,8 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, if (irq < 0) return irq; - if (enable_update_mptable) + /* Don't set up the ACPI SCI because it's already set up */ + if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi) mp_config_acpi_gsi(dev, gsi, trigger, polarity); return irq; @@ -424,9 +434,6 @@ static void mp_unregister_gsi(u32 gsi) if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return; - if (acpi_gbl_FADT.sci_interrupt == gsi) - return; - irq = mp_map_gsi_to_irq(gsi, 0); if (irq > 0) mp_unmap_irq(irq); @@ -609,8 +616,10 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { *irqp = gsi; } else { + mutex_lock(&acpi_ioapic_lock); irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); + mutex_unlock(&acpi_ioapic_lock); if (irq < 0) return -1; *irqp = irq; @@ -650,7 +659,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int irq = gsi; #ifdef CONFIG_X86_IO_APIC + mutex_lock(&acpi_ioapic_lock); irq = mp_register_gsi(dev, gsi, trigger, polarity); + mutex_unlock(&acpi_ioapic_lock); #endif return irq; @@ -659,7 +670,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, static void acpi_unregister_gsi_ioapic(u32 gsi) { #ifdef CONFIG_X86_IO_APIC + mutex_lock(&acpi_ioapic_lock); mp_unregister_gsi(gsi); + mutex_unlock(&acpi_ioapic_lock); #endif } @@ -690,6 +703,7 @@ void acpi_unregister_gsi(u32 gsi) } EXPORT_SYMBOL_GPL(acpi_unregister_gsi); +#ifdef CONFIG_X86_LOCAL_APIC static void __init acpi_set_irq_model_ioapic(void) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; @@ -697,6 +711,7 @@ static void __init acpi_set_irq_model_ioapic(void) __acpi_unregister_gsi = acpi_unregister_gsi_ioapic; acpi_ioapic = 1; } +#endif /* * ACPI based hotplug support for CPU @@ -759,20 +774,74 @@ EXPORT_SYMBOL(acpi_unmap_lsapic); int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { - /* TBD */ - return 
-EINVAL; -} + int ret = -ENOSYS; +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC + int ioapic_id; + u64 addr; + struct ioapic_domain_cfg cfg = { + .type = IOAPIC_DOMAIN_DYNAMIC, + .ops = &acpi_irqdomain_ops, + }; + + ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr); + if (ioapic_id < 0) { + unsigned long long uid; + acpi_status status; + status = acpi_evaluate_integer(handle, METHOD_NAME__UID, + NULL, &uid); + if (ACPI_FAILURE(status)) { + acpi_handle_warn(handle, "failed to get IOAPIC ID.\n"); + return -EINVAL; + } + ioapic_id = (int)uid; + } + + mutex_lock(&acpi_ioapic_lock); + ret = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg); + mutex_unlock(&acpi_ioapic_lock); +#endif + + return ret; +} EXPORT_SYMBOL(acpi_register_ioapic); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) { - /* TBD */ - return -EINVAL; -} + int ret = -ENOSYS; +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC + mutex_lock(&acpi_ioapic_lock); + ret = mp_unregister_ioapic(gsi_base); + mutex_unlock(&acpi_ioapic_lock); +#endif + + return ret; +} EXPORT_SYMBOL(acpi_unregister_ioapic); +/** + * acpi_ioapic_registered - Check whether IOAPIC associated with @gsi_base + * has been registered + * @handle: ACPI handle of the IOAPIC device + * @gsi_base: GSI base associated with the IOAPIC + * + * Assume caller holds some type of lock to serialize acpi_ioapic_registered() + * with acpi_register_ioapic()/acpi_unregister_ioapic(). 
+ */ +int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base) +{ + int ret = 0; + +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC + mutex_lock(&acpi_ioapic_lock); + ret = mp_ioapic_registered(gsi_base); + mutex_unlock(&acpi_ioapic_lock); +#endif + + return ret; +} + static int __init acpi_parse_sbf(struct acpi_table_header *table) { struct acpi_table_boot *sb; @@ -1185,7 +1254,9 @@ static void __init acpi_process_madt(void) /* * Parse MADT IO-APIC entries */ + mutex_lock(&acpi_ioapic_lock); error = acpi_parse_madt_ioapic_entries(); + mutex_unlock(&acpi_ioapic_lock); if (!error) { acpi_set_irq_model_ioapic(); diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index dcb5b15..8bb12ddc 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,10 +2,12 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o +obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_HT_IRQ) += htirq.o obj-$(CONFIG_SMP) += ipi.o ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ba6cc04..29b5b18 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -196,7 +196,7 @@ static int disable_apic_timer __initdata; int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); -int first_system_vector = 0xfe; +int first_system_vector = FIRST_SYSTEM_VECTOR; /* * Debug level, exported for io_apic.c @@ -1930,7 +1930,7 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -static inline void __smp_spurious_interrupt(void) +static inline void __smp_spurious_interrupt(u8 vector) { u32 v; @@ -1939,30 +1939,32 @@ static inline void __smp_spurious_interrupt(void) * if it is a vectored one. Just in case... 
* Spurious interrupts should not be ACKed. */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); + if (v & (1 << (vector & 0x1f))) ack_APIC_irq(); inc_irq_stat(irq_spurious_count); /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); + pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " + "should never happen.\n", vector, smp_processor_id()); } __visible void smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); - __smp_spurious_interrupt(); + __smp_spurious_interrupt(~regs->orig_ax); exiting_irq(); } __visible void smp_trace_spurious_interrupt(struct pt_regs *regs) { + u8 vector = ~regs->orig_ax; + entering_irq(); - trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); - __smp_spurious_interrupt(); - trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR); + trace_spurious_apic_entry(vector); + __smp_spurious_interrupt(vector); + trace_spurious_apic_exit(vector); exiting_irq(); } diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c new file mode 100644 index 0000000..816f36e --- /dev/null +++ b/arch/x86/kernel/apic/htirq.c @@ -0,0 +1,107 @@ +/* + * Support Hypertransport IRQ + * + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * Moved from arch/x86/kernel/apic/io_apic.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/htirq.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/hypertransport.h> + +/* + * Hypertransport interrupt support + */ +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + struct ht_irq_msg msg; + + fetch_ht_irq_msg(irq, &msg); + + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); + + write_ht_irq_msg(irq, &msg); +} + +static int +ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int dest; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + target_ht_irq(data->irq, dest, cfg->vector); + return IRQ_SET_MASK_OK_NOCOPY; +} + +static struct irq_chip ht_irq_chip = { + .name = "PCI-HT", + .irq_mask = mask_ht_irq, + .irq_unmask = unmask_ht_irq, + .irq_ack = apic_ack_edge, + .irq_set_affinity = ht_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +{ + struct irq_cfg *cfg; + struct ht_irq_msg msg; + unsigned dest; + int err; + + if (disable_apic) + return -ENXIO; + + cfg = irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((apic->irq_dest_mode == 0) ? 
+ HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; + + write_ht_irq_msg(irq, &msg); + + irq_set_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + + dev_dbg(&dev->dev, "irq %d for HT\n", irq); + + return 0; +} diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a6745e7..3f5f604 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -32,15 +32,11 @@ #include <linux/module.h> #include <linux/syscore_ops.h> #include <linux/irqdomain.h> -#include <linux/msi.h> -#include <linux/htirq.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ #include <linux/slab.h> #include <linux/bootmem.h> -#include <linux/dmar.h> -#include <linux/hpet.h> #include <asm/idle.h> #include <asm/io.h> @@ -52,17 +48,12 @@ #include <asm/dma.h> #include <asm/timer.h> #include <asm/i8259.h> -#include <asm/msidef.h> -#include <asm/hypertransport.h> #include <asm/setup.h> #include <asm/irq_remapping.h> -#include <asm/hpet.h> #include <asm/hw_irq.h> #include <asm/apic.h> -#define __apicdebuginit(type) static type __init - #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) #define for_each_ioapic_reverse(idx) \ @@ -74,7 +65,7 @@ for_each_pin((idx), (pin)) #define for_each_irq_pin(entry, head) \ - for (entry = head; entry; entry = entry->next) + list_for_each_entry(entry, &head, list) /* * Is the SiS APIC rmw bug present ? 
@@ -83,7 +74,6 @@ int sis_apic_bug = -1; static DEFINE_RAW_SPINLOCK(ioapic_lock); -static DEFINE_RAW_SPINLOCK(vector_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; @@ -112,6 +102,7 @@ static struct ioapic { struct ioapic_domain_cfg irqdomain_cfg; struct irq_domain *irqdomain; struct mp_pin_info *pin_info; + struct resource *iomem_res; } ioapics[MAX_IO_APICS]; #define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver @@ -205,8 +196,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); - /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ void mp_save_irq(struct mpc_intsrc *m) { @@ -228,8 +217,8 @@ void mp_save_irq(struct mpc_intsrc *m) } struct irq_pin_list { + struct list_head list; int apic, pin; - struct irq_pin_list *next; }; static struct irq_pin_list *alloc_irq_pin_list(int node) @@ -237,7 +226,26 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); } -int __init arch_early_irq_init(void) +static void alloc_ioapic_saved_registers(int idx) +{ + size_t size; + + if (ioapics[idx].saved_registers) + return; + + size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers; + ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL); + if (!ioapics[idx].saved_registers) + pr_err("IOAPIC %d: suspend/resume impossible!\n", idx); +} + +static void free_ioapic_saved_registers(int idx) +{ + kfree(ioapics[idx].saved_registers); + ioapics[idx].saved_registers = NULL; +} + +int __init arch_early_ioapic_init(void) { struct irq_cfg *cfg; int i, node = cpu_to_node(0); @@ -245,13 +253,8 @@ int __init arch_early_irq_init(void) if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; - for_each_ioapic(i) { - ioapics[i].saved_registers = - kzalloc(sizeof(struct IO_APIC_route_entry) * - ioapics[i].nr_registers, 
GFP_KERNEL); - if (!ioapics[i].saved_registers) - pr_err("IOAPIC %d: suspend/resume impossible!\n", i); - } + for_each_ioapic(i) + alloc_ioapic_saved_registers(i); /* * For legacy IRQ's, start with assigning irq0 to irq15 to @@ -266,61 +269,6 @@ int __init arch_early_irq_init(void) return 0; } -static inline struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irq_get_chip_data(irq); -} - -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - - cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); - if (!cfg) - return NULL; - if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) - goto out_cfg; - if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) - goto out_domain; - return cfg; -out_domain: - free_cpumask_var(cfg->domain); -out_cfg: - kfree(cfg); - return NULL; -} - -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) -{ - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); -} - -static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) -{ - int res = irq_alloc_desc_at(at, node); - struct irq_cfg *cfg; - - if (res < 0) { - if (res != -EEXIST) - return NULL; - cfg = irq_cfg(at); - if (cfg) - return cfg; - } - - cfg = alloc_irq_cfg(at, node); - if (cfg) - irq_set_chip_data(at, cfg); - else - irq_free_desc(at); - return cfg; -} - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -445,15 +393,12 @@ static void ioapic_mask_entry(int apic, int pin) */ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list **last, *entry; + struct irq_pin_list *entry; /* don't allow duplicates */ - last = &cfg->irq_2_pin; - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, cfg->irq_2_pin) if (entry->apic == apic && entry->pin == pin) return 0; - last = &entry->next; - } entry = alloc_irq_pin_list(node); if (!entry) { @@ -464,22 +409,19 @@ 
static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi entry->apic = apic; entry->pin = pin; - *last = entry; + list_add_tail(&entry->list, &cfg->irq_2_pin); return 0; } static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) { - struct irq_pin_list **last, *entry; + struct irq_pin_list *tmp, *entry; - last = &cfg->irq_2_pin; - for_each_irq_pin(entry, cfg->irq_2_pin) + list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list) if (entry->apic == apic && entry->pin == pin) { - *last = entry->next; + list_del(&entry->list); kfree(entry); return; - } else { - last = &entry->next; } } @@ -559,7 +501,7 @@ static void mask_ioapic(struct irq_cfg *cfg) static void mask_ioapic_irq(struct irq_data *data) { - mask_ioapic(data->chip_data); + mask_ioapic(irqd_cfg(data)); } static void __unmask_ioapic(struct irq_cfg *cfg) @@ -578,7 +520,7 @@ static void unmask_ioapic(struct irq_cfg *cfg) static void unmask_ioapic_irq(struct irq_data *data) { - unmask_ioapic(data->chip_data); + unmask_ioapic(irqd_cfg(data)); } /* @@ -1164,8 +1106,7 @@ void mp_unmap_irq(int irq) * Find a specific PCI IRQ entry. * Not an __init, possibly needed by modules */ -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, - struct io_apic_irq_attr *irq_attr) +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) { int irq, i, best_ioapic = -1, best_idx = -1; @@ -1219,195 +1160,11 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, return -1; out: - irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, - IOAPIC_MAP_ALLOC); - if (irq > 0) - set_io_apic_irq_attr(irq_attr, best_ioapic, - mp_irqs[best_idx].dstirq, - irq_trigger(best_idx), - irq_polarity(best_idx)); - return irq; + return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, + IOAPIC_MAP_ALLOC); } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. 
- */ - raw_spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - raw_spin_unlock(&vector_lock); -} - -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, err; - cpumask_var_t tmp_mask; - - if (cfg->move_in_progress) - return -EBUSY; - - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; - - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - cpumask_clear(cfg->old_domain); - cpu = cpumask_first_and(mask, cpu_online_mask); - while (cpu < nr_cpu_ids) { - int new_cpu, vector, offset; - - apic->vector_allocation_domain(cpu, tmp_mask, mask); - - if (cpumask_subset(tmp_mask, cfg->domain)) { - err = 0; - if (cpumask_equal(tmp_mask, cfg->domain)) - break; - /* - * New cpumask using the vector is a proper subset of - * the current in use mask. So cleanup the vector - * allocation for the members that are not used anymore. 
- */ - cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - cpumask_and(cfg->domain, cfg->domain, tmp_mask); - break; - } - - vector = current_vector; - offset = current_offset; -next: - vector += 16; - if (vector >= first_system_vector) { - offset = (offset + 1) % 16; - vector = FIRST_EXTERNAL_VECTOR + offset; - } - - if (unlikely(current_vector == vector)) { - cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); - cpumask_andnot(tmp_mask, mask, cfg->old_domain); - cpu = cpumask_first_and(tmp_mask, cpu_online_mask); - continue; - } - - if (test_bit(vector, used_vectors)) - goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { - if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) - goto next; - } - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (cfg->vector) { - cpumask_copy(cfg->old_domain, cfg->domain); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; - } - free_cpumask_var(tmp_mask); - return err; -} - -int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) -{ - int err; - unsigned long flags; - - raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); - raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq, struct irq_cfg *cfg) -{ - int cpu, vector; - - BUG_ON(!cfg->vector); - - vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - - cfg->vector = 0; - cpumask_clear(cfg->domain); - - if (likely(!cfg->move_in_progress)) - return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { - for (vector = 
FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - if (per_cpu(vector_irq, cpu)[vector] != irq) - continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - break; - } - } - cfg->move_in_progress = 0; -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - int irq, vector; - struct irq_cfg *cfg; - - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); - /* Mark the inuse vectors */ - for_each_active_irq(irq) { - cfg = irq_cfg(irq); - if (!cfg) - continue; - - if (!cpumask_test_cpu(cpu, cfg->domain)) - continue; - vector = cfg->vector; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq <= VECTOR_UNDEFINED) - continue; - - cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - } - raw_spin_unlock(&vector_lock); -} - static struct irq_chip ioapic_chip; #ifdef CONFIG_X86_32 @@ -1496,7 +1253,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, &dest)) { pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - __clear_irq_vector(irq, cfg); + clear_irq_vector(irq, cfg); return; } @@ -1510,7 +1267,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - __clear_irq_vector(irq, cfg); + clear_irq_vector(irq, cfg); return; } @@ -1641,7 +1398,7 @@ void ioapic_zap_locks(void) raw_spin_lock_init(&ioapic_lock); } -__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +static void __init print_IO_APIC(int 
ioapic_idx) { union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; @@ -1698,7 +1455,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); } -__apicdebuginit(void) print_IO_APICs(void) +void __init print_IO_APICs(void) { int ioapic_idx; struct irq_cfg *cfg; @@ -1731,8 +1488,7 @@ __apicdebuginit(void) print_IO_APICs(void) cfg = irq_cfg(irq); if (!cfg) continue; - entry = cfg->irq_2_pin; - if (!entry) + if (list_empty(&cfg->irq_2_pin)) continue; printk(KERN_DEBUG "IRQ%d ", irq); for_each_irq_pin(entry, cfg->irq_2_pin) @@ -1743,205 +1499,6 @@ __apicdebuginit(void) print_IO_APICs(void) printk(KERN_INFO ".................................... done.\n"); } -__apicdebuginit(void) print_APIC_field(int base) -{ - int i; - - printk(KERN_DEBUG); - - for (i = 0; i < 8; i++) - pr_cont("%08x", apic_read(base + i*0x10)); - - pr_cont("\n"); -} - -__apicdebuginit(void) print_local_APIC(void *dummy) -{ - unsigned int i, v, ver, maxlvt; - u64 icr; - - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", - smp_processor_id(), hard_smp_processor_id()); - v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); - v = apic_read(APIC_LVR); - printk(KERN_INFO "... APIC VERSION: %08x\n", v); - ver = GET_APIC_VERSION(v); - maxlvt = lapic_get_maxlvt(); - - v = apic_read(APIC_TASKPRI); - printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (!APIC_XAPIC(ver)) { - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - } - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); - } - - /* - * Remote read supported only in the 82489DX and local APIC for - * Pentium processors. - */ - if (!APIC_INTEGRATED(ver) || maxlvt == 3) { - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... 
APIC RRR: %08x\n", v); - } - - v = apic_read(APIC_LDR); - printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - if (!x2apic_enabled()) { - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); - } - v = apic_read(APIC_SPIV); - printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); - - printk(KERN_DEBUG "... APIC ISR field:\n"); - print_APIC_field(APIC_ISR); - printk(KERN_DEBUG "... APIC TMR field:\n"); - print_APIC_field(APIC_TMR); - printk(KERN_DEBUG "... APIC IRR field:\n"); - print_APIC_field(APIC_IRR); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - - if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { - v = apic_read(APIC_EFEAT); - maxlvt = (v >> 16) & 0xff; - printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); - v = apic_read(APIC_ECTRL); - printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); - for (i = 0; i < maxlvt; i++) { - v = apic_read(APIC_EILVTn(i)); - printk(KERN_DEBUG "... 
APIC EILVT%d: %08x\n", i, v); - } - } - pr_cont("\n"); -} - -__apicdebuginit(void) print_local_APICs(int maxcpu) -{ - int cpu; - - if (!maxcpu) - return; - - preempt_disable(); - for_each_online_cpu(cpu) { - if (cpu >= maxcpu) - break; - smp_call_function_single(cpu, print_local_APIC, NULL, 1); - } - preempt_enable(); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (!nr_legacy_irqs()) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -static int __initdata show_lapic = 1; -static __init int setup_show_lapic(char *arg) -{ - int num = -1; - - if (strcmp(arg, "all") == 0) { - show_lapic = CONFIG_NR_CPUS; - } else { - get_option(&arg, &num); - if (num >= 0) - show_lapic = num; - } - - return 1; -} -__setup("show_lapic=", setup_show_lapic); - -__apicdebuginit(int) print_ICs(void) -{ - if (apic_verbosity == APIC_QUIET) - return 0; - - print_PIC(); - - /* don't print out if apic is not there */ - if (!cpu_has_apic && !apic_from_smp_config()) - return 0; - - print_local_APICs(show_lapic); - print_IO_APICs(); - - return 0; -} - -late_initcall(print_ICs); - - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -2244,26 +1801,12 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) if (legacy_pic->irq_pending(irq)) was_pending = 1; } - __unmask_ioapic(data->chip_data); + __unmask_ioapic(irqd_cfg(data)); 
raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } -static int ioapic_retrigger_irq(struct irq_data *data) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned long flags; - int cpu; - - raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(cfg->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} - /* * Level and edge triggered IO-APIC interrupts need different handling, * so we use two separate IRQ descriptors. Edge triggered IRQs can be @@ -2273,113 +1816,6 @@ static int ioapic_retrigger_irq(struct irq_data *data) * races. */ -#ifdef CONFIG_SMP -void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - - ack_APIC_irq(); - irq_enter(); - exit_idle(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - int irq; - unsigned int irr; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __this_cpu_read(vector_irq[vector]); - - if (irq <= VECTOR_UNDEFINED) - continue; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - if (!cfg) - continue; - - raw_spin_lock(&desc->lock); - - /* - * Check if the irq migration is in progress. If so, we - * haven't received the cleanup request yet for this irq. 
- */ - if (cfg->move_in_progress) - goto unlock; - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. - */ - if (irr & (1 << (vector % 32))) { - apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; - } - __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); -unlock: - raw_spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) -{ - unsigned me; - - if (likely(!cfg->move_in_progress)) - return; - - me = smp_processor_id(); - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); -} - -static void irq_complete_move(struct irq_cfg *cfg) -{ - __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); -} - -void irq_force_complete_move(int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - if (!cfg) - return; - - __irq_complete_move(cfg, cfg->vector); -} -#else -static inline void irq_complete_move(struct irq_cfg *cfg) { } -#endif - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -2400,41 +1836,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. 
- */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int irq = data->irq; - int err; - - if (!config_enabled(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); - if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - cpumask_copy(data->affinity, mask); - - return 0; -} - - int native_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -2447,24 +1848,17 @@ int native_ioapic_set_affinity(struct irq_data *data, return -EPERM; raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = __ioapic_set_affinity(data, mask, &dest); + ret = apic_set_affinity(data, mask, &dest); if (!ret) { /* Only the high 8 bits are valid. 
*/ dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, data->chip_data); + __target_IO_APIC_irq(irq, dest, irqd_cfg(data)); ret = IRQ_SET_MASK_OK_NOCOPY; } raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } -static void ack_apic_edge(struct irq_data *data) -{ - irq_complete_move(data->chip_data); - irq_move_irq(data); - ack_APIC_irq(); -} - atomic_t irq_mis_count; #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -2547,9 +1941,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, } #endif -static void ack_apic_level(struct irq_data *data) +static void ack_ioapic_level(struct irq_data *data) { - struct irq_cfg *cfg = data->chip_data; + struct irq_cfg *cfg = irqd_cfg(data); int i, irq = data->irq; unsigned long v; bool masked; @@ -2619,10 +2013,10 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_startup = startup_ioapic_irq, .irq_mask = mask_ioapic_irq, .irq_unmask = unmask_ioapic_irq, - .irq_ack = ack_apic_edge, - .irq_eoi = ack_apic_level, + .irq_ack = apic_ack_edge, + .irq_eoi = ack_ioapic_level, .irq_set_affinity = native_ioapic_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, + .irq_retrigger = apic_retrigger_irq, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2965,6 +2359,16 @@ static int mp_irqdomain_create(int ioapic) return 0; } +static void ioapic_destroy_irqdomain(int idx) +{ + if (ioapics[idx].irqdomain) { + irq_domain_remove(ioapics[idx].irqdomain); + ioapics[idx].irqdomain = NULL; + } + kfree(ioapics[idx].pin_info); + ioapics[idx].pin_info = NULL; +} + void __init setup_IO_APIC(void) { int ioapic; @@ -3044,399 +2448,6 @@ static int __init ioapic_init_ops(void) device_initcall(ioapic_init_ops); -/* - * Dynamic irq allocate and deallocation. Should be replaced by irq domains! 
- */ -int arch_setup_hwirq(unsigned int irq, int node) -{ - struct irq_cfg *cfg; - unsigned long flags; - int ret; - - cfg = alloc_irq_cfg(irq, node); - if (!cfg) - return -ENOMEM; - - raw_spin_lock_irqsave(&vector_lock, flags); - ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); - raw_spin_unlock_irqrestore(&vector_lock, flags); - - if (!ret) - irq_set_chip_data(irq, cfg); - else - free_irq_cfg(irq, cfg); - return ret; -} - -void arch_teardown_hwirq(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - free_remapped_irq(irq); - raw_spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); - raw_spin_unlock_irqrestore(&vector_lock, flags); - free_irq_cfg(irq, cfg); -} - -/* - * MSI message composition - */ -void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - msg->address_hi = MSI_ADDR_BASE_HI; - - if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); - - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? 
- MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); -} - -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); - - return 0; -} - -static int -msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - struct msi_msg msg; - unsigned int dest; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - __get_cached_msi_msg(data->msi_desc, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - __pci_write_msi_msg(data->msi_desc, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. 
- */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, - .irq_ack = ack_apic_edge, - .irq_set_affinity = msi_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) -{ - struct irq_chip *chip = &msi_chip; - struct msi_msg msg; - unsigned int irq = irq_base + irq_offset; - int ret; - - ret = msi_compose_msg(dev, irq, &msg, -1); - if (ret < 0) - return ret; - - irq_set_msi_desc_off(irq_base, irq_offset, msidesc); - - /* - * MSI-X message is written per-IRQ, the offset is always 0. - * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. - */ - if (!irq_offset) - pci_write_msi_msg(irq, &msg); - - setup_remapped_irq(irq, irq_cfg(irq), chip); - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); - - return 0; -} - -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - struct msi_desc *msidesc; - unsigned int irq; - int node, ret; - - /* Multiple MSI vectors only supported with interrupt remapping */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - - node = dev_to_node(&dev->dev); - - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_alloc_hwirq(node); - if (!irq) - return -ENOSPC; - - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) { - irq_free_hwirq(irq); - return ret; - } - - } - return 0; -} - -void native_teardown_msi_irq(unsigned int irq) -{ - irq_free_hwirq(irq); -} - -#ifdef CONFIG_DMAR_TABLE -static int -dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct msi_msg msg; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - 
dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -static struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .irq_unmask = dmar_msi_unmask, - .irq_mask = dmar_msi_mask, - .irq_ack = ack_apic_edge, - .irq_set_affinity = dmar_msi_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg, -1); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#ifdef CONFIG_HPET_TIMER - -static int hpet_msi_set_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - struct msi_msg msg; - unsigned int dest; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - hpet_msi_read(data->handler_data, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - hpet_msi_write(data->handler_data, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -static struct irq_chip hpet_msi_type = { - .name = "HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, - .irq_ack = ack_apic_edge, - .irq_set_affinity = hpet_msi_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int default_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - struct irq_chip *chip = &hpet_msi_type; - struct msi_msg msg; - int ret; - - ret = msi_compose_msg(NULL, irq, &msg, id); - if (ret < 0) - 
return ret; - - hpet_msi_write(irq_get_handler_data(irq), &msg); - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, irq_cfg(irq), chip); - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static int -ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest; - int ret; - - ret = __ioapic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - target_ht_irq(data->irq, dest, cfg->vector); - return IRQ_SET_MASK_OK_NOCOPY; -} - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .irq_mask = mask_ht_irq, - .irq_unmask = unmask_ht_irq, - .irq_ack = ack_apic_edge, - .irq_set_affinity = ht_set_affinity, - .irq_retrigger = ioapic_retrigger_irq, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - struct ht_irq_msg msg; - unsigned dest; - int err; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? 
- HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - - return 0; -} -#endif /* CONFIG_HT_IRQ */ - static int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) { @@ -3451,7 +2462,7 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) return ret; } -static int __init io_apic_get_redir_entries(int ioapic) +static int io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3476,28 +2487,8 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) return ioapic_initialized ? ioapic_dynirq_base : gsi_top; } -int __init arch_probe_nr_irqs(void) -{ - int nr; - - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; - - nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) - /* - * for MSI and HT dyn irq - */ - nr += gsi_top * 16; -#endif - if (nr < nr_irqs) - nr_irqs = nr; - - return 0; -} - #ifdef CONFIG_X86_32 -static int __init io_apic_get_unique_id(int ioapic, int apic_id) +static int io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; @@ -3572,30 +2563,63 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } -static u8 __init io_apic_unique_id(u8 id) +static u8 io_apic_unique_id(int idx, u8 id) { if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); + return io_apic_get_unique_id(idx, id); else return id; } #else -static u8 __init io_apic_unique_id(u8 id) 
+static u8 io_apic_unique_id(int idx, u8 id) { - int i; + union IO_APIC_reg_00 reg_00; DECLARE_BITMAP(used, 256); + unsigned long flags; + u8 new_id; + int i; bitmap_zero(used, 256); for_each_ioapic(i) __set_bit(mpc_ioapic_id(i), used); + + /* Hand out the requested id if available */ if (!test_bit(id, used)) return id; - return find_first_zero_bit(used, 256); + + /* + * Read the current id from the ioapic and keep it if + * available. + */ + raw_spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(idx, 0); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + new_id = reg_00.bits.ID; + if (!test_bit(new_id, used)) { + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Using reg apic_id %d instead of %d\n", + idx, new_id, id); + return new_id; + } + + /* + * Get the next free id and write it to the ioapic. + */ + new_id = find_first_zero_bit(used, 256); + reg_00.bits.ID = new_id; + raw_spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(idx, 0, reg_00.raw); + reg_00.raw = io_apic_read(idx, 0); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + /* Sanity check */ + BUG_ON(reg_00.bits.ID != new_id); + + return new_id; } #endif -static int __init io_apic_get_version(int ioapic) +static int io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3702,6 +2726,7 @@ static struct resource * __init ioapic_setup_resources(void) snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; num++; + ioapics[i].iomem_res = res; } ioapic_resources = res; @@ -3799,21 +2824,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) return gsi - gsi_cfg->gsi_base; } -static __init int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n", - MAX_IO_APICS, nr_ioapics); - return 1; - } - if (!address) { - pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n"); - return 1; - } - return 0; -} - 
-static __init int bad_ioapic_register(int idx) +static int bad_ioapic_register(int idx) { union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; @@ -3832,32 +2843,61 @@ static __init int bad_ioapic_register(int idx) return 0; } -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, - struct ioapic_domain_cfg *cfg) +static int find_free_ioapic_entry(void) { - int idx = 0; - int entries; + int idx; + + for (idx = 0; idx < MAX_IO_APICS; idx++) + if (ioapics[idx].nr_registers == 0) + return idx; + + return MAX_IO_APICS; +} + +/** + * mp_register_ioapic - Register an IOAPIC device + * @id: hardware IOAPIC ID + * @address: physical address of IOAPIC register area + * @gsi_base: base of GSI associated with the IOAPIC + * @cfg: configuration information for the IOAPIC + */ +int mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg) +{ + bool hotplug = !!ioapic_initialized; struct mp_ioapic_gsi *gsi_cfg; + int idx, ioapic, entries; + u32 gsi_end; - if (bad_ioapic(address)) - return; + if (!address) { + pr_warn("Bogus (zero) I/O APIC address found, skipping!\n"); + return -EINVAL; + } + for_each_ioapic(ioapic) + if (ioapics[ioapic].mp_config.apicaddr == address) { + pr_warn("address 0x%x conflicts with IOAPIC%d\n", + address, ioapic); + return -EEXIST; + } - idx = nr_ioapics; + idx = find_free_ioapic_entry(); + if (idx >= MAX_IO_APICS) { + pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n", + MAX_IO_APICS, idx); + return -ENOSPC; + } ioapics[idx].mp_config.type = MP_IOAPIC; ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ioapics[idx].mp_config.apicaddr = address; - ioapics[idx].irqdomain = NULL; - ioapics[idx].irqdomain_cfg = *cfg; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if (bad_ioapic_register(idx)) { clear_fixmap(FIX_IO_APIC_BASE_0 + idx); - return; + return -ENODEV; } - ioapics[idx].mp_config.apicid = io_apic_unique_id(id); + ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id); 
ioapics[idx].mp_config.apicver = io_apic_get_version(idx); /* @@ -3865,24 +2905,112 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ entries = io_apic_get_redir_entries(idx); + gsi_end = gsi_base + entries - 1; + for_each_ioapic(ioapic) { + gsi_cfg = mp_ioapic_gsi_routing(ioapic); + if ((gsi_base >= gsi_cfg->gsi_base && + gsi_base <= gsi_cfg->gsi_end) || + (gsi_end >= gsi_cfg->gsi_base && + gsi_end <= gsi_cfg->gsi_end)) { + pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n", + gsi_base, gsi_end, + gsi_cfg->gsi_base, gsi_cfg->gsi_end); + clear_fixmap(FIX_IO_APIC_BASE_0 + idx); + return -ENOSPC; + } + } gsi_cfg = mp_ioapic_gsi_routing(idx); gsi_cfg->gsi_base = gsi_base; - gsi_cfg->gsi_end = gsi_base + entries - 1; + gsi_cfg->gsi_end = gsi_end; + + ioapics[idx].irqdomain = NULL; + ioapics[idx].irqdomain_cfg = *cfg; /* - * The number of IO-APIC IRQ registers (== #pins): + * If mp_register_ioapic() is called during early boot stage when + * walking ACPI/SFI/DT tables, it's too early to create irqdomain, + * we are still using bootmem allocator. So delay it to setup_IO_APIC(). 
*/ - ioapics[idx].nr_registers = entries; + if (hotplug) { + if (mp_irqdomain_create(idx)) { + clear_fixmap(FIX_IO_APIC_BASE_0 + idx); + return -ENOMEM; + } + alloc_ioapic_saved_registers(idx); + } if (gsi_cfg->gsi_end >= gsi_top) gsi_top = gsi_cfg->gsi_end + 1; + if (nr_ioapics <= idx) + nr_ioapics = idx + 1; + + /* Set nr_registers to mark entry present */ + ioapics[idx].nr_registers = entries; pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", idx, mpc_ioapic_id(idx), mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), gsi_cfg->gsi_base, gsi_cfg->gsi_end); - nr_ioapics++; + return 0; +} + +int mp_unregister_ioapic(u32 gsi_base) +{ + int ioapic, pin; + int found = 0; + struct mp_pin_info *pin_info; + + for_each_ioapic(ioapic) + if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { + found = 1; + break; + } + if (!found) { + pr_warn("can't find IOAPIC for GSI %d\n", gsi_base); + return -ENODEV; + } + + for_each_pin(ioapic, pin) { + pin_info = mp_pin_info(ioapic, pin); + if (pin_info->count) { + pr_warn("pin%d on IOAPIC%d is still in use.\n", + pin, ioapic); + return -EBUSY; + } + } + + /* Mark entry not present */ + ioapics[ioapic].nr_registers = 0; + ioapic_destroy_irqdomain(ioapic); + free_ioapic_saved_registers(ioapic); + if (ioapics[ioapic].iomem_res) + release_resource(ioapics[ioapic].iomem_res); + clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic); + memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic])); + + return 0; +} + +int mp_ioapic_registered(u32 gsi_base) +{ + int ioapic; + + for_each_ioapic(ioapic) + if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) + return 1; + + return 0; +} + +static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, + int ioapic, int ioapic_pin, + int trigger, int polarity) +{ + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, @@ -3931,7 +3059,7 @@ void 
mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) ioapic_mask_entry(ioapic, pin); __remove_pin_from_irq(cfg, ioapic, pin); - WARN_ON(cfg->irq_2_pin != NULL); + WARN_ON(!list_empty(&cfg->irq_2_pin)); arch_teardown_hwirq(virq); } @@ -3964,18 +3092,6 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) return ret; } -bool mp_should_keep_irq(struct device *dev) -{ - if (dev->power.is_prepared) - return true; -#ifdef CONFIG_PM - if (dev->power.runtime_status == RPM_SUSPENDING) - return true; -#endif - - return false; -} - /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c new file mode 100644 index 0000000..d6ba2d6 --- /dev/null +++ b/arch/x86/kernel/apic/msi.c @@ -0,0 +1,286 @@ +/* + * Support of MSI, HPET and DMAR interrupts. + * + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * Moved from arch/x86/kernel/apic/io_apic.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/hpet.h> +#include <linux/msi.h> +#include <asm/msidef.h> +#include <asm/hpet.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/irq_remapping.h> + +void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + msg->address_hi = MSI_ADDR_BASE_HI; + + if (x2apic_enabled()) + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); + + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL : + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? 
+ MSI_ADDR_REDIRECTION_CPU : + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED : + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); +} + +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + + if (disable_apic) + return -ENXIO; + + cfg = irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); + + return 0; +} + +static int +msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + struct msi_msg msg; + unsigned int dest; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + __get_cached_msi_msg(data->msi_desc, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + __pci_write_msi_msg(data->msi_desc, &msg); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. 
+ */ +static struct irq_chip msi_chip = { + .name = "PCI-MSI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_ack = apic_ack_edge, + .irq_set_affinity = msi_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) +{ + struct irq_chip *chip = &msi_chip; + struct msi_msg msg; + unsigned int irq = irq_base + irq_offset; + int ret; + + ret = msi_compose_msg(dev, irq, &msg, -1); + if (ret < 0) + return ret; + + irq_set_msi_desc_off(irq_base, irq_offset, msidesc); + + /* + * MSI-X message is written per-IRQ, the offset is always 0. + * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. + */ + if (!irq_offset) + pci_write_msi_msg(irq, &msg); + + setup_remapped_irq(irq, irq_cfg(irq), chip); + + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); + + dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq); + + return 0; +} + +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_desc *msidesc; + unsigned int irq; + int node, ret; + + /* Multiple MSI vectors only supported with interrupt remapping */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + node = dev_to_node(&dev->dev); + + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = irq_alloc_hwirq(node); + if (!irq) + return -ENOSPC; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) { + irq_free_hwirq(irq); + return ret; + } + + } + return 0; +} + +void native_teardown_msi_irq(unsigned int irq) +{ + irq_free_hwirq(irq); +} + +#ifdef CONFIG_DMAR_TABLE +static int +dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int dest, irq = data->irq; + struct msi_msg msg; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + dmar_msi_read(irq, &msg); 
+ + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +static struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = apic_ack_edge, + .irq_set_affinity = dmar_msi_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg, -1); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +/* + * MSI message composition + */ +#ifdef CONFIG_HPET_TIMER + +static int hpet_msi_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_cfg *cfg = irqd_cfg(data); + struct msi_msg msg; + unsigned int dest; + int ret; + + ret = apic_set_affinity(data, mask, &dest); + if (ret) + return ret; + + hpet_msi_read(data->handler_data, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + hpet_msi_write(data->handler_data, &msg); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +static struct irq_chip hpet_msi_type = { + .name = "HPET_MSI", + .irq_unmask = hpet_msi_unmask, + .irq_mask = hpet_msi_mask, + .irq_ack = apic_ack_edge, + .irq_set_affinity = hpet_msi_set_affinity, + .irq_retrigger = apic_retrigger_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +int default_setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_chip *chip = &hpet_msi_type; + struct msi_msg msg; + int ret; + + ret = msi_compose_msg(NULL, irq, &msg, id); + if (ret < 0) + 
return ret; + + hpet_msi_write(irq_get_handler_data(irq), &msg); + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + setup_remapped_irq(irq, irq_cfg(irq), chip); + + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); + return 0; +} +#endif diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c new file mode 100644 index 0000000..6cedd79 --- /dev/null +++ b/arch/x86/kernel/apic/vector.c @@ -0,0 +1,719 @@ +/* + * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc. + * + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo + * Moved from arch/x86/kernel/apic/io_apic.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/compiler.h> +#include <linux/irqdomain.h> +#include <linux/slab.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/i8259.h> +#include <asm/desc.h> +#include <asm/irq_remapping.h> + +static DEFINE_RAW_SPINLOCK(vector_lock); + +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. 
+ */ + raw_spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + raw_spin_unlock(&vector_lock); +} + +struct irq_cfg *irq_cfg(unsigned int irq) +{ + return irq_get_chip_data(irq); +} + +struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +{ + return irq_data->chip_data; +} + +static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) +{ + struct irq_cfg *cfg; + + cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); + if (!cfg) + return NULL; + if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) + goto out_cfg; + if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) + goto out_domain; +#ifdef CONFIG_X86_IO_APIC + INIT_LIST_HEAD(&cfg->irq_2_pin); +#endif + return cfg; +out_domain: + free_cpumask_var(cfg->domain); +out_cfg: + kfree(cfg); + return NULL; +} + +struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) +{ + int res = irq_alloc_desc_at(at, node); + struct irq_cfg *cfg; + + if (res < 0) { + if (res != -EEXIST) + return NULL; + cfg = irq_cfg(at); + if (cfg) + return cfg; + } + + cfg = alloc_irq_cfg(at, node); + if (cfg) + irq_set_chip_data(at, cfg); + else + irq_free_desc(at); + return cfg; +} + +static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) +{ + if (!cfg) + return; + irq_set_chip_data(at, NULL); + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); +} + +static int +__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +{ + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. 
;) + */ + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 16; + int cpu, err; + cpumask_var_t tmp_mask; + + if (cfg->move_in_progress) + return -EBUSY; + + if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) + return -ENOMEM; + + /* Only try and allocate irqs on cpus that are present */ + err = -ENOSPC; + cpumask_clear(cfg->old_domain); + cpu = cpumask_first_and(mask, cpu_online_mask); + while (cpu < nr_cpu_ids) { + int new_cpu, vector, offset; + + apic->vector_allocation_domain(cpu, tmp_mask, mask); + + if (cpumask_subset(tmp_mask, cfg->domain)) { + err = 0; + if (cpumask_equal(tmp_mask, cfg->domain)) + break; + /* + * New cpumask using the vector is a proper subset of + * the current in use mask. So cleanup the vector + * allocation for the members that are not used anymore. + */ + cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); + cfg->move_in_progress = + cpumask_intersects(cfg->old_domain, cpu_online_mask); + cpumask_and(cfg->domain, cfg->domain, tmp_mask); + break; + } + + vector = current_vector; + offset = current_offset; +next: + vector += 16; + if (vector >= first_system_vector) { + offset = (offset + 1) % 16; + vector = FIRST_EXTERNAL_VECTOR + offset; + } + + if (unlikely(current_vector == vector)) { + cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); + cpumask_andnot(tmp_mask, mask, cfg->old_domain); + cpu = cpumask_first_and(tmp_mask, cpu_online_mask); + continue; + } + + if (test_bit(vector, used_vectors)) + goto next; + + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { + if (per_cpu(vector_irq, new_cpu)[vector] > + VECTOR_UNDEFINED) + goto next; + } + /* Found one! 
*/ + current_vector = vector; + current_offset = offset; + if (cfg->vector) { + cpumask_copy(cfg->old_domain, cfg->domain); + cfg->move_in_progress = + cpumask_intersects(cfg->old_domain, cpu_online_mask); + } + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cpumask_copy(cfg->domain, tmp_mask); + err = 0; + break; + } + free_cpumask_var(tmp_mask); + + return err; +} + +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +{ + int err; + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + err = __assign_irq_vector(irq, cfg, mask); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return err; +} + +void clear_irq_vector(int irq, struct irq_cfg *cfg) +{ + int cpu, vector; + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + BUG_ON(!cfg->vector); + + vector = cfg->vector; + for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + + cfg->vector = 0; + cpumask_clear(cfg->domain); + + if (likely(!cfg->move_in_progress)) { + raw_spin_unlock_irqrestore(&vector_lock, flags); + return; + } + + for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != irq) + continue; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + break; + } + } + cfg->move_in_progress = 0; + raw_spin_unlock_irqrestore(&vector_lock, flags); +} + +int __init arch_probe_nr_irqs(void) +{ + int nr; + + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; + + nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + if (gsi_top <= NR_IRQS_LEGACY) + nr += 8 * nr_cpu_ids; + else + nr += gsi_top * 16; +#endif + if (nr < nr_irqs) + nr_irqs = nr; + + return nr_legacy_irqs(); +} + +int 
__init arch_early_irq_init(void) +{ + return arch_early_ioapic_init(); +} + +static void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + int irq, vector; + struct irq_cfg *cfg; + + /* + * vector_lock will make sure that we don't run into irq vector + * assignments that might be happening on another cpu in parallel, + * while we setup our initial vector to irq mappings. + */ + raw_spin_lock(&vector_lock); + /* Mark the inuse vectors */ + for_each_active_irq(irq) { + cfg = irq_cfg(irq); + if (!cfg) + continue; + + if (!cpumask_test_cpu(cpu, cfg->domain)) + continue; + vector = cfg->vector; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq <= VECTOR_UNDEFINED) + continue; + + cfg = irq_cfg(irq); + if (!cpumask_test_cpu(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; + } + raw_spin_unlock(&vector_lock); +} + +/* + * Setup the vector to irq mappings. + */ +void setup_vector_irq(int cpu) +{ + int irq; + + /* + * On most of the platforms, legacy PIC delivers the interrupts on the + * boot cpu. But there are certain platforms where PIC interrupts are + * delivered to multiple cpu's. 
If the legacy IRQ is handled by the + * legacy PIC, for the new cpu that is coming online, setup the static + * legacy vector to irq mapping: + */ + for (irq = 0; irq < nr_legacy_irqs(); irq++) + per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + + __setup_vector_irq(cpu); +} + +int apic_retrigger_irq(struct irq_data *data) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned long flags; + int cpu; + + raw_spin_lock_irqsave(&vector_lock, flags); + cpu = cpumask_first_and(cfg->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); + raw_spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +} + +void apic_ack_edge(struct irq_data *data) +{ + irq_complete_move(irqd_cfg(data)); + irq_move_irq(data); + ack_APIC_irq(); +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id) +{ + struct irq_cfg *cfg = irqd_cfg(data); + unsigned int irq = data->irq; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + cpumask_copy(data->affinity, mask); + + return 0; +} + +#ifdef CONFIG_SMP +void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + apic->send_IPI_mask(cpumask_of(i), + IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + apic->send_IPI_mask(cleanup_mask, 
IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + +asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + + ack_APIC_irq(); + irq_enter(); + exit_idle(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + int irq; + unsigned int irr; + struct irq_desc *desc; + struct irq_cfg *cfg; + + irq = __this_cpu_read(vector_irq[vector]); + + if (irq <= VECTOR_UNDEFINED) + continue; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + if (!cfg) + continue; + + raw_spin_lock(&desc->lock); + + /* + * Check if the irq migration is in progress. If so, we + * haven't received the cleanup request yet for this irq. + */ + if (cfg->move_in_progress) + goto unlock; + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + goto unlock; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + /* + * Check if the vector that needs to be cleanedup is + * registered at the cpu's IRR. If so, then this is not + * the best time to clean it up. Lets clean it up in the + * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR + * to myself. 
+ */ + if (irr & (1 << (vector % 32))) { + apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); + goto unlock; + } + __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); +unlock: + raw_spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) +{ + unsigned me; + + if (likely(!cfg->move_in_progress)) + return; + + me = smp_processor_id(); + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + send_cleanup_vector(cfg); +} + +void irq_complete_move(struct irq_cfg *cfg) +{ + __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + if (!cfg) + return; + + __irq_complete_move(cfg, cfg->vector); +} +#endif + +/* + * Dynamic irq allocate and deallocation. Should be replaced by irq domains! + */ +int arch_setup_hwirq(unsigned int irq, int node) +{ + struct irq_cfg *cfg; + unsigned long flags; + int ret; + + cfg = alloc_irq_cfg(irq, node); + if (!cfg) + return -ENOMEM; + + raw_spin_lock_irqsave(&vector_lock, flags); + ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); + raw_spin_unlock_irqrestore(&vector_lock, flags); + + if (!ret) + irq_set_chip_data(irq, cfg); + else + free_irq_cfg(irq, cfg); + return ret; +} + +void arch_teardown_hwirq(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + free_remapped_irq(irq); + clear_irq_vector(irq, cfg); + free_irq_cfg(irq, cfg); +} + +static void __init print_APIC_field(int base) +{ + int i; + + printk(KERN_DEBUG); + + for (i = 0; i < 8; i++) + pr_cont("%08x", apic_read(base + i*0x10)); + + pr_cont("\n"); +} + +static void __init print_local_APIC(void *dummy) +{ + unsigned int i, v, ver, maxlvt; + u64 icr; + + pr_debug("printing local APIC contents on CPU#%d/%d:\n", + smp_processor_id(), hard_smp_processor_id()); + v = apic_read(APIC_ID); + pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id()); + v = apic_read(APIC_LVR); + pr_info("... 
APIC VERSION: %08x\n", v); + ver = GET_APIC_VERSION(v); + maxlvt = lapic_get_maxlvt(); + + v = apic_read(APIC_TASKPRI); + pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); + + /* !82489DX */ + if (APIC_INTEGRATED(ver)) { + if (!APIC_XAPIC(ver)) { + v = apic_read(APIC_ARBPRI); + pr_debug("... APIC ARBPRI: %08x (%02x)\n", + v, v & APIC_ARBPRI_MASK); + } + v = apic_read(APIC_PROCPRI); + pr_debug("... APIC PROCPRI: %08x\n", v); + } + + /* + * Remote read supported only in the 82489DX and local APIC for + * Pentium processors. + */ + if (!APIC_INTEGRATED(ver) || maxlvt == 3) { + v = apic_read(APIC_RRR); + pr_debug("... APIC RRR: %08x\n", v); + } + + v = apic_read(APIC_LDR); + pr_debug("... APIC LDR: %08x\n", v); + if (!x2apic_enabled()) { + v = apic_read(APIC_DFR); + pr_debug("... APIC DFR: %08x\n", v); + } + v = apic_read(APIC_SPIV); + pr_debug("... APIC SPIV: %08x\n", v); + + pr_debug("... APIC ISR field:\n"); + print_APIC_field(APIC_ISR); + pr_debug("... APIC TMR field:\n"); + print_APIC_field(APIC_TMR); + pr_debug("... APIC IRR field:\n"); + print_APIC_field(APIC_IRR); + + /* !82489DX */ + if (APIC_INTEGRATED(ver)) { + /* Due to the Pentium erratum 3AP. */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + pr_debug("... APIC ESR: %08x\n", v); + } + + icr = apic_icr_read(); + pr_debug("... APIC ICR: %08x\n", (u32)icr); + pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32)); + + v = apic_read(APIC_LVTT); + pr_debug("... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { + /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + pr_debug("... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + pr_debug("... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + pr_debug("... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { + /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + pr_debug("... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + pr_debug("... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + pr_debug("... 
APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + pr_debug("... APIC TDCR: %08x\n", v); + + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + v = apic_read(APIC_EFEAT); + maxlvt = (v >> 16) & 0xff; + pr_debug("... APIC EFEAT: %08x\n", v); + v = apic_read(APIC_ECTRL); + pr_debug("... APIC ECTRL: %08x\n", v); + for (i = 0; i < maxlvt; i++) { + v = apic_read(APIC_EILVTn(i)); + pr_debug("... APIC EILVT%d: %08x\n", i, v); + } + } + pr_cont("\n"); +} + +static void __init print_local_APICs(int maxcpu) +{ + int cpu; + + if (!maxcpu) + return; + + preempt_disable(); + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; + smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } + preempt_enable(); +} + +static void __init print_PIC(void) +{ + unsigned int v; + unsigned long flags; + + if (!nr_legacy_irqs()) + return; + + pr_debug("\nprinting PIC contents\n"); + + raw_spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + pr_debug("... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + pr_debug("... PIC IRR: %04x\n", v); + + outb(0x0b, 0xa0); + outb(0x0b, 0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a, 0xa0); + outb(0x0a, 0x20); + + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + + pr_debug("... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + pr_debug("... 
PIC ELCR: %04x\n", v); +} + +static int show_lapic __initdata = 1; +static __init int setup_show_lapic(char *arg) +{ + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +static int __init print_ICs(void) +{ + if (apic_verbosity == APIC_QUIET) + return 0; + + print_PIC(); + + /* don't print out if apic is not there */ + if (!cpu_has_apic && !apic_from_smp_config()) + return 0; + + print_local_APICs(show_lapic); + print_IO_APICs(); + + return 0; +} + +late_initcall(print_ICs); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 08f3fed..10b8d3e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -276,6 +276,17 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, return box; } +/* + * Using uncore_pmu_event_init pmu event_init callback + * as a detection point for uncore events. 
+ */ +static int uncore_pmu_event_init(struct perf_event *event); + +static bool is_uncore_event(struct perf_event *event) +{ + return event->pmu->event_init == uncore_pmu_event_init; +} + static int uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { @@ -290,13 +301,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b return -EINVAL; n = box->n_events; - box->event_list[n] = leader; - n++; + + if (is_uncore_event(leader)) { + box->event_list[n] = leader; + n++; + } + if (!dogrp) return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF) + if (!is_uncore_event(event) || + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index f5ab56d..aceb2f9 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,6 +28,7 @@ #include <asm/nmi.h> #include <asm/hw_irq.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/hpet.h> #include <linux/kdebug.h> #include <asm/cpu.h> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 1cf7c97..000d419 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -732,10 +732,10 @@ ENTRY(interrupt) ENTRY(irq_entries_start) RING0_INT_FRAME vector=FIRST_EXTERNAL_VECTOR -.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 +.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 .balign 32 .rept 7 - .if vector < NR_VECTORS + .if vector < FIRST_SYSTEM_VECTOR .if vector <> FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -4 .endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 90878aa..9ebaf63 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -740,10 +740,10 @@ ENTRY(interrupt) ENTRY(irq_entries_start) INTR_FRAME vector=FIRST_EXTERNAL_VECTOR -.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 +.rept 
(FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 .balign 32 .rept 7 - .if vector < NR_VECTORS + .if vector < FIRST_SYSTEM_VECTOR .if vector <> FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -8 .endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 4de73ee..70e181e 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -99,32 +99,9 @@ void __init init_IRQ(void) x86_init.irqs.intr_init(); } -/* - * Setup the vector to irq mappings. - */ -void setup_vector_irq(int cpu) -{ -#ifndef CONFIG_X86_IO_APIC - int irq; - - /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: - */ - for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; -#endif - - __setup_vector_irq(cpu); -} - static void __init smp_intr_init(void) { #ifdef CONFIG_SMP -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. 
@@ -144,7 +121,6 @@ static void __init smp_intr_init(void) /* IPI used for rebooting/stopping */ alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); -#endif #endif /* CONFIG_SMP */ } @@ -159,7 +135,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) +#ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -197,10 +173,17 @@ void __init native_init_IRQ(void) * 'special' SMP interrupts) */ i = FIRST_EXTERNAL_VECTOR; - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { +#ifndef CONFIG_X86_LOCAL_APIC +#define first_system_vector NR_VECTORS +#endif + for_each_clear_bit_from(i, used_vectors, first_system_vector) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } +#ifdef CONFIG_X86_LOCAL_APIC + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) + set_intr_gate(i, spurious_interrupt); +#endif if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 72e8e31..469b23d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -20,6 +20,7 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/cpufeature.h> #include <asm/desc.h> #include <asm/cacheflush.h> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4859810..415480d 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -22,6 +22,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/io_apic.h> #include <asm/debugreg.h> #include <asm/kexec-bzimage64.h> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 
17962e6..bae6c60 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,7 @@ #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/desc.h> #include <asm/hpet.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7a8f584..6d7022c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1084,7 +1084,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) { unsigned int i; - preempt_disable(); smp_cpu_index_default(); /* @@ -1102,22 +1101,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); - if (smp_sanity_check(max_cpus) < 0) { pr_info("SMP disabled\n"); disable_smp(); - goto out; + return; } default_setup_apic_routing(); - preempt_disable(); if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } - preempt_enable(); connect_bsp_APIC(); @@ -1151,8 +1147,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) uv_system_init(); set_mtrr_aps_delayed_init(); -out: - preempt_enable(); } void arch_enable_nonboot_cpus_begin(void) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 3e551ee..4e942f3 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -55,12 +55,6 @@ static bool tls_desc_okay(const struct user_desc *info) if (info->seg_not_present) return false; -#ifdef CONFIG_X86_64 - /* The L bit makes no sense for data. */ - if (info->lm) - return false; -#endif - return true; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a9ae205..88900e2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -331,7 +331,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) break; /* Success, it was handled */ case 1: /* Bound violation. 
*/ info = mpx_generate_siginfo(regs, xsave_buf); - if (PTR_ERR(info)) { + if (IS_ERR(info)) { /* * We failed to decode the MPX instruction. Act as if * the exception was not caused by MPX. diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index aae9413..c1c1544 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -841,7 +841,7 @@ static void __init lguest_init_IRQ(void) { unsigned int i; - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); if (i != SYSCALL_VECTOR) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index b9958c3..44b9271 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -210,6 +210,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) { int polarity; + if (dev->irq_managed && dev->irq > 0) + return 0; + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) polarity = 0; /* active high */ else @@ -224,13 +227,18 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) return -EBUSY; + dev->irq_managed = 1; + return 0; } static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0) + if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed && + dev->irq > 0) { mp_unmap_irq(dev->irq); + dev->irq_managed = 0; + } } struct pci_ops intel_mid_pci_ops = { diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index eb500c2..5dc6ca5 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1200,11 +1200,12 @@ static int pirq_enable_irq(struct pci_dev *dev) #ifdef CONFIG_X86_IO_APIC struct pci_dev *temp_dev; int irq; - struct io_apic_irq_attr irq_attr; + + if (dev->irq_managed && dev->irq > 0) + return 0; irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, - 
PCI_SLOT(dev->devfn), - pin - 1, &irq_attr); + PCI_SLOT(dev->devfn), pin - 1); /* * Busses behind bridges are typically not listed in the MP-table. * In this case we have to look up the IRQ based on the parent bus, @@ -1218,7 +1219,7 @@ static int pirq_enable_irq(struct pci_dev *dev) pin = pci_swizzle_interrupt_pin(dev, pin); irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), - pin - 1, &irq_attr); + pin - 1); if (irq >= 0) dev_warn(&dev->dev, "using bridge %s " "INT %c to get IRQ %d\n", @@ -1228,6 +1229,7 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { + dev->irq_managed = 1; dev->irq = irq; dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); @@ -1254,11 +1256,24 @@ static int pirq_enable_irq(struct pci_dev *dev) return 0; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + static void pirq_disable_irq(struct pci_dev *dev) { if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && - dev->irq) { + dev->irq_managed && dev->irq) { mp_unmap_irq(dev->irq); dev->irq = 0; + dev->irq_managed = 0; } } diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index b233681..0ce6736 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; int mmr_pnode, err; @@ -198,13 +198,13 @@ static int uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = data->chip_data; + struct irq_cfg *cfg = 
irqd_cfg(data); unsigned int dest; unsigned long mmr_value, mmr_offset; struct uv_IO_APIC_route_entry *entry; int mmr_pnode; - if (__ioapic_set_affinity(data, mask, &dest)) + if (apic_set_affinity(data, mask, &dest)) return -1; mmr_value = 0; diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 7cc4e33..5277a0e 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -413,6 +413,9 @@ int acpi_pci_irq_enable(struct pci_dev *dev) return 0; } + if (dev->irq_managed && dev->irq > 0) + return 0; + entry = acpi_pci_irq_lookup(dev, pin); if (!entry) { /* @@ -456,6 +459,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) return rc; } dev->irq = rc; + dev->irq_managed = 1; if (link) snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link); @@ -478,7 +482,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev) u8 pin; pin = dev->pin; - if (!pin) + if (!pin || !dev->irq_managed || dev->irq <= 0) return; /* Keep IOAPIC pin configuration when suspending */ @@ -506,6 +510,9 @@ void acpi_pci_irq_disable(struct pci_dev *dev) */ dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin)); - if (gsi >= 0 && dev->irq > 0) + if (gsi >= 0) { acpi_unregister_gsi(gsi); + dev->irq = 0; + dev->irq_managed = 0; + } } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index ef58f46..342942f 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -125,13 +125,12 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) } header = (struct acpi_subtable_header *)obj->buffer.pointer; - if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { + if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) map_lapic_id(header, acpi_id, &apic_id); - } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { + else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) map_lsapic_id(header, type, acpi_id, &apic_id); - } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { + else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) 
map_x2apic_id(header, type, acpi_id, &apic_id); - } exit: kfree(buffer.pointer); @@ -164,7 +163,7 @@ int acpi_map_cpuid(int apic_id, u32 acpi_id) * For example, * * Scope (_PR) - * { + * { * Processor (CPU0, 0x00, 0x00000410, 0x06) {} * Processor (CPU1, 0x01, 0x00000410, 0x06) {} * Processor (CPU2, 0x02, 0x00000410, 0x06) {} diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 2ba8f02..782a0d1 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -200,7 +200,7 @@ bool acpi_dev_resource_address_space(struct acpi_resource *ares, status = acpi_resource_to_address64(ares, &addr); if (ACPI_FAILURE(status)) - return true; + return false; res->start = addr.minimum; res->end = addr.maximum; diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c913906..0f6b229 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -1,5 +1,5 @@ /* - * (C) 2001-2004 Dave Jones. <davej@redhat.com> + * (C) 2001-2004 Dave Jones. * (C) 2002 Padraig Brady. <padraig@antefacto.com> * * Licensed under the terms of the GNU GPL License version 2. @@ -1008,7 +1008,7 @@ MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); module_param(enable, int, 0644); MODULE_PARM_DESC(enable, "Enable driver"); -MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); +MODULE_AUTHOR("Dave Jones"); MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors."); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index f910272..e6f24b2 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -300,7 +300,7 @@ static void __exit powernow_k6_exit(void) } -MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, " +MODULE_AUTHOR("Arjan van de Ven, Dave Jones, " "Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("PowerNow! 
driver for AMD K6-2+ / K6-3+ processors."); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index e61e224..37c5742 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -1,7 +1,6 @@ /* * AMD K7 Powernow driver. * (C) 2003 Dave Jones on behalf of SuSE Labs. - * (C) 2003-2004 Dave Jones <davej@redhat.com> * * Licensed under the terms of the GNU GPL License version 2. * Based upon datasheets & sample CPUs kindly provided by AMD. @@ -701,7 +700,7 @@ static void __exit powernow_exit(void) module_param(acpi_force, int, 0444); MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); -MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); +MODULE_AUTHOR("Dave Jones"); MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c index 1a07b59..e56d632 100644 --- a/drivers/cpufreq/speedstep-ich.c +++ b/drivers/cpufreq/speedstep-ich.c @@ -378,8 +378,7 @@ static void __exit speedstep_exit(void) } -MODULE_AUTHOR("Dave Jones <davej@redhat.com>, " - "Dominik Brodowski <linux@brodo.de>"); +MODULE_AUTHOR("Dave Jones, Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets " "with ICH-M southbridges."); MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index e9248bb..aedec09 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -16,13 +16,10 @@ #include <asm/machdep.h> #include <asm/firmware.h> +#include <asm/opal.h> #include <asm/runlatch.h> -/* Flags and constants used in PowerNV platform */ - #define MAX_POWERNV_IDLE_STATES 8 -#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */ -#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */ struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", @@ -197,7 +194,7 @@ static int 
powernv_add_idle_states(void) * target residency to be 10x exit_latency */ latency_ns = be32_to_cpu(idle_state_latency[i]); - if (flags & IDLE_USE_INST_NAP) { + if (flags & OPAL_PM_NAP_ENABLED) { /* Add NAP state */ strcpy(powernv_states[nr_idle_states].name, "Nap"); strcpy(powernv_states[nr_idle_states].desc, "Nap"); @@ -210,7 +207,8 @@ static int powernv_add_idle_states(void) nr_idle_states++; } - if (flags & IDLE_USE_INST_SLEEP) { + if (flags & OPAL_PM_SLEEP_ENABLED || + flags & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ strcpy(powernv_states[nr_idle_states].name, "FastSleep"); strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c1351d9..91a488c 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1072,4 +1072,15 @@ config SCx200_ACB This support is also available as a module. If so, the module will be called scx200_acb. +config I2C_OPAL + tristate "IBM OPAL I2C driver" + depends on PPC_POWERNV + default y + help + This exposes the PowerNV platform i2c busses to the linux i2c layer, + the driver is based on the OPAL interfaces. + + This driver can also be built as a module. If so, the module will be + called as i2c-opal. 
+ endmenu diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 5e6c822..56388f6 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o obj-$(CONFIG_I2C_BCM_KONA) += i2c-bcm-kona.o obj-$(CONFIG_I2C_CROS_EC_TUNNEL) += i2c-cros-ec-tunnel.o obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o +obj-$(CONFIG_I2C_OPAL) += i2c-opal.o obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o obj-$(CONFIG_SCx200_ACB) += scx200_acb.o diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c new file mode 100644 index 0000000..16f90b1 --- /dev/null +++ b/drivers/i2c/busses/i2c-opal.c @@ -0,0 +1,294 @@ +/* + * IBM OPAL I2C driver + * Copyright (C) 2014 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
+ */ + +#include <linux/device.h> +#include <linux/i2c.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <asm/firmware.h> +#include <asm/opal.h> + +static int i2c_opal_translate_error(int rc) +{ + switch (rc) { + case OPAL_NO_MEM: + return -ENOMEM; + case OPAL_PARAMETER: + return -EINVAL; + case OPAL_I2C_ARBT_LOST: + return -EAGAIN; + case OPAL_I2C_TIMEOUT: + return -ETIMEDOUT; + case OPAL_I2C_NACK_RCVD: + return -ENXIO; + case OPAL_I2C_STOP_ERR: + return -EBUSY; + default: + return -EIO; + } +} + +static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req) +{ + struct opal_msg msg; + int token, rc; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + rc = opal_i2c_request(token, bus_id, req); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = i2c_opal_translate_error(rc); + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) + goto exit; + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) { + rc = i2c_opal_translate_error(rc); + goto exit; + } + +exit: + opal_async_release_token(token); + return rc; +} + +static int i2c_opal_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num) +{ + unsigned long opal_id = (unsigned long)adap->algo_data; + struct opal_i2c_request req; + int rc, i; + + /* We only support fairly simple combinations here of one + * or two messages + */ + memset(&req, 0, sizeof(req)); + switch(num) { + case 0: + return 0; + case 1: + req.type = (msgs[0].flags & I2C_M_RD) ? 
+ OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE; + req.addr = cpu_to_be16(msgs[0].addr); + req.size = cpu_to_be32(msgs[0].len); + req.buffer_ra = cpu_to_be64(__pa(msgs[0].buf)); + break; + case 2: + /* For two messages, we basically support only simple + * smbus transactions of a write plus a read. We might + * want to allow also two writes but we'd have to bounce + * the data into a single buffer. + */ + if ((msgs[0].flags & I2C_M_RD) || !(msgs[1].flags & I2C_M_RD)) + return -EOPNOTSUPP; + if (msgs[0].len > 4) + return -EOPNOTSUPP; + if (msgs[0].addr != msgs[1].addr) + return -EOPNOTSUPP; + req.type = OPAL_I2C_SM_READ; + req.addr = cpu_to_be16(msgs[0].addr); + req.subaddr_sz = msgs[0].len; + for (i = 0; i < msgs[0].len; i++) + req.subaddr = (req.subaddr << 8) | msgs[0].buf[i]; + req.subaddr = cpu_to_be32(req.subaddr); + req.size = cpu_to_be32(msgs[1].len); + req.buffer_ra = cpu_to_be64(__pa(msgs[1].buf)); + break; + default: + return -EOPNOTSUPP; + } + + rc = i2c_opal_send_request(opal_id, &req); + if (rc) + return rc; + + return num; +} + +static int i2c_opal_smbus_xfer(struct i2c_adapter *adap, u16 addr, + unsigned short flags, char read_write, + u8 command, int size, union i2c_smbus_data *data) +{ + unsigned long opal_id = (unsigned long)adap->algo_data; + struct opal_i2c_request req; + u8 local[2]; + int rc; + + memset(&req, 0, sizeof(req)); + + req.addr = cpu_to_be16(addr); + switch (size) { + case I2C_SMBUS_BYTE: + req.buffer_ra = cpu_to_be64(__pa(&data->byte)); + req.size = cpu_to_be32(1); + /* Fall through */ + case I2C_SMBUS_QUICK: + req.type = (read_write == I2C_SMBUS_READ) ? + OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE; + break; + case I2C_SMBUS_BYTE_DATA: + req.buffer_ra = cpu_to_be64(__pa(&data->byte)); + req.size = cpu_to_be32(1); + req.subaddr = cpu_to_be32(command); + req.subaddr_sz = 1; + req.type = (read_write == I2C_SMBUS_READ) ? 
+ OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE; + break; + case I2C_SMBUS_WORD_DATA: + if (!read_write) { + local[0] = data->word & 0xff; + local[1] = (data->word >> 8) & 0xff; + } + req.buffer_ra = cpu_to_be64(__pa(local)); + req.size = cpu_to_be32(2); + req.subaddr = cpu_to_be32(command); + req.subaddr_sz = 1; + req.type = (read_write == I2C_SMBUS_READ) ? + OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE; + break; + case I2C_SMBUS_I2C_BLOCK_DATA: + req.buffer_ra = cpu_to_be64(__pa(&data->block[1])); + req.size = cpu_to_be32(data->block[0]); + req.subaddr = cpu_to_be32(command); + req.subaddr_sz = 1; + req.type = (read_write == I2C_SMBUS_READ) ? + OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE; + break; + default: + return -EINVAL; + } + + rc = i2c_opal_send_request(opal_id, &req); + if (!rc && read_write && size == I2C_SMBUS_WORD_DATA) { + data->word = ((u16)local[1]) << 8; + data->word |= local[0]; + } + + return rc; +} + +static u32 i2c_opal_func(struct i2c_adapter *adapter) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_I2C_BLOCK; +} + +static const struct i2c_algorithm i2c_opal_algo = { + .master_xfer = i2c_opal_master_xfer, + .smbus_xfer = i2c_opal_smbus_xfer, + .functionality = i2c_opal_func, +}; + +static int i2c_opal_probe(struct platform_device *pdev) +{ + struct i2c_adapter *adapter; + const char *pname; + u32 opal_id; + int rc; + + if (!pdev->dev.of_node) + return -ENODEV; + + rc = of_property_read_u32(pdev->dev.of_node, "ibm,opal-id", &opal_id); + if (rc) { + dev_err(&pdev->dev, "Missing ibm,opal-id property !\n"); + return -EIO; + } + + adapter = devm_kzalloc(&pdev->dev, sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + adapter->algo = &i2c_opal_algo; + adapter->algo_data = (void *)(unsigned long)opal_id; + adapter->dev.parent = &pdev->dev; + adapter->dev.of_node = of_node_get(pdev->dev.of_node); + pname = of_get_property(pdev->dev.of_node, "ibm,port-name", NULL); 
+ if (pname) + strlcpy(adapter->name, pname, sizeof(adapter->name)); + else + strlcpy(adapter->name, "opal", sizeof(adapter->name)); + + platform_set_drvdata(pdev, adapter); + rc = i2c_add_adapter(adapter); + if (rc) + dev_err(&pdev->dev, "Failed to register the i2c adapter\n"); + + return rc; +} + +static int i2c_opal_remove(struct platform_device *pdev) +{ + struct i2c_adapter *adapter = platform_get_drvdata(pdev); + + i2c_del_adapter(adapter); + + return 0; +} + +static const struct of_device_id i2c_opal_of_match[] = { + { + .compatible = "ibm,opal-i2c", + }, + { } +}; +MODULE_DEVICE_TABLE(of, i2c_opal_of_match); + +static struct platform_driver i2c_opal_driver = { + .probe = i2c_opal_probe, + .remove = i2c_opal_remove, + .driver = { + .name = "i2c-opal", + .of_match_table = i2c_opal_of_match, + }, +}; + +static int __init i2c_opal_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return -ENODEV; + + return platform_driver_register(&i2c_opal_driver); +} +module_init(i2c_opal_init); + +static void __exit i2c_opal_exit(void) +{ + return platform_driver_unregister(&i2c_opal_driver); +} +module_exit(i2c_opal_exit); + +MODULE_AUTHOR("Neelesh Gupta <neelegup@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("IBM OPAL I2C driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 10641b7..dafb3c5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -22,7 +22,6 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/in6.h> -#include <linux/llist.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include <target/target_core_base.h> @@ -36,11 +35,17 @@ #define ISERT_MAX_CONN 8 #define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN) #define ISER_MAX_TX_CQ_LEN (ISERT_QP_MAX_REQ_DTOS * ISERT_MAX_CONN) +#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \ + ISERT_MAX_CONN) + +int isert_debug_level = 0; 
+module_param_named(debug_level, isert_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); -static struct workqueue_struct *isert_rx_wq; static struct workqueue_struct *isert_comp_wq; +static struct workqueue_struct *isert_release_wq; static void isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); @@ -54,19 +59,32 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr); static int isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd); +static int +isert_rdma_post_recvl(struct isert_conn *isert_conn); +static int +isert_rdma_accept(struct isert_conn *isert_conn); +struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np); + +static inline bool +isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) +{ + return (conn->pi_support && + cmd->prot_op != TARGET_PROT_NORMAL); +} + static void isert_qp_event_callback(struct ib_event *e, void *context) { struct isert_conn *isert_conn = (struct isert_conn *)context; - pr_err("isert_qp_event_callback event: %d\n", e->event); + isert_err("conn %p event: %d\n", isert_conn, e->event); switch (e->event) { case IB_EVENT_COMM_EST: rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST); break; case IB_EVENT_QP_LAST_WQE_REACHED: - pr_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED:\n"); + isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n"); break; default: break; @@ -80,39 +98,41 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr) ret = ib_query_device(ib_dev, devattr); if (ret) { - pr_err("ib_query_device() failed: %d\n", ret); + isert_err("ib_query_device() failed: %d\n", ret); return ret; } - pr_debug("devattr->max_sge: %d\n", devattr->max_sge); - pr_debug("devattr->max_sge_rd: %d\n", devattr->max_sge_rd); + isert_dbg("devattr->max_sge: %d\n", devattr->max_sge); + isert_dbg("devattr->max_sge_rd: %d\n", 
devattr->max_sge_rd); return 0; } static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, - u8 protection) +isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) { struct isert_device *device = isert_conn->conn_device; struct ib_qp_init_attr attr; - int ret, index, min_index = 0; + struct isert_comp *comp; + int ret, i, min = 0; mutex_lock(&device_list_mutex); - for (index = 0; index < device->cqs_used; index++) - if (device->cq_active_qps[index] < - device->cq_active_qps[min_index]) - min_index = index; - device->cq_active_qps[min_index]++; - pr_debug("isert_conn_setup_qp: Using min_index: %d\n", min_index); + for (i = 0; i < device->comps_used; i++) + if (device->comps[i].active_qps < + device->comps[min].active_qps) + min = i; + comp = &device->comps[min]; + comp->active_qps++; + isert_info("conn %p, using comp %p min_index: %d\n", + isert_conn, comp, min); mutex_unlock(&device_list_mutex); memset(&attr, 0, sizeof(struct ib_qp_init_attr)); attr.event_handler = isert_qp_event_callback; attr.qp_context = isert_conn; - attr.send_cq = device->dev_tx_cq[min_index]; - attr.recv_cq = device->dev_rx_cq[min_index]; + attr.send_cq = comp->cq; + attr.recv_cq = comp->cq; attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS; - attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS; + attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; /* * FIXME: Use devattr.max_sge - 2 for max_send_sge as * work-around for RDMA_READs with ConnectX-2. 
@@ -126,29 +146,29 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; - if (protection) + if (device->pi_capable) attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; - pr_debug("isert_conn_setup_qp cma_id->device: %p\n", - cma_id->device); - pr_debug("isert_conn_setup_qp conn_pd->device: %p\n", - isert_conn->conn_pd->device); - ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr); if (ret) { - pr_err("rdma_create_qp failed for cma_id %d\n", ret); - return ret; + isert_err("rdma_create_qp failed for cma_id %d\n", ret); + goto err; } isert_conn->conn_qp = cma_id->qp; - pr_debug("rdma_create_qp() returned success >>>>>>>>>>>>>>>>>>>>>>>>>.\n"); return 0; +err: + mutex_lock(&device_list_mutex); + comp->active_qps--; + mutex_unlock(&device_list_mutex); + + return ret; } static void isert_cq_event_callback(struct ib_event *e, void *context) { - pr_debug("isert_cq_event_callback event: %d\n", e->event); + isert_dbg("event: %d\n", e->event); } static int @@ -182,6 +202,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) } isert_conn->conn_rx_desc_head = 0; + return 0; dma_map_fail: @@ -193,6 +214,8 @@ dma_map_fail: kfree(isert_conn->conn_rx_descs); isert_conn->conn_rx_descs = NULL; fail: + isert_err("conn %p failed to allocate rx descriptors\n", isert_conn); + return -ENOMEM; } @@ -216,27 +239,23 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn) isert_conn->conn_rx_descs = NULL; } -static void isert_cq_tx_work(struct work_struct *); -static void isert_cq_tx_callback(struct ib_cq *, void *); -static void isert_cq_rx_work(struct work_struct *); -static void isert_cq_rx_callback(struct ib_cq *, void *); +static void isert_cq_work(struct work_struct *); +static void isert_cq_callback(struct ib_cq *, void *); static int isert_create_device_ib_res(struct isert_device *device) { struct ib_device *ib_dev = device->ib_device; - struct 
isert_cq_desc *cq_desc; struct ib_device_attr *dev_attr; - int ret = 0, i, j; - int max_rx_cqe, max_tx_cqe; + int ret = 0, i; + int max_cqe; dev_attr = &device->dev_attr; ret = isert_query_device(ib_dev, dev_attr); if (ret) return ret; - max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe); - max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe); + max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe); /* asign function handlers */ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && @@ -254,55 +273,38 @@ isert_create_device_ib_res(struct isert_device *device) device->pi_capable = dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER ? true : false; - device->cqs_used = min_t(int, num_online_cpus(), - device->ib_device->num_comp_vectors); - device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used); - pr_debug("Using %d CQs, device %s supports %d vectors support " - "Fast registration %d pi_capable %d\n", - device->cqs_used, device->ib_device->name, - device->ib_device->num_comp_vectors, device->use_fastreg, - device->pi_capable); - device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) * - device->cqs_used, GFP_KERNEL); - if (!device->cq_desc) { - pr_err("Unable to allocate device->cq_desc\n"); + device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(), + device->ib_device->num_comp_vectors)); + isert_info("Using %d CQs, %s supports %d vectors support " + "Fast registration %d pi_capable %d\n", + device->comps_used, device->ib_device->name, + device->ib_device->num_comp_vectors, device->use_fastreg, + device->pi_capable); + + device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), + GFP_KERNEL); + if (!device->comps) { + isert_err("Unable to allocate completion contexts\n"); return -ENOMEM; } - cq_desc = device->cq_desc; - - for (i = 0; i < device->cqs_used; i++) { - cq_desc[i].device = device; - cq_desc[i].cq_index = i; - - INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work); - device->dev_rx_cq[i] = 
ib_create_cq(device->ib_device, - isert_cq_rx_callback, - isert_cq_event_callback, - (void *)&cq_desc[i], - max_rx_cqe, i); - if (IS_ERR(device->dev_rx_cq[i])) { - ret = PTR_ERR(device->dev_rx_cq[i]); - device->dev_rx_cq[i] = NULL; - goto out_cq; - } - INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work); - device->dev_tx_cq[i] = ib_create_cq(device->ib_device, - isert_cq_tx_callback, - isert_cq_event_callback, - (void *)&cq_desc[i], - max_tx_cqe, i); - if (IS_ERR(device->dev_tx_cq[i])) { - ret = PTR_ERR(device->dev_tx_cq[i]); - device->dev_tx_cq[i] = NULL; - goto out_cq; - } + for (i = 0; i < device->comps_used; i++) { + struct isert_comp *comp = &device->comps[i]; - ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP); - if (ret) + comp->device = device; + INIT_WORK(&comp->work, isert_cq_work); + comp->cq = ib_create_cq(device->ib_device, + isert_cq_callback, + isert_cq_event_callback, + (void *)comp, + max_cqe, i); + if (IS_ERR(comp->cq)) { + ret = PTR_ERR(comp->cq); + comp->cq = NULL; goto out_cq; + } - ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP); + ret = ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP); if (ret) goto out_cq; } @@ -310,19 +312,15 @@ isert_create_device_ib_res(struct isert_device *device) return 0; out_cq: - for (j = 0; j < i; j++) { - cq_desc = &device->cq_desc[j]; + for (i = 0; i < device->comps_used; i++) { + struct isert_comp *comp = &device->comps[i]; - if (device->dev_rx_cq[j]) { - cancel_work_sync(&cq_desc->cq_rx_work); - ib_destroy_cq(device->dev_rx_cq[j]); - } - if (device->dev_tx_cq[j]) { - cancel_work_sync(&cq_desc->cq_tx_work); - ib_destroy_cq(device->dev_tx_cq[j]); + if (comp->cq) { + cancel_work_sync(&comp->work); + ib_destroy_cq(comp->cq); } } - kfree(device->cq_desc); + kfree(device->comps); return ret; } @@ -330,21 +328,18 @@ out_cq: static void isert_free_device_ib_res(struct isert_device *device) { - struct isert_cq_desc *cq_desc; int i; - for (i = 0; i < device->cqs_used; i++) { - cq_desc = 
&device->cq_desc[i]; + isert_info("device %p\n", device); - cancel_work_sync(&cq_desc->cq_rx_work); - cancel_work_sync(&cq_desc->cq_tx_work); - ib_destroy_cq(device->dev_rx_cq[i]); - ib_destroy_cq(device->dev_tx_cq[i]); - device->dev_rx_cq[i] = NULL; - device->dev_tx_cq[i] = NULL; - } + for (i = 0; i < device->comps_used; i++) { + struct isert_comp *comp = &device->comps[i]; - kfree(device->cq_desc); + cancel_work_sync(&comp->work); + ib_destroy_cq(comp->cq); + comp->cq = NULL; + } + kfree(device->comps); } static void @@ -352,6 +347,7 @@ isert_device_try_release(struct isert_device *device) { mutex_lock(&device_list_mutex); device->refcount--; + isert_info("device %p refcount %d\n", device, device->refcount); if (!device->refcount) { isert_free_device_ib_res(device); list_del(&device->dev_node); @@ -370,6 +366,8 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) list_for_each_entry(device, &device_list, dev_node) { if (device->ib_device->node_guid == cma_id->device->node_guid) { device->refcount++; + isert_info("Found iser device %p refcount %d\n", + device, device->refcount); mutex_unlock(&device_list_mutex); return device; } @@ -393,6 +391,8 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) device->refcount++; list_add_tail(&device->dev_node, &device_list); + isert_info("Created a new iser device %p refcount %d\n", + device, device->refcount); mutex_unlock(&device_list_mutex); return device; @@ -407,7 +407,7 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) if (list_empty(&isert_conn->conn_fr_pool)) return; - pr_debug("Freeing conn %p fastreg pool", isert_conn); + isert_info("Freeing conn %p fastreg pool", isert_conn); list_for_each_entry_safe(fr_desc, tmp, &isert_conn->conn_fr_pool, list) { @@ -425,87 +425,97 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) } if (i < isert_conn->conn_fr_pool_size) - pr_warn("Pool still has %d regions registered\n", + isert_warn("Pool still has %d regions registered\n", 
isert_conn->conn_fr_pool_size - i); } static int +isert_create_pi_ctx(struct fast_reg_descriptor *desc, + struct ib_device *device, + struct ib_pd *pd) +{ + struct ib_mr_init_attr mr_init_attr; + struct pi_context *pi_ctx; + int ret; + + pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); + if (!pi_ctx) { + isert_err("Failed to allocate pi context\n"); + return -ENOMEM; + } + + pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx->prot_frpl)) { + isert_err("Failed to allocate prot frpl err=%ld\n", + PTR_ERR(pi_ctx->prot_frpl)); + ret = PTR_ERR(pi_ctx->prot_frpl); + goto err_pi_ctx; + } + + pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx->prot_mr)) { + isert_err("Failed to allocate prot frmr err=%ld\n", + PTR_ERR(pi_ctx->prot_mr)); + ret = PTR_ERR(pi_ctx->prot_mr); + goto err_prot_frpl; + } + desc->ind |= ISERT_PROT_KEY_VALID; + + memset(&mr_init_attr, 0, sizeof(mr_init_attr)); + mr_init_attr.max_reg_descriptors = 2; + mr_init_attr.flags |= IB_MR_SIGNATURE_EN; + pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); + if (IS_ERR(pi_ctx->sig_mr)) { + isert_err("Failed to allocate signature enabled mr err=%ld\n", + PTR_ERR(pi_ctx->sig_mr)); + ret = PTR_ERR(pi_ctx->sig_mr); + goto err_prot_mr; + } + + desc->pi_ctx = pi_ctx; + desc->ind |= ISERT_SIG_KEY_VALID; + desc->ind &= ~ISERT_PROTECTED; + + return 0; + +err_prot_mr: + ib_dereg_mr(desc->pi_ctx->prot_mr); +err_prot_frpl: + ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +err_pi_ctx: + kfree(desc->pi_ctx); + + return ret; +} + +static int isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, - struct fast_reg_descriptor *fr_desc, u8 protection) + struct fast_reg_descriptor *fr_desc) { int ret; fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc->data_frpl)) { - pr_err("Failed to allocate data frpl err=%ld\n", - PTR_ERR(fr_desc->data_frpl)); + isert_err("Failed 
to allocate data frpl err=%ld\n", + PTR_ERR(fr_desc->data_frpl)); return PTR_ERR(fr_desc->data_frpl); } fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc->data_mr)) { - pr_err("Failed to allocate data frmr err=%ld\n", - PTR_ERR(fr_desc->data_mr)); + isert_err("Failed to allocate data frmr err=%ld\n", + PTR_ERR(fr_desc->data_mr)); ret = PTR_ERR(fr_desc->data_mr); goto err_data_frpl; } - pr_debug("Create fr_desc %p page_list %p\n", - fr_desc, fr_desc->data_frpl->page_list); fr_desc->ind |= ISERT_DATA_KEY_VALID; - if (protection) { - struct ib_mr_init_attr mr_init_attr = {0}; - struct pi_context *pi_ctx; - - fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL); - if (!fr_desc->pi_ctx) { - pr_err("Failed to allocate pi context\n"); - ret = -ENOMEM; - goto err_data_mr; - } - pi_ctx = fr_desc->pi_ctx; - - pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(pi_ctx->prot_frpl)) { - pr_err("Failed to allocate prot frpl err=%ld\n", - PTR_ERR(pi_ctx->prot_frpl)); - ret = PTR_ERR(pi_ctx->prot_frpl); - goto err_pi_ctx; - } - - pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(pi_ctx->prot_mr)) { - pr_err("Failed to allocate prot frmr err=%ld\n", - PTR_ERR(pi_ctx->prot_mr)); - ret = PTR_ERR(pi_ctx->prot_mr); - goto err_prot_frpl; - } - fr_desc->ind |= ISERT_PROT_KEY_VALID; - - mr_init_attr.max_reg_descriptors = 2; - mr_init_attr.flags |= IB_MR_SIGNATURE_EN; - pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); - if (IS_ERR(pi_ctx->sig_mr)) { - pr_err("Failed to allocate signature enabled mr err=%ld\n", - PTR_ERR(pi_ctx->sig_mr)); - ret = PTR_ERR(pi_ctx->sig_mr); - goto err_prot_mr; - } - fr_desc->ind |= ISERT_SIG_KEY_VALID; - } - fr_desc->ind &= ~ISERT_PROTECTED; + isert_dbg("Created fr_desc %p\n", fr_desc); return 0; -err_prot_mr: - ib_dereg_mr(fr_desc->pi_ctx->prot_mr); -err_prot_frpl: - ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); 
-err_pi_ctx: - kfree(fr_desc->pi_ctx); -err_data_mr: - ib_dereg_mr(fr_desc->data_mr); + err_data_frpl: ib_free_fast_reg_page_list(fr_desc->data_frpl); @@ -513,7 +523,7 @@ err_data_frpl: } static int -isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support) +isert_conn_create_fastreg_pool(struct isert_conn *isert_conn) { struct fast_reg_descriptor *fr_desc; struct isert_device *device = isert_conn->conn_device; @@ -531,16 +541,15 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support) for (i = 0; i < tag_num; i++) { fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); if (!fr_desc) { - pr_err("Failed to allocate fast_reg descriptor\n"); + isert_err("Failed to allocate fast_reg descriptor\n"); ret = -ENOMEM; goto err; } ret = isert_create_fr_desc(device->ib_device, - isert_conn->conn_pd, fr_desc, - pi_support); + isert_conn->conn_pd, fr_desc); if (ret) { - pr_err("Failed to create fastreg descriptor err=%d\n", + isert_err("Failed to create fastreg descriptor err=%d\n", ret); kfree(fr_desc); goto err; @@ -550,7 +559,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support) isert_conn->conn_fr_pool_size++; } - pr_debug("Creating conn %p fastreg pool size=%d", + isert_dbg("Creating conn %p fastreg pool size=%d", isert_conn, isert_conn->conn_fr_pool_size); return 0; @@ -563,47 +572,45 @@ err: static int isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - struct iscsi_np *np = cma_id->context; - struct isert_np *isert_np = np->np_context; + struct isert_np *isert_np = cma_id->context; + struct iscsi_np *np = isert_np->np; struct isert_conn *isert_conn; struct isert_device *device; struct ib_device *ib_dev = cma_id->device; int ret = 0; - u8 pi_support; spin_lock_bh(&np->np_thread_lock); if (!np->enabled) { spin_unlock_bh(&np->np_thread_lock); - pr_debug("iscsi_np is not enabled, reject connect request\n"); + isert_dbg("iscsi_np is not enabled, reject connect request\n"); return 
rdma_reject(cma_id, NULL, 0); } spin_unlock_bh(&np->np_thread_lock); - pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n", + isert_dbg("cma_id: %p, portal: %p\n", cma_id, cma_id->context); isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL); if (!isert_conn) { - pr_err("Unable to allocate isert_conn\n"); + isert_err("Unable to allocate isert_conn\n"); return -ENOMEM; } isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); + init_completion(&isert_conn->login_req_comp); init_completion(&isert_conn->conn_wait); - init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); spin_lock_init(&isert_conn->conn_lock); INIT_LIST_HEAD(&isert_conn->conn_fr_pool); - cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE, GFP_KERNEL); if (!isert_conn->login_buf) { - pr_err("Unable to allocate isert_conn->login_buf\n"); + isert_err("Unable to allocate isert_conn->login_buf\n"); ret = -ENOMEM; goto out; } @@ -611,7 +618,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->login_req_buf = isert_conn->login_buf; isert_conn->login_rsp_buf = isert_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; - pr_debug("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n", + isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n", isert_conn->login_buf, isert_conn->login_req_buf, isert_conn->login_rsp_buf); @@ -621,7 +628,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma); if (ret) { - pr_err("ib_dma_mapping_error failed for login_req_dma: %d\n", + isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n", ret); isert_conn->login_req_dma = 0; goto out_login_buf; @@ -633,7 +640,7 @@ 
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma); if (ret) { - pr_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n", + isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n", ret); isert_conn->login_rsp_dma = 0; goto out_req_dma_map; @@ -649,13 +656,13 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->initiator_depth = min_t(u8, event->param.conn.initiator_depth, device->dev_attr.max_qp_init_rd_atom); - pr_debug("Using initiator_depth: %u\n", isert_conn->initiator_depth); + isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth); isert_conn->conn_device = device; isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device); if (IS_ERR(isert_conn->conn_pd)) { ret = PTR_ERR(isert_conn->conn_pd); - pr_err("ib_alloc_pd failed for conn %p: ret=%d\n", + isert_err("ib_alloc_pd failed for conn %p: ret=%d\n", isert_conn, ret); goto out_pd; } @@ -664,20 +671,20 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) IB_ACCESS_LOCAL_WRITE); if (IS_ERR(isert_conn->conn_mr)) { ret = PTR_ERR(isert_conn->conn_mr); - pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n", + isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n", isert_conn, ret); goto out_mr; } - pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi; - if (pi_support && !device->pi_capable) { - pr_err("Protection information requested but not supported, " - "rejecting connect request\n"); - ret = rdma_reject(cma_id, NULL, 0); - goto out_mr; - } + ret = isert_conn_setup_qp(isert_conn, cma_id); + if (ret) + goto out_conn_dev; - ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support); + ret = isert_rdma_post_recvl(isert_conn); + if (ret) + goto out_conn_dev; + + ret = isert_rdma_accept(isert_conn); if (ret) goto out_conn_dev; @@ -685,7 +692,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 
list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list); mutex_unlock(&isert_np->np_accept_mutex); - pr_debug("isert_connect_request() up np_sem np: %p\n", np); + isert_info("np %p: Allow accept_np to continue\n", np); up(&isert_np->np_sem); return 0; @@ -705,6 +712,7 @@ out_login_buf: kfree(isert_conn->login_buf); out: kfree(isert_conn); + rdma_reject(cma_id, NULL, 0); return ret; } @@ -713,24 +721,25 @@ isert_connect_release(struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct isert_device *device = isert_conn->conn_device; - int cq_index; - pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("conn %p\n", isert_conn); if (device && device->use_fastreg) isert_conn_free_fastreg_pool(isert_conn); + isert_free_rx_descriptors(isert_conn); + rdma_destroy_id(isert_conn->conn_cm_id); + if (isert_conn->conn_qp) { - cq_index = ((struct isert_cq_desc *) - isert_conn->conn_qp->recv_cq->cq_context)->cq_index; - pr_debug("isert_connect_release: cq_index: %d\n", cq_index); - isert_conn->conn_device->cq_active_qps[cq_index]--; + struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context; - rdma_destroy_qp(isert_conn->conn_cm_id); - } + isert_dbg("dec completion context %p active_qps\n", comp); + mutex_lock(&device_list_mutex); + comp->active_qps--; + mutex_unlock(&device_list_mutex); - isert_free_rx_descriptors(isert_conn); - rdma_destroy_id(isert_conn->conn_cm_id); + ib_destroy_qp(isert_conn->conn_qp); + } ib_dereg_mr(isert_conn->conn_mr); ib_dealloc_pd(isert_conn->conn_pd); @@ -747,16 +756,24 @@ isert_connect_release(struct isert_conn *isert_conn) if (device) isert_device_try_release(device); - - pr_debug("Leaving isert_connect_release >>>>>>>>>>>>\n"); } static void isert_connected_handler(struct rdma_cm_id *cma_id) { - struct isert_conn *isert_conn = cma_id->context; + struct isert_conn *isert_conn = cma_id->qp->qp_context; - kref_get(&isert_conn->conn_kref); + 
isert_info("conn %p\n", isert_conn); + + if (!kref_get_unless_zero(&isert_conn->conn_kref)) { + isert_warn("conn %p connect_release is running\n", isert_conn); + return; + } + + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->state != ISER_CONN_FULL_FEATURE) + isert_conn->state = ISER_CONN_UP; + mutex_unlock(&isert_conn->conn_mutex); } static void @@ -765,8 +782,8 @@ isert_release_conn_kref(struct kref *kref) struct isert_conn *isert_conn = container_of(kref, struct isert_conn, conn_kref); - pr_debug("Calling isert_connect_release for final kref %s/%d\n", - current->comm, current->pid); + isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm, + current->pid); isert_connect_release(isert_conn); } @@ -777,75 +794,111 @@ isert_put_conn(struct isert_conn *isert_conn) kref_put(&isert_conn->conn_kref, isert_release_conn_kref); } +/** + * isert_conn_terminate() - Initiate connection termination + * @isert_conn: isert connection struct + * + * Notes: + * In case the connection state is FULL_FEATURE, move state + * to TEMINATING and start teardown sequence (rdma_disconnect). + * In case the connection state is UP, complete flush as well. + * + * This routine must be called with conn_mutex held. Thus it is + * safe to call multiple times. 
+ */ static void -isert_disconnect_work(struct work_struct *work) +isert_conn_terminate(struct isert_conn *isert_conn) { - struct isert_conn *isert_conn = container_of(work, - struct isert_conn, conn_logout_work); + int err; - pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state == ISER_CONN_UP) + switch (isert_conn->state) { + case ISER_CONN_TERMINATING: + break; + case ISER_CONN_UP: + case ISER_CONN_FULL_FEATURE: /* FALLTHRU */ + isert_info("Terminating conn %p state %d\n", + isert_conn, isert_conn->state); isert_conn->state = ISER_CONN_TERMINATING; - - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - mutex_unlock(&isert_conn->conn_mutex); - goto wake_up; - } - if (!isert_conn->conn_cm_id) { - mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); - return; + err = rdma_disconnect(isert_conn->conn_cm_id); + if (err) + isert_warn("Failed rdma_disconnect isert_conn %p\n", + isert_conn); + break; + default: + isert_warn("conn %p teminating in state %d\n", + isert_conn, isert_conn->state); } +} - if (isert_conn->disconnect) { - /* Send DREQ/DREP towards our initiator */ - rdma_disconnect(isert_conn->conn_cm_id); - } +static int +isert_np_cma_handler(struct isert_np *isert_np, + enum rdma_cm_event_type event) +{ + isert_dbg("isert np %p, handling event %d\n", isert_np, event); - mutex_unlock(&isert_conn->conn_mutex); + switch (event) { + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_np->np_cm_id = NULL; + break; + case RDMA_CM_EVENT_ADDR_CHANGE: + isert_np->np_cm_id = isert_setup_id(isert_np); + if (IS_ERR(isert_np->np_cm_id)) { + isert_err("isert np %p setup id failed: %ld\n", + isert_np, PTR_ERR(isert_np->np_cm_id)); + isert_np->np_cm_id = NULL; + } + break; + default: + isert_err("isert np %p Unexpected event %d\n", + isert_np, event); + } -wake_up: - complete(&isert_conn->conn_wait); + return -1; } static int 
-isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) +isert_disconnected_handler(struct rdma_cm_id *cma_id, + enum rdma_cm_event_type event) { + struct isert_np *isert_np = cma_id->context; struct isert_conn *isert_conn; - if (!cma_id->qp) { - struct isert_np *isert_np = cma_id->context; + if (isert_np->np_cm_id == cma_id) + return isert_np_cma_handler(cma_id->context, event); - isert_np->np_cm_id = NULL; - return -1; - } + isert_conn = cma_id->qp->qp_context; - isert_conn = (struct isert_conn *)cma_id->context; + mutex_lock(&isert_conn->conn_mutex); + isert_conn_terminate(isert_conn); + mutex_unlock(&isert_conn->conn_mutex); - isert_conn->disconnect = disconnect; - INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); - schedule_work(&isert_conn->conn_logout_work); + isert_info("conn %p completing conn_wait\n", isert_conn); + complete(&isert_conn->conn_wait); return 0; } +static void +isert_connect_error(struct rdma_cm_id *cma_id) +{ + struct isert_conn *isert_conn = cma_id->qp->qp_context; + + isert_put_conn(isert_conn); +} + static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; - bool disconnect = false; - pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n", - event->event, event->status, cma_id->context, cma_id); + isert_info("event %d status %d id %p np %p\n", event->event, + event->status, cma_id, cma_id->context); switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: ret = isert_connect_request(cma_id, event); if (ret) - pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", - event->event, ret); + isert_err("failed handle connect request %d\n", ret); break; case RDMA_CM_EVENT_ESTABLISHED: isert_connected_handler(cma_id); @@ -853,13 +906,16 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ 
- disconnect = true; case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ - ret = isert_disconnected_handler(cma_id, disconnect); + ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: + isert_connect_error(cma_id); + break; default: - pr_err("Unhandled RDMA CMA event: %d\n", event->event); + isert_err("Unhandled RDMA CMA event: %d\n", event->event); break; } @@ -876,7 +932,7 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count) for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) { rx_desc = &isert_conn->conn_rx_descs[rx_head]; - rx_wr->wr_id = (unsigned long)rx_desc; + rx_wr->wr_id = (uintptr_t)rx_desc; rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; @@ -890,10 +946,10 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count) ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr, &rx_wr_failed); if (ret) { - pr_err("ib_post_recv() failed with ret: %d\n", ret); + isert_err("ib_post_recv() failed with ret: %d\n", ret); isert_conn->post_recv_buf_count -= count; } else { - pr_debug("isert_post_recv(): Posted %d RX buffers\n", count); + isert_dbg("isert_post_recv(): Posted %d RX buffers\n", count); isert_conn->conn_rx_desc_head = rx_head; } return ret; @@ -910,19 +966,15 @@ isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc) ISER_HEADERS_LEN, DMA_TO_DEVICE); send_wr.next = NULL; - send_wr.wr_id = (unsigned long)tx_desc; + send_wr.wr_id = (uintptr_t)tx_desc; send_wr.sg_list = tx_desc->tx_sg; send_wr.num_sge = tx_desc->num_sge; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - atomic_inc(&isert_conn->post_send_buf_count); - ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed); - if (ret) { - pr_err("ib_post_send() failed, ret: %d\n", ret); - atomic_dec(&isert_conn->post_send_buf_count); - } + if (ret) + 
isert_err("ib_post_send() failed, ret: %d\n", ret); return ret; } @@ -945,7 +997,7 @@ isert_create_send_desc(struct isert_conn *isert_conn, if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) { tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey; - pr_debug("tx_desc %p lkey mismatch, fixing\n", tx_desc); + isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc); } } @@ -959,7 +1011,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn, dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc, ISER_HEADERS_LEN, DMA_TO_DEVICE); if (ib_dma_mapping_error(ib_dev, dma_addr)) { - pr_err("ib_dma_mapping_error() failed\n"); + isert_err("ib_dma_mapping_error() failed\n"); return -ENOMEM; } @@ -968,40 +1020,24 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn, tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey; - pr_debug("isert_init_tx_hdrs: Setup tx_sg[0].addr: 0x%llx length: %u" - " lkey: 0x%08x\n", tx_desc->tx_sg[0].addr, - tx_desc->tx_sg[0].length, tx_desc->tx_sg[0].lkey); + isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n", + tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length, + tx_desc->tx_sg[0].lkey); return 0; } static void isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct ib_send_wr *send_wr, bool coalesce) + struct ib_send_wr *send_wr) { struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc; isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND; - send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; + send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; send_wr->opcode = IB_WR_SEND; send_wr->sg_list = &tx_desc->tx_sg[0]; send_wr->num_sge = isert_cmd->tx_desc.num_sge; - /* - * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED - * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. 
- */ - mutex_lock(&isert_conn->conn_mutex); - if (coalesce && isert_conn->state == ISER_CONN_UP && - ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { - tx_desc->llnode_active = true; - llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_mutex); - return; - } - isert_conn->conn_comp_batch = 0; - tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_mutex); - send_wr->send_flags = IB_SEND_SIGNALED; } @@ -1017,22 +1053,21 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn) sge.length = ISER_RX_LOGIN_SIZE; sge.lkey = isert_conn->conn_mr->lkey; - pr_debug("Setup sge: addr: %llx length: %d 0x%08x\n", + isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n", sge.addr, sge.length, sge.lkey); memset(&rx_wr, 0, sizeof(struct ib_recv_wr)); - rx_wr.wr_id = (unsigned long)isert_conn->login_req_buf; + rx_wr.wr_id = (uintptr_t)isert_conn->login_req_buf; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; isert_conn->post_recv_buf_count++; ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail); if (ret) { - pr_err("ib_post_recv() failed: %d\n", ret); + isert_err("ib_post_recv() failed: %d\n", ret); isert_conn->post_recv_buf_count--; } - pr_debug("ib_post_recv(): returned success >>>>>>>>>>>>>>>>>>>>>>>>\n"); return ret; } @@ -1072,13 +1107,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, if (login->login_complete) { if (!conn->sess->sess_ops->SessionType && isert_conn->conn_device->use_fastreg) { - /* Normal Session and fastreg is used */ - u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi; - - ret = isert_conn_create_fastreg_pool(isert_conn, - pi_support); + ret = isert_conn_create_fastreg_pool(isert_conn); if (ret) { - pr_err("Conn: %p failed to create" + isert_err("Conn: %p failed to create" " fastreg pool\n", isert_conn); return ret; } @@ -1092,7 +1123,10 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, if (ret) 
return ret; - isert_conn->state = ISER_CONN_UP; + /* Now we are in FULL_FEATURE phase */ + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_FULL_FEATURE; + mutex_unlock(&isert_conn->conn_mutex); goto post_send; } @@ -1109,18 +1143,17 @@ post_send: } static void -isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, - struct isert_conn *isert_conn) +isert_rx_login_req(struct isert_conn *isert_conn) { + struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf; + int rx_buflen = isert_conn->login_req_len; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; int size; - if (!login) { - pr_err("conn->conn_login is NULL\n"); - dump_stack(); - return; - } + isert_info("conn %p\n", isert_conn); + + WARN_ON_ONCE(!login); if (login->first_request) { struct iscsi_login_req *login_req = @@ -1146,8 +1179,9 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, memcpy(&login->req[0], (void *)&rx_desc->iscsi_header, ISCSI_HDR_LEN); size = min(rx_buflen, MAX_KEY_VALUE_PAIRS); - pr_debug("Using login payload size: %d, rx_buflen: %d MAX_KEY_VALUE_PAIRS: %d\n", - size, rx_buflen, MAX_KEY_VALUE_PAIRS); + isert_dbg("Using login payload size: %d, rx_buflen: %d " + "MAX_KEY_VALUE_PAIRS: %d\n", size, rx_buflen, + MAX_KEY_VALUE_PAIRS); memcpy(login->req_buf, &rx_desc->data[0], size); if (login->first_request) { @@ -1166,7 +1200,7 @@ static struct iscsi_cmd cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE); if (!cmd) { - pr_err("Unable to allocate iscsi_cmd + isert_cmd\n"); + isert_err("Unable to allocate iscsi_cmd + isert_cmd\n"); return NULL; } isert_cmd = iscsit_priv_cmd(cmd); @@ -1209,8 +1243,8 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, sg = &cmd->se_cmd.t_data_sg[0]; sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE)); - pr_debug("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n", - sg, sg_nents, &rx_desc->data[0], imm_data_len); + isert_dbg("Copying Immediate 
SG: %p sg_nents: %u from %p imm_data_len: %d\n", + sg, sg_nents, &rx_desc->data[0], imm_data_len); sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len); @@ -1254,13 +1288,15 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, * FIXME: Unexpected unsolicited_data out */ if (!cmd->unsolicited_data) { - pr_err("Received unexpected solicited data payload\n"); + isert_err("Received unexpected solicited data payload\n"); dump_stack(); return -1; } - pr_debug("Unsolicited DataOut unsol_data_len: %u, write_data_done: %u, data_length: %u\n", - unsol_data_len, cmd->write_data_done, cmd->se_cmd.data_length); + isert_dbg("Unsolicited DataOut unsol_data_len: %u, " + "write_data_done: %u, data_length: %u\n", + unsol_data_len, cmd->write_data_done, + cmd->se_cmd.data_length); sg_off = cmd->write_data_done / PAGE_SIZE; sg_start = &cmd->se_cmd.t_data_sg[sg_off]; @@ -1270,12 +1306,13 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, * FIXME: Non page-aligned unsolicited_data out */ if (page_off) { - pr_err("Received unexpected non-page aligned data payload\n"); + isert_err("unexpected non-page aligned data payload\n"); dump_stack(); return -1; } - pr_debug("Copying DataOut: sg_start: %p, sg_off: %u sg_nents: %u from %p %u\n", - sg_start, sg_off, sg_nents, &rx_desc->data[0], unsol_data_len); + isert_dbg("Copying DataOut: sg_start: %p, sg_off: %u " + "sg_nents: %u from %p %u\n", sg_start, sg_off, + sg_nents, &rx_desc->data[0], unsol_data_len); sg_copy_from_buffer(sg_start, sg_nents, &rx_desc->data[0], unsol_data_len); @@ -1322,8 +1359,8 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd text_in = kzalloc(payload_length, GFP_KERNEL); if (!text_in) { - pr_err("Unable to allocate text_in of payload_length: %u\n", - payload_length); + isert_err("Unable to allocate text_in of payload_length: %u\n", + payload_length); return -ENOMEM; } cmd->text_in_ptr = text_in; @@ -1348,8 +1385,8 @@ isert_rx_opcode(struct isert_conn 
*isert_conn, struct iser_rx_desc *rx_desc, if (sess->sess_ops->SessionType && (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) { - pr_err("Got illegal opcode: 0x%02x in SessionType=Discovery," - " ignoring\n", opcode); + isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery," + " ignoring\n", opcode); return 0; } @@ -1395,10 +1432,6 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, break; ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr); - if (ret > 0) - wait_for_completion_timeout(&conn->conn_logout_comp, - SECONDS_FOR_LOGOUT_COMP * - HZ); break; case ISCSI_OP_TEXT: cmd = isert_allocate_cmd(conn); @@ -1410,7 +1443,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, rx_desc, (struct iscsi_text *)hdr); break; default: - pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode); + isert_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode); dump_stack(); break; } @@ -1431,23 +1464,23 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) if (iser_hdr->flags & ISER_RSV) { read_stag = be32_to_cpu(iser_hdr->read_stag); read_va = be64_to_cpu(iser_hdr->read_va); - pr_debug("ISER_RSV: read_stag: 0x%08x read_va: 0x%16llx\n", - read_stag, (unsigned long long)read_va); + isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n", + read_stag, (unsigned long long)read_va); } if (iser_hdr->flags & ISER_WSV) { write_stag = be32_to_cpu(iser_hdr->write_stag); write_va = be64_to_cpu(iser_hdr->write_va); - pr_debug("ISER_WSV: write__stag: 0x%08x write_va: 0x%16llx\n", - write_stag, (unsigned long long)write_va); + isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n", + write_stag, (unsigned long long)write_va); } - pr_debug("ISER ISCSI_CTRL PDU\n"); + isert_dbg("ISER ISCSI_CTRL PDU\n"); break; case ISER_HELLO: - pr_err("iSER Hello message\n"); + isert_err("iSER Hello message\n"); break; default: - pr_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags); + 
isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags); break; } @@ -1457,7 +1490,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) static void isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, - unsigned long xfer_len) + u32 xfer_len) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct iscsi_hdr *hdr; @@ -1467,34 +1500,43 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, if ((char *)desc == isert_conn->login_req_buf) { rx_dma = isert_conn->login_req_dma; rx_buflen = ISER_RX_LOGIN_SIZE; - pr_debug("ISER login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n", + isert_dbg("login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n", rx_dma, rx_buflen); } else { rx_dma = desc->dma_addr; rx_buflen = ISER_RX_PAYLOAD_SIZE; - pr_debug("ISER req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n", + isert_dbg("req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n", rx_dma, rx_buflen); } ib_dma_sync_single_for_cpu(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE); hdr = &desc->iscsi_header; - pr_debug("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n", + isert_dbg("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n", hdr->opcode, hdr->itt, hdr->flags, (int)(xfer_len - ISER_HEADERS_LEN)); - if ((char *)desc == isert_conn->login_req_buf) - isert_rx_login_req(desc, xfer_len - ISER_HEADERS_LEN, - isert_conn); - else + if ((char *)desc == isert_conn->login_req_buf) { + isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN; + if (isert_conn->conn) { + struct iscsi_login *login = isert_conn->conn->conn_login; + + if (login && !login->first_request) + isert_rx_login_req(isert_conn); + } + mutex_lock(&isert_conn->conn_mutex); + complete(&isert_conn->login_req_comp); + mutex_unlock(&isert_conn->conn_mutex); + } else { isert_rx_do_work(desc, isert_conn); + } ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE); isert_conn->post_recv_buf_count--; - 
pr_debug("iSERT: Decremented post_recv_buf_count: %d\n", - isert_conn->post_recv_buf_count); + isert_dbg("Decremented post_recv_buf_count: %d\n", + isert_conn->post_recv_buf_count); if ((char *)desc == isert_conn->login_req_buf) return; @@ -1505,7 +1547,7 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, ISERT_MIN_POSTED_RX); err = isert_post_recv(isert_conn, count); if (err) { - pr_err("isert_post_recv() count: %d failed, %d\n", + isert_err("isert_post_recv() count: %d failed, %d\n", count, err); } } @@ -1534,12 +1576,12 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents, data->dma_dir); if (unlikely(!data->dma_nents)) { - pr_err("Cmd: unable to dma map SGs %p\n", sg); + isert_err("Cmd: unable to dma map SGs %p\n", sg); return -EINVAL; } - pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", - isert_cmd, data->dma_nents, data->sg, data->nents, data->len); + isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, data->dma_nents, data->sg, data->nents, data->len); return 0; } @@ -1560,21 +1602,21 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) { struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; - pr_debug("isert_unmap_cmd: %p\n", isert_cmd); + isert_dbg("Cmd %p\n", isert_cmd); if (wr->data.sg) { - pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd); + isert_dbg("Cmd %p unmap_sg op\n", isert_cmd); isert_unmap_data_buf(isert_conn, &wr->data); } if (wr->send_wr) { - pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd); + isert_dbg("Cmd %p free send_wr\n", isert_cmd); kfree(wr->send_wr); wr->send_wr = NULL; } if (wr->ib_sge) { - pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd); + isert_dbg("Cmd %p free ib_sge\n", isert_cmd); kfree(wr->ib_sge); wr->ib_sge = NULL; } @@ -1586,11 +1628,10 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn 
*isert_conn) struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; LIST_HEAD(unmap_list); - pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd); + isert_dbg("Cmd %p\n", isert_cmd); if (wr->fr_desc) { - pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n", - isert_cmd, wr->fr_desc); + isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, wr->fr_desc); if (wr->fr_desc->ind & ISERT_PROTECTED) { isert_unmap_data_buf(isert_conn, &wr->prot); wr->fr_desc->ind &= ~ISERT_PROTECTED; @@ -1602,7 +1643,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) } if (wr->data.sg) { - pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd); + isert_dbg("Cmd %p unmap_sg op\n", isert_cmd); isert_unmap_data_buf(isert_conn, &wr->data); } @@ -1618,7 +1659,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) struct iscsi_conn *conn = isert_conn->conn; struct isert_device *device = isert_conn->conn_device; - pr_debug("Entering isert_put_cmd: %p\n", isert_cmd); + isert_dbg("Cmd %p\n", isert_cmd); switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: @@ -1668,7 +1709,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) * associated cmd->se_cmd needs to be released. 
*/ if (cmd->se_cmd.se_tfo != NULL) { - pr_debug("Calling transport_generic_free_cmd from" + isert_dbg("Calling transport_generic_free_cmd from" " isert_put_cmd for 0x%02x\n", cmd->iscsi_opcode); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1687,7 +1728,7 @@ static void isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) { if (tx_desc->dma_addr != 0) { - pr_debug("Calling ib_dma_unmap_single for tx_desc->dma_addr\n"); + isert_dbg("unmap single for tx_desc->dma_addr\n"); ib_dma_unmap_single(ib_dev, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); tx_desc->dma_addr = 0; @@ -1699,7 +1740,7 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, struct ib_device *ib_dev, bool comp_err) { if (isert_cmd->pdu_buf_dma != 0) { - pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n"); + isert_dbg("unmap single for isert_cmd->pdu_buf_dma\n"); ib_dma_unmap_single(ib_dev, isert_cmd->pdu_buf_dma, isert_cmd->pdu_buf_len, DMA_TO_DEVICE); isert_cmd->pdu_buf_dma = 0; @@ -1717,7 +1758,7 @@ isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr) ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { - pr_err("ib_check_mr_status failed, ret %d\n", ret); + isert_err("ib_check_mr_status failed, ret %d\n", ret); goto fail_mr_status; } @@ -1740,12 +1781,12 @@ isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr) do_div(sec_offset_err, block_size); se_cmd->bad_sector = sec_offset_err + se_cmd->t_task_lba; - pr_err("isert: PI error found type %d at sector 0x%llx " - "expected 0x%x vs actual 0x%x\n", - mr_status.sig_err.err_type, - (unsigned long long)se_cmd->bad_sector, - mr_status.sig_err.expected, - mr_status.sig_err.actual); + isert_err("PI error found type %d at sector 0x%llx " + "expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, + (unsigned long long)se_cmd->bad_sector, + mr_status.sig_err.expected, + mr_status.sig_err.actual); ret = 1; } @@ -1801,7 
+1842,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, cmd->write_data_done = wr->data.len; wr->send_wr_num = 0; - pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); + isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; @@ -1823,36 +1864,22 @@ isert_do_control_comp(struct work_struct *work) struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state); + switch (cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: - pr_debug("Calling iscsit_tmr_post_handler >>>>>>>>>>>>>>>>>\n"); - - atomic_dec(&isert_conn->post_send_buf_count); iscsit_tmr_post_handler(cmd, cmd->conn); - - cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); - break; - case ISTATE_SEND_REJECT: - pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n"); - atomic_dec(&isert_conn->post_send_buf_count); - + case ISTATE_SEND_REJECT: /* FALLTHRU */ + case ISTATE_SEND_TEXTRSP: /* FALLTHRU */ cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, + ib_dev, false); break; case ISTATE_SEND_LOGOUTRSP: - pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); - - atomic_dec(&isert_conn->post_send_buf_count); iscsit_logout_post_handler(cmd, cmd->conn); break; - case ISTATE_SEND_TEXTRSP: - atomic_dec(&isert_conn->post_send_buf_count); - cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); - break; default: - pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state); + isert_err("Unknown i_state %d\n", cmd->i_state); dump_stack(); break; } @@ -1865,7 +1892,6 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct 
ib_device *ib_dev) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1878,267 +1904,151 @@ isert_response_completion(struct iser_tx_desc *tx_desc, return; } - /** - * If send_wr_num is 0 this means that we got - * RDMA completion and we cleared it and we should - * simply decrement the response post. else the - * response is incorporated in send_wr_num, just - * sub it. - **/ - if (wr->send_wr_num) - atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); - else - atomic_dec(&isert_conn->post_send_buf_count); - cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(tx_desc, isert_cmd, ib_dev, false); } static void -__isert_send_completion(struct iser_tx_desc *tx_desc, - struct isert_conn *isert_conn) +isert_send_completion(struct iser_tx_desc *tx_desc, + struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct isert_cmd *isert_cmd = tx_desc->isert_cmd; struct isert_rdma_wr *wr; if (!isert_cmd) { - atomic_dec(&isert_conn->post_send_buf_count); isert_unmap_tx_desc(tx_desc, ib_dev); return; } wr = &isert_cmd->rdma_wr; + isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op); + switch (wr->iser_ib_op) { case ISER_IB_RECV: - pr_err("isert_send_completion: Got ISER_IB_RECV\n"); + isert_err("Got ISER_IB_RECV\n"); dump_stack(); break; case ISER_IB_SEND: - pr_debug("isert_send_completion: Got ISER_IB_SEND\n"); isert_response_completion(tx_desc, isert_cmd, isert_conn, ib_dev); break; case ISER_IB_RDMA_WRITE: - pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n"); - atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); isert_completion_rdma_write(tx_desc, isert_cmd); break; case ISER_IB_RDMA_READ: - pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n"); - - atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); isert_completion_rdma_read(tx_desc, 
isert_cmd); break; default: - pr_err("Unknown wr->iser_ib_op: 0x%02x\n", wr->iser_ib_op); + isert_err("Unknown wr->iser_ib_op: 0x%x\n", wr->iser_ib_op); dump_stack(); break; } } -static void -isert_send_completion(struct iser_tx_desc *tx_desc, - struct isert_conn *isert_conn) -{ - struct llist_node *llnode = tx_desc->comp_llnode_batch; - struct iser_tx_desc *t; - /* - * Drain coalesced completion llist starting from comp_llnode_batch - * setup in isert_init_send_wr(), and then complete trailing tx_desc. - */ - while (llnode) { - t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); - llnode = llist_next(llnode); - __isert_send_completion(t, isert_conn); - } - __isert_send_completion(tx_desc, isert_conn); -} - -static void -isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev) +/** + * is_isert_tx_desc() - Indicate if the completion wr_id + * is a TX descriptor or not. + * @isert_conn: iser connection + * @wr_id: completion WR identifier + * + * Since we cannot rely on wc opcode in FLUSH errors + * we must work around it by checking if the wr_id address + * falls in the iser connection rx_descs buffer. If so + * it is an RX descriptor, otherwize it is a TX. + */ +static inline bool +is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id) { - struct llist_node *llnode; - struct isert_rdma_wr *wr; - struct iser_tx_desc *t; + void *start = isert_conn->conn_rx_descs; + int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs); - mutex_lock(&isert_conn->conn_mutex); - llnode = llist_del_all(&isert_conn->conn_comp_llist); - isert_conn->conn_comp_batch = 0; - mutex_unlock(&isert_conn->conn_mutex); - - while (llnode) { - t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); - llnode = llist_next(llnode); - wr = &t->isert_cmd->rdma_wr; - - /** - * If send_wr_num is 0 this means that we got - * RDMA completion and we cleared it and we should - * simply decrement the response post. 
else the - * response is incorporated in send_wr_num, just - * sub it. - **/ - if (wr->send_wr_num) - atomic_sub(wr->send_wr_num, - &isert_conn->post_send_buf_count); - else - atomic_dec(&isert_conn->post_send_buf_count); + if (wr_id >= start && wr_id < start + len) + return false; - isert_completion_put(t, t->isert_cmd, ib_dev, true); - } + return true; } static void -isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc) { - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - struct isert_cmd *isert_cmd = tx_desc->isert_cmd; - struct llist_node *llnode = tx_desc->comp_llnode_batch; - struct isert_rdma_wr *wr; - struct iser_tx_desc *t; - - while (llnode) { - t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); - llnode = llist_next(llnode); - wr = &t->isert_cmd->rdma_wr; + if (wc->wr_id == ISER_BEACON_WRID) { + isert_info("conn %p completing conn_wait_comp_err\n", + isert_conn); + complete(&isert_conn->conn_wait_comp_err); + } else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) { + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_cmd *isert_cmd; + struct iser_tx_desc *desc; - /** - * If send_wr_num is 0 this means that we got - * RDMA completion and we cleared it and we should - * simply decrement the response post. else the - * response is incorporated in send_wr_num, just - * sub it. 
- **/ - if (wr->send_wr_num) - atomic_sub(wr->send_wr_num, - &isert_conn->post_send_buf_count); + desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; + isert_cmd = desc->isert_cmd; + if (!isert_cmd) + isert_unmap_tx_desc(desc, ib_dev); else - atomic_dec(&isert_conn->post_send_buf_count); - - isert_completion_put(t, t->isert_cmd, ib_dev, true); - } - tx_desc->comp_llnode_batch = NULL; - - if (!isert_cmd) - isert_unmap_tx_desc(tx_desc, ib_dev); - else - isert_completion_put(tx_desc, isert_cmd, ib_dev, true); -} - -static void -isert_cq_rx_comp_err(struct isert_conn *isert_conn) -{ - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - struct iscsi_conn *conn = isert_conn->conn; - - if (isert_conn->post_recv_buf_count) - return; - - isert_cq_drain_comp_llist(isert_conn, ib_dev); - - if (conn->sess) { - target_sess_cmd_list_set_waiting(conn->sess->se_sess); - target_wait_for_sess_cmds(conn->sess->se_sess); + isert_completion_put(desc, isert_cmd, ib_dev, true); + } else { + isert_conn->post_recv_buf_count--; + if (!isert_conn->post_recv_buf_count) + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); } - - while (atomic_read(&isert_conn->post_send_buf_count)) - msleep(3000); - - mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; - mutex_unlock(&isert_conn->conn_mutex); - - iscsit_cause_connection_reinstatement(isert_conn->conn, 0); - - complete(&isert_conn->conn_wait_comp_err); } static void -isert_cq_tx_work(struct work_struct *work) +isert_handle_wc(struct ib_wc *wc) { - struct isert_cq_desc *cq_desc = container_of(work, - struct isert_cq_desc, cq_tx_work); - struct isert_device *device = cq_desc->device; - int cq_index = cq_desc->cq_index; - struct ib_cq *tx_cq = device->dev_tx_cq[cq_index]; struct isert_conn *isert_conn; struct iser_tx_desc *tx_desc; - struct ib_wc wc; - - while (ib_poll_cq(tx_cq, 1, &wc) == 1) { - tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id; - isert_conn = wc.qp->qp_context; + struct iser_rx_desc 
*rx_desc; - if (wc.status == IB_WC_SUCCESS) { - isert_send_completion(tx_desc, isert_conn); + isert_conn = wc->qp->qp_context; + if (likely(wc->status == IB_WC_SUCCESS)) { + if (wc->opcode == IB_WC_RECV) { + rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; + isert_rx_completion(rx_desc, isert_conn, wc->byte_len); } else { - pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); - pr_debug("TX wc.status: 0x%08x\n", wc.status); - pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); - - if (wc.wr_id != ISER_FASTREG_LI_WRID) { - if (tx_desc->llnode_active) - continue; - - atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_tx_comp_err(tx_desc, isert_conn); - } + tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; + isert_send_completion(tx_desc, isert_conn); } - } - - ib_req_notify_cq(tx_cq, IB_CQ_NEXT_COMP); -} - -static void -isert_cq_tx_callback(struct ib_cq *cq, void *context) -{ - struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context; + } else { + if (wc->status != IB_WC_WR_FLUSH_ERR) + isert_err("wr id %llx status %d vend_err %x\n", + wc->wr_id, wc->status, wc->vendor_err); + else + isert_dbg("flush error: wr id %llx\n", wc->wr_id); - queue_work(isert_comp_wq, &cq_desc->cq_tx_work); + if (wc->wr_id != ISER_FASTREG_LI_WRID) + isert_cq_comp_err(isert_conn, wc); + } } static void -isert_cq_rx_work(struct work_struct *work) +isert_cq_work(struct work_struct *work) { - struct isert_cq_desc *cq_desc = container_of(work, - struct isert_cq_desc, cq_rx_work); - struct isert_device *device = cq_desc->device; - int cq_index = cq_desc->cq_index; - struct ib_cq *rx_cq = device->dev_rx_cq[cq_index]; - struct isert_conn *isert_conn; - struct iser_rx_desc *rx_desc; - struct ib_wc wc; - unsigned long xfer_len; + enum { isert_poll_budget = 65536 }; + struct isert_comp *comp = container_of(work, struct isert_comp, + work); + struct ib_wc *const wcs = comp->wcs; + int i, n, completed = 0; - while (ib_poll_cq(rx_cq, 1, &wc) == 1) { - rx_desc = (struct 
iser_rx_desc *)(unsigned long)wc.wr_id; - isert_conn = wc.qp->qp_context; + while ((n = ib_poll_cq(comp->cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { + for (i = 0; i < n; i++) + isert_handle_wc(&wcs[i]); - if (wc.status == IB_WC_SUCCESS) { - xfer_len = (unsigned long)wc.byte_len; - isert_rx_completion(rx_desc, isert_conn, xfer_len); - } else { - pr_debug("RX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); - if (wc.status != IB_WC_WR_FLUSH_ERR) { - pr_debug("RX wc.status: 0x%08x\n", wc.status); - pr_debug("RX wc.vendor_err: 0x%08x\n", - wc.vendor_err); - } - isert_conn->post_recv_buf_count--; - isert_cq_rx_comp_err(isert_conn); - } + completed += n; + if (completed >= isert_poll_budget) + break; } - ib_req_notify_cq(rx_cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP); } static void -isert_cq_rx_callback(struct ib_cq *cq, void *context) +isert_cq_callback(struct ib_cq *cq, void *context) { - struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context; + struct isert_comp *comp = context; - queue_work(isert_rx_wq, &cq_desc->cq_rx_work); + queue_work(isert_comp_wq, &comp->work); } static int @@ -2147,13 +2057,10 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) struct ib_send_wr *wr_failed; int ret; - atomic_inc(&isert_conn->post_send_buf_count); - ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr, &wr_failed); if (ret) { - pr_err("ib_post_send failed with %d\n", ret); - atomic_dec(&isert_conn->post_send_buf_count); + isert_err("ib_post_send failed with %d\n", ret); return ret; } return ret; @@ -2200,9 +2107,9 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); + isert_init_send_wr(isert_conn, isert_cmd, send_wr); - pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("Posting SCSI Response\n"); return isert_post_response(isert_conn, isert_cmd); } @@ -2231,8 
+2138,16 @@ isert_get_sup_prot_ops(struct iscsi_conn *conn) struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct isert_device *device = isert_conn->conn_device; - if (device->pi_capable) - return TARGET_PROT_ALL; + if (conn->tpg->tpg_attrib.t10_pi) { + if (device->pi_capable) { + isert_info("conn %p PI offload enabled\n", isert_conn); + isert_conn->pi_support = true; + return TARGET_PROT_ALL; + } + } + + isert_info("conn %p PI offload disabled\n", isert_conn); + isert_conn->pi_support = false; return TARGET_PROT_NORMAL; } @@ -2250,9 +2165,9 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, &isert_cmd->tx_desc.iscsi_header, nopout_response); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); + isert_init_send_wr(isert_conn, isert_cmd, send_wr); - pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("conn %p Posting NOPIN Response\n", isert_conn); return isert_post_response(isert_conn, isert_cmd); } @@ -2268,9 +2183,9 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); + isert_init_send_wr(isert_conn, isert_cmd, send_wr); - pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("conn %p Posting Logout Response\n", isert_conn); return isert_post_response(isert_conn, isert_cmd); } @@ -2286,9 +2201,9 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); + isert_init_send_wr(isert_conn, isert_cmd, send_wr); - pr_debug("Posting Task Management Response IB_WR_SEND 
>>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("conn %p Posting Task Management Response\n", isert_conn); return isert_post_response(isert_conn, isert_cmd); } @@ -2318,9 +2233,9 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; - isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); + isert_init_send_wr(isert_conn, isert_cmd, send_wr); - pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("conn %p Posting Reject\n", isert_conn); return isert_post_response(isert_conn, isert_cmd); } @@ -2358,9 +2273,9 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); + isert_init_send_wr(isert_conn, isert_cmd, send_wr); - pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); + isert_dbg("conn %p Text Reject\n", isert_conn); return isert_post_response(isert_conn, isert_cmd); } @@ -2383,30 +2298,31 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, send_wr->sg_list = ib_sge; send_wr->num_sge = sg_nents; - send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; + send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; /* * Perform mapping of TCM scatterlist memory ib_sge dma_addr. 
*/ for_each_sg(sg_start, tmp_sg, sg_nents, i) { - pr_debug("ISER RDMA from SGL dma_addr: 0x%16llx dma_len: %u, page_off: %u\n", - (unsigned long long)tmp_sg->dma_address, - tmp_sg->length, page_off); + isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, " + "page_off: %u\n", + (unsigned long long)tmp_sg->dma_address, + tmp_sg->length, page_off); ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off; ib_sge->length = min_t(u32, data_left, ib_sg_dma_len(ib_dev, tmp_sg) - page_off); ib_sge->lkey = isert_conn->conn_mr->lkey; - pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", - ib_sge->addr, ib_sge->length, ib_sge->lkey); + isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); page_off = 0; data_left -= ib_sge->length; ib_sge++; - pr_debug("Incrementing ib_sge pointer to %p\n", ib_sge); + isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); } - pr_debug("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", - send_wr->sg_list, send_wr->num_sge); + isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", + send_wr->sg_list, send_wr->num_sge); return sg_nents; } @@ -2438,7 +2354,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL); if (!ib_sge) { - pr_warn("Unable to allocate ib_sge\n"); + isert_warn("Unable to allocate ib_sge\n"); ret = -ENOMEM; goto unmap_cmd; } @@ -2448,7 +2364,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, GFP_KERNEL); if (!wr->send_wr) { - pr_debug("Unable to allocate wr->send_wr\n"); + isert_dbg("Unable to allocate wr->send_wr\n"); ret = -ENOMEM; goto unmap_cmd; } @@ -2512,9 +2428,9 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, chunk_start = start_addr; end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg); - pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n", - i, (unsigned long 
long)tmp_sg->dma_address, - tmp_sg->length); + isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n", + i, (unsigned long long)tmp_sg->dma_address, + tmp_sg->length); if ((end_addr & ~PAGE_MASK) && i < last_ent) { new_chunk = 0; @@ -2525,8 +2441,8 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, page = chunk_start & PAGE_MASK; do { fr_pl[n_pages++] = page; - pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n", - n_pages - 1, page); + isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n", + n_pages - 1, page); page += PAGE_SIZE; } while (page < end_addr); } @@ -2534,6 +2450,21 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, return n_pages; } +static inline void +isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) +{ + u32 rkey; + + memset(inv_wr, 0, sizeof(*inv_wr)); + inv_wr->wr_id = ISER_FASTREG_LI_WRID; + inv_wr->opcode = IB_WR_LOCAL_INV; + inv_wr->ex.invalidate_rkey = mr->rkey; + + /* Bump the key */ + rkey = ib_inc_rkey(mr->rkey); + ib_update_fast_reg_key(mr, rkey); +} + static int isert_fast_reg_mr(struct isert_conn *isert_conn, struct fast_reg_descriptor *fr_desc, @@ -2548,15 +2479,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, struct ib_send_wr *bad_wr, *wr = NULL; int ret, pagelist_len; u32 page_off; - u8 key; if (mem->dma_nents == 1) { sge->lkey = isert_conn->conn_mr->lkey; sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]); sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]); - pr_debug("%s:%d sge: addr: 0x%llx length: %u lkey: %x\n", - __func__, __LINE__, sge->addr, sge->length, - sge->lkey); + isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", + sge->addr, sge->length, sge->lkey); return 0; } @@ -2572,21 +2501,15 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, page_off = mem->offset % PAGE_SIZE; - pr_debug("Use fr_desc %p sg_nents %d offset %u\n", - fr_desc, mem->nents, mem->offset); + isert_dbg("Use fr_desc %p sg_nents %d offset %u\n", + fr_desc, mem->nents, mem->offset); pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, 
mem->nents, &frpl->page_list[0]); - if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) { - memset(&inv_wr, 0, sizeof(inv_wr)); - inv_wr.wr_id = ISER_FASTREG_LI_WRID; - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.ex.invalidate_rkey = mr->rkey; + if (!(fr_desc->ind & ind)) { + isert_inv_rkey(&inv_wr, mr); wr = &inv_wr; - /* Bump the key */ - key = (u8)(mr->rkey & 0x000000FF); - ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ @@ -2608,7 +2531,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); if (ret) { - pr_err("fast registration failed, ret:%d\n", ret); + isert_err("fast registration failed, ret:%d\n", ret); return ret; } fr_desc->ind &= ~ind; @@ -2617,9 +2540,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, sge->addr = frpl->page_list[0] + page_off; sge->length = mem->len; - pr_debug("%s:%d sge: addr: 0x%llx length: %u lkey: %x\n", - __func__, __LINE__, sge->addr, sge->length, - sge->lkey); + isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", + sge->addr, sge->length, sge->lkey); return ret; } @@ -2665,7 +2587,7 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem); break; default: - pr_err("Unsupported PI operation %d\n", se_cmd->prot_op); + isert_err("Unsupported PI operation %d\n", se_cmd->prot_op); return -EINVAL; } @@ -2681,17 +2603,16 @@ isert_set_prot_checks(u8 prot_checks) } static int -isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd, - struct fast_reg_descriptor *fr_desc, - struct ib_sge *data_sge, struct ib_sge *prot_sge, - struct ib_sge *sig_sge) +isert_reg_sig_mr(struct isert_conn *isert_conn, + struct se_cmd *se_cmd, + struct isert_rdma_wr *rdma_wr, + struct fast_reg_descriptor *fr_desc) { struct ib_send_wr sig_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; struct pi_context *pi_ctx = fr_desc->pi_ctx; struct ib_sig_attrs sig_attrs; int ret; - u32 key; memset(&sig_attrs, 0, 
sizeof(sig_attrs)); ret = isert_set_sig_attrs(se_cmd, &sig_attrs); @@ -2701,26 +2622,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd, sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks); if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) { - memset(&inv_wr, 0, sizeof(inv_wr)); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.wr_id = ISER_FASTREG_LI_WRID; - inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; + isert_inv_rkey(&inv_wr, pi_ctx->sig_mr); wr = &inv_wr; - /* Bump the key */ - key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); } memset(&sig_wr, 0, sizeof(sig_wr)); sig_wr.opcode = IB_WR_REG_SIG_MR; sig_wr.wr_id = ISER_FASTREG_LI_WRID; - sig_wr.sg_list = data_sge; + sig_wr.sg_list = &rdma_wr->ib_sg[DATA]; sig_wr.num_sge = 1; sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE; sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; if (se_cmd->t_prot_sg) - sig_wr.wr.sig_handover.prot = prot_sge; + sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT]; if (!wr) wr = &sig_wr; @@ -2729,39 +2644,98 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd, ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); if (ret) { - pr_err("fast registration failed, ret:%d\n", ret); + isert_err("fast registration failed, ret:%d\n", ret); goto err; } fr_desc->ind &= ~ISERT_SIG_KEY_VALID; - sig_sge->lkey = pi_ctx->sig_mr->lkey; - sig_sge->addr = 0; - sig_sge->length = se_cmd->data_length; + rdma_wr->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey; + rdma_wr->ib_sg[SIG].addr = 0; + rdma_wr->ib_sg[SIG].length = se_cmd->data_length; if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP && se_cmd->prot_op != TARGET_PROT_DOUT_INSERT) /* * We have protection guards on the wire * so we need to set a larget transfer */ - sig_sge->length += se_cmd->prot_length; + rdma_wr->ib_sg[SIG].length += se_cmd->prot_length; - pr_debug("sig_sge: addr: 0x%llx length: %u lkey: %x\n", - 
sig_sge->addr, sig_sge->length, - sig_sge->lkey); + isert_dbg("sig_sge: addr: 0x%llx length: %u lkey: %x\n", + rdma_wr->ib_sg[SIG].addr, rdma_wr->ib_sg[SIG].length, + rdma_wr->ib_sg[SIG].lkey); err: return ret; } static int +isert_handle_prot_cmd(struct isert_conn *isert_conn, + struct isert_cmd *isert_cmd, + struct isert_rdma_wr *wr) +{ + struct isert_device *device = isert_conn->conn_device; + struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd; + int ret; + + if (!wr->fr_desc->pi_ctx) { + ret = isert_create_pi_ctx(wr->fr_desc, + device->ib_device, + isert_conn->conn_pd); + if (ret) { + isert_err("conn %p failed to allocate pi_ctx\n", + isert_conn); + return ret; + } + } + + if (se_cmd->t_prot_sg) { + ret = isert_map_data_buf(isert_conn, isert_cmd, + se_cmd->t_prot_sg, + se_cmd->t_prot_nents, + se_cmd->prot_length, + 0, wr->iser_ib_op, &wr->prot); + if (ret) { + isert_err("conn %p failed to map protection buffer\n", + isert_conn); + return ret; + } + + memset(&wr->ib_sg[PROT], 0, sizeof(wr->ib_sg[PROT])); + ret = isert_fast_reg_mr(isert_conn, wr->fr_desc, &wr->prot, + ISERT_PROT_KEY_VALID, &wr->ib_sg[PROT]); + if (ret) { + isert_err("conn %p failed to fast reg mr\n", + isert_conn); + goto unmap_prot_cmd; + } + } + + ret = isert_reg_sig_mr(isert_conn, se_cmd, wr, wr->fr_desc); + if (ret) { + isert_err("conn %p failed to fast reg mr\n", + isert_conn); + goto unmap_prot_cmd; + } + wr->fr_desc->ind |= ISERT_PROTECTED; + + return 0; + +unmap_prot_cmd: + if (se_cmd->t_prot_sg) + isert_unmap_data_buf(isert_conn, &wr->prot); + + return ret; +} + +static int isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; - struct ib_sge data_sge; - struct ib_send_wr *send_wr; struct fast_reg_descriptor *fr_desc = NULL; + struct ib_send_wr *send_wr; + struct ib_sge *ib_sg; u32 offset; int ret = 0; unsigned 
long flags; @@ -2775,8 +2749,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (ret) return ret; - if (wr->data.dma_nents != 1 || - se_cmd->prot_op != TARGET_PROT_NORMAL) { + if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) { spin_lock_irqsave(&isert_conn->conn_lock, flags); fr_desc = list_first_entry(&isert_conn->conn_fr_pool, struct fast_reg_descriptor, list); @@ -2786,38 +2759,21 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data, - ISERT_DATA_KEY_VALID, &data_sge); + ISERT_DATA_KEY_VALID, &wr->ib_sg[DATA]); if (ret) goto unmap_cmd; - if (se_cmd->prot_op != TARGET_PROT_NORMAL) { - struct ib_sge prot_sge, sig_sge; - - if (se_cmd->t_prot_sg) { - ret = isert_map_data_buf(isert_conn, isert_cmd, - se_cmd->t_prot_sg, - se_cmd->t_prot_nents, - se_cmd->prot_length, - 0, wr->iser_ib_op, &wr->prot); - if (ret) - goto unmap_cmd; - - ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot, - ISERT_PROT_KEY_VALID, &prot_sge); - if (ret) - goto unmap_prot_cmd; - } - - ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc, - &data_sge, &prot_sge, &sig_sge); + if (isert_prot_cmd(isert_conn, se_cmd)) { + ret = isert_handle_prot_cmd(isert_conn, isert_cmd, wr); if (ret) - goto unmap_prot_cmd; + goto unmap_cmd; - fr_desc->ind |= ISERT_PROTECTED; - memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge)); - } else - memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge)); + ib_sg = &wr->ib_sg[SIG]; + } else { + ib_sg = &wr->ib_sg[DATA]; + } + memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg)); wr->ib_sge = &wr->s_ib_sge; wr->send_wr_num = 1; memset(&wr->s_send_wr, 0, sizeof(*send_wr)); @@ -2827,12 +2783,12 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, send_wr = &isert_cmd->rdma_wr.s_send_wr; send_wr->sg_list = &wr->s_ib_sge; send_wr->num_sge = 1; - send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; + send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; if (wr->iser_ib_op == 
ISER_IB_RDMA_WRITE) { send_wr->opcode = IB_WR_RDMA_WRITE; send_wr->wr.rdma.remote_addr = isert_cmd->read_va; send_wr->wr.rdma.rkey = isert_cmd->read_stag; - send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ? + send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? 0 : IB_SEND_SIGNALED; } else { send_wr->opcode = IB_WR_RDMA_READ; @@ -2842,9 +2798,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } return 0; -unmap_prot_cmd: - if (se_cmd->t_prot_sg) - isert_unmap_data_buf(isert_conn, &wr->prot); + unmap_cmd: if (fr_desc) { spin_lock_irqsave(&isert_conn->conn_lock, flags); @@ -2867,16 +2821,17 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) struct ib_send_wr *wr_failed; int rc; - pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n", + isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n", isert_cmd, se_cmd->data_length); + wr->iser_ib_op = ISER_IB_RDMA_WRITE; rc = device->reg_rdma_mem(conn, cmd, wr); if (rc) { - pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); return rc; } - if (se_cmd->prot_op == TARGET_PROT_NORMAL) { + if (!isert_prot_cmd(isert_conn, se_cmd)) { /* * Build isert_conn->tx_desc for iSCSI response PDU and attach */ @@ -2886,24 +2841,20 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); isert_init_send_wr(isert_conn, isert_cmd, - &isert_cmd->tx_desc.send_wr, false); + &isert_cmd->tx_desc.send_wr); isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; wr->send_wr_num += 1; } - atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); - rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); - if (rc) { - pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); - } + if (rc) + isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - if 
(se_cmd->prot_op == TARGET_PROT_NORMAL) - pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data " + if (!isert_prot_cmd(isert_conn, se_cmd)) + isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data " "READ\n", isert_cmd); else - pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", + isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd); return 1; @@ -2920,23 +2871,20 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) struct ib_send_wr *wr_failed; int rc; - pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", + isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", isert_cmd, se_cmd->data_length, cmd->write_data_done); wr->iser_ib_op = ISER_IB_RDMA_READ; rc = device->reg_rdma_mem(conn, cmd, wr); if (rc) { - pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); return rc; } - atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); - rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); - if (rc) { - pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); - atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); - } - pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", + if (rc) + isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); + + isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", isert_cmd); return 0; @@ -2952,7 +2900,7 @@ isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) ret = isert_put_nopin(cmd, conn, false); break; default: - pr_err("Unknown immediate state: 0x%02x\n", state); + isert_err("Unknown immediate state: 0x%02x\n", state); ret = -EINVAL; break; } @@ -2963,15 +2911,14 @@ isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) static int isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) { + struct isert_conn *isert_conn = conn->context; int 
ret; switch (state) { case ISTATE_SEND_LOGOUTRSP: ret = isert_put_logout_rsp(cmd, conn); - if (!ret) { - pr_debug("Returning iSER Logout -EAGAIN\n"); - ret = -EAGAIN; - } + if (!ret) + isert_conn->logout_posted = true; break; case ISTATE_SEND_NOPIN: ret = isert_put_nopin(cmd, conn, true); @@ -2993,7 +2940,7 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) ret = isert_put_response(conn, cmd); break; default: - pr_err("Unknown response state: 0x%02x\n", state); + isert_err("Unknown response state: 0x%02x\n", state); ret = -EINVAL; break; } @@ -3001,27 +2948,64 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) return ret; } +struct rdma_cm_id * +isert_setup_id(struct isert_np *isert_np) +{ + struct iscsi_np *np = isert_np->np; + struct rdma_cm_id *id; + struct sockaddr *sa; + int ret; + + sa = (struct sockaddr *)&np->np_sockaddr; + isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); + + id = rdma_create_id(isert_cma_handler, isert_np, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(id)) { + isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); + ret = PTR_ERR(id); + goto out; + } + isert_dbg("id %p context %p\n", id, id->context); + + ret = rdma_bind_addr(id, sa); + if (ret) { + isert_err("rdma_bind_addr() failed: %d\n", ret); + goto out_id; + } + + ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG); + if (ret) { + isert_err("rdma_listen() failed: %d\n", ret); + goto out_id; + } + + return id; +out_id: + rdma_destroy_id(id); +out: + return ERR_PTR(ret); +} + static int isert_setup_np(struct iscsi_np *np, struct __kernel_sockaddr_storage *ksockaddr) { struct isert_np *isert_np; struct rdma_cm_id *isert_lid; - struct sockaddr *sa; int ret; isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL); if (!isert_np) { - pr_err("Unable to allocate struct isert_np\n"); + isert_err("Unable to allocate struct isert_np\n"); return -ENOMEM; } sema_init(&isert_np->np_sem, 0); mutex_init(&isert_np->np_accept_mutex); 
INIT_LIST_HEAD(&isert_np->np_accept_list); init_completion(&isert_np->np_login_comp); + isert_np->np = np; - sa = (struct sockaddr *)ksockaddr; - pr_debug("ksockaddr: %p, sa: %p\n", ksockaddr, sa); /* * Setup the np->np_sockaddr from the passed sockaddr setup * in iscsi_target_configfs.c code.. @@ -3029,37 +3013,20 @@ isert_setup_np(struct iscsi_np *np, memcpy(&np->np_sockaddr, ksockaddr, sizeof(struct __kernel_sockaddr_storage)); - isert_lid = rdma_create_id(isert_cma_handler, np, RDMA_PS_TCP, - IB_QPT_RC); + isert_lid = isert_setup_id(isert_np); if (IS_ERR(isert_lid)) { - pr_err("rdma_create_id() for isert_listen_handler failed: %ld\n", - PTR_ERR(isert_lid)); ret = PTR_ERR(isert_lid); goto out; } - ret = rdma_bind_addr(isert_lid, sa); - if (ret) { - pr_err("rdma_bind_addr() for isert_lid failed: %d\n", ret); - goto out_lid; - } - - ret = rdma_listen(isert_lid, ISERT_RDMA_LISTEN_BACKLOG); - if (ret) { - pr_err("rdma_listen() for isert_lid failed: %d\n", ret); - goto out_lid; - } - isert_np->np_cm_id = isert_lid; np->np_context = isert_np; - pr_debug("Setup isert_lid->context: %p\n", isert_lid->context); return 0; -out_lid: - rdma_destroy_id(isert_lid); out: kfree(isert_np); + return ret; } @@ -3075,16 +3042,12 @@ isert_rdma_accept(struct isert_conn *isert_conn) cp.retry_count = 7; cp.rnr_retry_count = 7; - pr_debug("Before rdma_accept >>>>>>>>>>>>>>>>>>>>.\n"); - ret = rdma_accept(cm_id, &cp); if (ret) { - pr_err("rdma_accept() failed with: %d\n", ret); + isert_err("rdma_accept() failed with: %d\n", ret); return ret; } - pr_debug("After rdma_accept >>>>>>>>>>>>>>>>>>>>>.\n"); - return 0; } @@ -3094,7 +3057,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) struct isert_conn *isert_conn = (struct isert_conn *)conn->context; int ret; - pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn); + isert_info("before login_req comp conn: %p\n", isert_conn); + ret = wait_for_completion_interruptible(&isert_conn->login_req_comp); + 
if (ret) { + isert_err("isert_conn %p interrupted before got login req\n", + isert_conn); + return ret; + } + reinit_completion(&isert_conn->login_req_comp); + /* * For login requests after the first PDU, isert_rx_login_req() will * kick schedule_delayed_work(&conn->login_work) as the packet is @@ -3104,11 +3075,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) if (!login->first_request) return 0; + isert_rx_login_req(isert_conn); + + isert_info("before conn_login_comp conn: %p\n", conn); ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp); if (ret) return ret; - pr_debug("isert_get_login_rx processing login->req: %p\n", login->req); + isert_info("processing login->req: %p\n", login->req); + return 0; } @@ -3161,7 +3136,7 @@ accept_wait: spin_lock_bh(&np->np_thread_lock); if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) { spin_unlock_bh(&np->np_thread_lock); - pr_debug("np_thread_state %d for isert_accept_np\n", + isert_dbg("np_thread_state %d for isert_accept_np\n", np->np_thread_state); /** * No point in stalling here when np_thread @@ -3186,17 +3161,10 @@ accept_wait: isert_conn->conn = conn; max_accept = 0; - ret = isert_rdma_post_recvl(isert_conn); - if (ret) - return ret; - - ret = isert_rdma_accept(isert_conn); - if (ret) - return ret; - isert_set_conn_info(np, conn, isert_conn); - pr_debug("Processing isert_accept_np: isert_conn: %p\n", isert_conn); + isert_dbg("Processing isert_conn: %p\n", isert_conn); + return 0; } @@ -3204,25 +3172,103 @@ static void isert_free_np(struct iscsi_np *np) { struct isert_np *isert_np = (struct isert_np *)np->np_context; + struct isert_conn *isert_conn, *n; if (isert_np->np_cm_id) rdma_destroy_id(isert_np->np_cm_id); + /* + * FIXME: At this point we don't have a good way to insure + * that at this point we don't have hanging connections that + * completed RDMA establishment but didn't start iscsi login + * process. 
So work-around this by cleaning up what ever piled + * up in np_accept_list. + */ + mutex_lock(&isert_np->np_accept_mutex); + if (!list_empty(&isert_np->np_accept_list)) { + isert_info("Still have isert connections, cleaning up...\n"); + list_for_each_entry_safe(isert_conn, n, + &isert_np->np_accept_list, + conn_accept_node) { + isert_info("cleaning isert_conn %p state (%d)\n", + isert_conn, isert_conn->state); + isert_connect_release(isert_conn); + } + } + mutex_unlock(&isert_np->np_accept_mutex); + np->np_context = NULL; kfree(isert_np); } +static void isert_release_work(struct work_struct *work) +{ + struct isert_conn *isert_conn = container_of(work, + struct isert_conn, + release_work); + + isert_info("Starting release conn %p\n", isert_conn); + + wait_for_completion(&isert_conn->conn_wait); + + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); + + isert_info("Destroying conn %p\n", isert_conn); + isert_put_conn(isert_conn); +} + +static void +isert_wait4logout(struct isert_conn *isert_conn) +{ + struct iscsi_conn *conn = isert_conn->conn; + + isert_info("conn %p\n", isert_conn); + + if (isert_conn->logout_posted) { + isert_info("conn %p wait for conn_logout_comp\n", isert_conn); + wait_for_completion_timeout(&conn->conn_logout_comp, + SECONDS_FOR_LOGOUT_COMP * HZ); + } +} + +static void +isert_wait4cmds(struct iscsi_conn *conn) +{ + isert_info("iscsi_conn %p\n", conn); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } +} + +static void +isert_wait4flush(struct isert_conn *isert_conn) +{ + struct ib_recv_wr *bad_wr; + + isert_info("conn %p\n", isert_conn); + + init_completion(&isert_conn->conn_wait_comp_err); + isert_conn->beacon.wr_id = ISER_BEACON_WRID; + /* post an indication that all flush errors were consumed */ + if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) { + isert_err("conn %p 
failed to post beacon", isert_conn); + return; + } + + wait_for_completion(&isert_conn->conn_wait_comp_err); +} + static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; - pr_debug("isert_wait_conn: Starting \n"); + isert_info("Starting conn %p\n", isert_conn); mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->conn_cm_id && !isert_conn->disconnect) { - pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); - rdma_disconnect(isert_conn->conn_cm_id); - } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. @@ -3231,14 +3277,15 @@ static void isert_wait_conn(struct iscsi_conn *conn) mutex_unlock(&isert_conn->conn_mutex); return; } - if (isert_conn->state == ISER_CONN_UP) - isert_conn->state = ISER_CONN_TERMINATING; + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); - wait_for_completion(&isert_conn->conn_wait_comp_err); + isert_wait4cmds(conn); + isert_wait4flush(isert_conn); + isert_wait4logout(isert_conn); - wait_for_completion(&isert_conn->conn_wait); - isert_put_conn(isert_conn); + INIT_WORK(&isert_conn->release_work, isert_release_work); + queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsi_conn *conn) @@ -3273,35 +3320,39 @@ static int __init isert_init(void) { int ret; - isert_rx_wq = alloc_workqueue("isert_rx_wq", 0, 0); - if (!isert_rx_wq) { - pr_err("Unable to allocate isert_rx_wq\n"); + isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0); + if (!isert_comp_wq) { + isert_err("Unable to allocate isert_comp_wq\n"); + ret = -ENOMEM; return -ENOMEM; } - isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0); - if (!isert_comp_wq) { - pr_err("Unable to allocate isert_comp_wq\n"); + isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!isert_release_wq) { + isert_err("Unable to allocate isert_release_wq\n"); ret = -ENOMEM; - goto destroy_rx_wq; + goto 
destroy_comp_wq; } iscsit_register_transport(&iser_target_transport); - pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n"); + isert_info("iSER_TARGET[0] - Loaded iser_target_transport\n"); + return 0; -destroy_rx_wq: - destroy_workqueue(isert_rx_wq); +destroy_comp_wq: + destroy_workqueue(isert_comp_wq); + return ret; } static void __exit isert_exit(void) { flush_scheduled_work(); + destroy_workqueue(isert_release_wq); destroy_workqueue(isert_comp_wq); - destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); - pr_debug("iSER_TARGET[0] - Released iser_target_transport\n"); + isert_info("iSER_TARGET[0] - Released iser_target_transport\n"); } MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure"); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 04f51f7..8dc8415 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -4,9 +4,37 @@ #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> +#define DRV_NAME "isert" +#define PFX DRV_NAME ": " + +#define isert_dbg(fmt, arg...) \ + do { \ + if (unlikely(isert_debug_level > 2)) \ + printk(KERN_DEBUG PFX "%s: " fmt,\ + __func__ , ## arg); \ + } while (0) + +#define isert_warn(fmt, arg...) \ + do { \ + if (unlikely(isert_debug_level > 0)) \ + pr_warn(PFX "%s: " fmt, \ + __func__ , ## arg); \ + } while (0) + +#define isert_info(fmt, arg...) \ + do { \ + if (unlikely(isert_debug_level > 1)) \ + pr_info(PFX "%s: " fmt, \ + __func__ , ## arg); \ + } while (0) + +#define isert_err(fmt, arg...) 
\ + pr_err(PFX "%s: " fmt, __func__ , ## arg) + #define ISERT_RDMA_LISTEN_BACKLOG 10 #define ISCSI_ISER_SG_TABLESIZE 256 #define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL +#define ISER_BEACON_WRID 0xfffffffffffffffeULL enum isert_desc_type { ISCSI_TX_CONTROL, @@ -23,6 +51,7 @@ enum iser_ib_op_code { enum iser_conn_state { ISER_CONN_INIT, ISER_CONN_UP, + ISER_CONN_FULL_FEATURE, ISER_CONN_TERMINATING, ISER_CONN_DOWN, }; @@ -44,9 +73,6 @@ struct iser_tx_desc { struct ib_sge tx_sg[2]; int num_sge; struct isert_cmd *isert_cmd; - struct llist_node *comp_llnode_batch; - struct llist_node comp_llnode; - bool llnode_active; struct ib_send_wr send_wr; } __packed; @@ -81,6 +107,12 @@ struct isert_data_buf { enum dma_data_direction dma_dir; }; +enum { + DATA = 0, + PROT = 1, + SIG = 2, +}; + struct isert_rdma_wr { struct list_head wr_list; struct isert_cmd *isert_cmd; @@ -90,6 +122,7 @@ struct isert_rdma_wr { int send_wr_num; struct ib_send_wr *send_wr; struct ib_send_wr s_send_wr; + struct ib_sge ib_sg[3]; struct isert_data_buf data; struct isert_data_buf prot; struct fast_reg_descriptor *fr_desc; @@ -117,14 +150,15 @@ struct isert_device; struct isert_conn { enum iser_conn_state state; int post_recv_buf_count; - atomic_t post_send_buf_count; u32 responder_resources; u32 initiator_depth; + bool pi_support; u32 max_sge; char *login_buf; char *login_req_buf; char *login_rsp_buf; u64 login_req_dma; + int login_req_len; u64 login_rsp_dma; unsigned int conn_rx_desc_head; struct iser_rx_desc *conn_rx_descs; @@ -132,13 +166,13 @@ struct isert_conn { struct iscsi_conn *conn; struct list_head conn_accept_node; struct completion conn_login_comp; + struct completion login_req_comp; struct iser_tx_desc conn_login_tx_desc; struct rdma_cm_id *conn_cm_id; struct ib_pd *conn_pd; struct ib_mr *conn_mr; struct ib_qp *conn_qp; struct isert_device *conn_device; - struct work_struct conn_logout_work; struct mutex conn_mutex; struct completion conn_wait; struct completion conn_wait_comp_err; @@ 
-147,31 +181,38 @@ struct isert_conn { int conn_fr_pool_size; /* lock to protect fastreg pool */ spinlock_t conn_lock; -#define ISERT_COMP_BATCH_COUNT 8 - int conn_comp_batch; - struct llist_head conn_comp_llist; - bool disconnect; + struct work_struct release_work; + struct ib_recv_wr beacon; + bool logout_posted; }; #define ISERT_MAX_CQ 64 -struct isert_cq_desc { - struct isert_device *device; - int cq_index; - struct work_struct cq_rx_work; - struct work_struct cq_tx_work; +/** + * struct isert_comp - iSER completion context + * + * @device: pointer to device handle + * @cq: completion queue + * @wcs: work completion array + * @active_qps: Number of active QPs attached + * to completion context + * @work: completion work handle + */ +struct isert_comp { + struct isert_device *device; + struct ib_cq *cq; + struct ib_wc wcs[16]; + int active_qps; + struct work_struct work; }; struct isert_device { int use_fastreg; bool pi_capable; - int cqs_used; int refcount; - int cq_active_qps[ISERT_MAX_CQ]; struct ib_device *ib_device; - struct ib_cq *dev_rx_cq[ISERT_MAX_CQ]; - struct ib_cq *dev_tx_cq[ISERT_MAX_CQ]; - struct isert_cq_desc *cq_desc; + struct isert_comp *comps; + int comps_used; struct list_head dev_node; struct ib_device_attr dev_attr; int (*reg_rdma_mem)(struct iscsi_conn *conn, @@ -182,6 +223,7 @@ struct isert_device { }; struct isert_np { + struct iscsi_np *np; struct semaphore np_sem; struct rdma_cm_id *np_cm_id; struct mutex np_accept_mutex; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b205f76..9802485 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4071,7 +4071,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, int devid; int ret; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4134,7 +4134,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, if (!config_enabled(CONFIG_SMP)) return -1; - cfg = data->chip_data; + 
cfg = irqd_cfg(data); irq = data->irq; irte_info = &cfg->irq_2_irte; @@ -4172,7 +4172,7 @@ static int free_irq(int irq) struct irq_2_irte *irte_info; struct irq_cfg *cfg; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4191,7 +4191,7 @@ static void compose_msi_msg(struct pci_dev *pdev, struct irq_cfg *cfg; union irte irte; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return; @@ -4220,7 +4220,7 @@ static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) if (!pdev) return -EINVAL; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4240,7 +4240,7 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, if (!pdev) return -EINVAL; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; @@ -4263,7 +4263,7 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id) struct irq_cfg *cfg; int index, devid; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); if (!cfg) return -EINVAL; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 27541d4..a55b207 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -54,7 +54,7 @@ static int __init parse_ioapics_under_ir(void); static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); return cfg ? 
&cfg->irq_2_iommu : NULL; } @@ -85,7 +85,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned int mask = 0; unsigned long flags; int index; @@ -153,7 +153,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle) static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; if (!irq_iommu) @@ -1050,7 +1050,7 @@ static int intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = data->chip_data; + struct irq_cfg *cfg = irqd_cfg(data); unsigned int dest, irq = data->irq; struct irte irte; int err; @@ -1105,7 +1105,7 @@ static void intel_compose_msi_msg(struct pci_dev *pdev, u16 sub_handle = 0; int ir_index; - cfg = irq_get_chip_data(irq); + cfg = irq_cfg(irq); ir_index = map_irq_to_irte_handle(irq, &sub_handle); BUG_ON(ir_index == -1); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 2c3f5ad..89c4846 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -298,7 +298,7 @@ static int set_remapped_irq_affinity(struct irq_data *data, void free_remapped_irq(int irq) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); if (!remap_ops || !remap_ops->free_irq) return; @@ -311,7 +311,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - struct irq_cfg *cfg = irq_get_chip_data(irq); + struct irq_cfg *cfg = irq_cfg(irq); if (!irq_remapped(cfg)) native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); @@ -364,7 +364,7 @@ static void ir_ack_apic_edge(struct 
irq_data *data) static void ir_ack_apic_level(struct irq_data *data) { ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); + eoi_ioapic_irq(data->irq, irqd_cfg(data)); } static void ir_print_prefix(struct irq_data *data, struct seq_file *p) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index cca4721..51fd6b5 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -34,7 +34,8 @@ struct cxl_context *cxl_context_alloc(void) /* * Initialises a CXL context. */ -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, + struct address_space *mapping) { int i; @@ -42,6 +43,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) ctx->afu = afu; ctx->master = master; ctx->pid = NULL; /* Set in start work ioctl */ + mutex_init(&ctx->mapping_lock); + ctx->mapping = mapping; /* * Allocate the segment table before we put it in the IDR so that we @@ -82,12 +85,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) * Allocating IDR! We better make sure everything's setup that * dereferences from it. 
*/ + mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - spin_lock(&afu->contexts_lock); i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, ctx->afu->num_procs, GFP_NOWAIT); - spin_unlock(&afu->contexts_lock); idr_preload_end(); + mutex_unlock(&afu->contexts_lock); if (i < 0) return i; @@ -147,6 +150,12 @@ static void __detach_context(struct cxl_context *ctx) afu_release_irqs(ctx); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ wake_up_all(&ctx->wq); + + /* Release Problem State Area mapping */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); } /* @@ -168,21 +177,22 @@ void cxl_context_detach_all(struct cxl_afu *afu) struct cxl_context *ctx; int tmp; - rcu_read_lock(); - idr_for_each_entry(&afu->contexts_idr, ctx, tmp) + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { /* * Anything done in here needs to be setup before the IDR is * created and torn down after the IDR removed */ __detach_context(ctx); - rcu_read_unlock(); + } + mutex_unlock(&afu->contexts_lock); } void cxl_context_free(struct cxl_context *ctx) { - spin_lock(&ctx->afu->contexts_lock); + mutex_lock(&ctx->afu->contexts_lock); idr_remove(&ctx->afu->contexts_idr, ctx->pe); - spin_unlock(&ctx->afu->contexts_lock); + mutex_unlock(&ctx->afu->contexts_lock); synchronize_rcu(); free_page((u64)ctx->sstp); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index b5b6bda..28078f8 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -351,7 +351,7 @@ struct cxl_afu { struct device *chardev_s, *chardev_m, *chardev_d; struct idr contexts_idr; struct dentry *debugfs; - spinlock_t contexts_lock; + struct mutex contexts_lock; struct mutex spa_mutex; spinlock_t afu_cntl_lock; @@ -398,6 +398,10 @@ struct cxl_context { phys_addr_t psn_phys; u64 psn_size; + /* Used to unmap any mmaps when force detaching */ + struct address_space *mapping; + struct 
mutex mapping_lock; + spinlock_t sste_lock; /* Protects segment table entries */ struct cxl_sste *sstp; u64 sstp0, sstp1; @@ -599,7 +603,8 @@ int cxl_alloc_sst(struct cxl_context *ctx); void init_cxl_native(void); struct cxl_context *cxl_context_alloc(void); -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master); +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, + struct address_space *mapping); void cxl_context_free(struct cxl_context *ctx); int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 378b099..e9f2f10 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -77,7 +77,7 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) goto err_put_afu; } - if ((rc = cxl_context_init(ctx, afu, master))) + if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping))) goto err_put_afu; pr_devel("afu_open pe: %i\n", ctx->pe); @@ -113,6 +113,10 @@ static int afu_release(struct inode *inode, struct file *file) __func__, ctx->pe); cxl_context_detach(ctx); + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + put_device(&ctx->afu->dev); /* diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 9a5a442..f2b37b4 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -277,6 +277,7 @@ static int do_process_element_cmd(struct cxl_context *ctx, u64 cmd, u64 pe_state) { u64 state; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); WARN_ON(!ctx->afu->enabled); @@ -286,6 +287,10 @@ static int do_process_element_cmd(struct cxl_context *ctx, smp_mb(); cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); while (1) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n"); + return -EBUSY; + } state = be64_to_cpup(ctx->afu->sw_command_status); if (state == 
~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); @@ -610,13 +615,6 @@ static inline int detach_process_native_dedicated(struct cxl_context *ctx) return 0; } -/* - * TODO: handle case when this is called inside a rcu_read_lock() which may - * happen when we unbind the driver (ie. cxl_context_detach_all()) . Terminate - * & remove use a mutex lock and schedule which will not good with lock held. - * May need to write do_process_element_cmd() that handles outstanding page - * faults synchronously. - */ static inline int detach_process_native_afu_directed(struct cxl_context *ctx) { if (!ctx->pe_inserted) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 10c98ab..0f2cc9f8 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) afu->dev.release = cxl_release_afu; afu->slice = slice; idr_init(&afu->contexts_idr); - spin_lock_init(&afu->contexts_lock); + mutex_init(&afu->contexts_lock); spin_lock_init(&afu->afu_cntl_lock); mutex_init(&afu->spa_mutex); diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index ce7ec06..461bdbd 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct device *device, int rc; /* Not safe to reset if it is currently in use */ - spin_lock(&afu->contexts_lock); + mutex_lock(&afu->contexts_lock); if (!idr_is_empty(&afu->contexts_idr)) { rc = -EBUSY; goto err; @@ -132,7 +132,7 @@ static ssize_t reset_store_afu(struct device *device, rc = count; err: - spin_unlock(&afu->contexts_lock); + mutex_unlock(&afu->contexts_lock); return rc; } @@ -247,7 +247,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr, int rc = -EBUSY; /* can't change this if we have a user */ - spin_lock(&afu->contexts_lock); + mutex_lock(&afu->contexts_lock); if (!idr_is_empty(&afu->contexts_idr)) goto err; @@ -271,7 +271,7 @@ static ssize_t 
mode_store(struct device *device, struct device_attribute *attr, afu->current_mode = 0; afu->num_procs = 0; - spin_unlock(&afu->contexts_lock); + mutex_unlock(&afu->contexts_lock); if ((rc = _cxl_afu_deactivate_mode(afu, old_mode))) return rc; @@ -280,7 +280,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr, return count; err: - spin_unlock(&afu->contexts_lock); + mutex_unlock(&afu->contexts_lock); return rc; } diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c index 028ba5d6..687e9aa 100644 --- a/drivers/misc/mic/host/mic_debugfs.c +++ b/drivers/misc/mic/host/mic_debugfs.c @@ -326,21 +326,27 @@ static int mic_vdev_info_show(struct seq_file *s, void *unused) } avail = vrh->vring.avail; seq_printf(s, "avail flags 0x%x idx %d\n", - avail->flags, avail->idx & (num - 1)); + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, avail->idx) & (num - 1)); seq_printf(s, "avail flags 0x%x idx %d\n", - avail->flags, avail->idx); + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, avail->idx)); for (j = 0; j < num; j++) seq_printf(s, "avail ring[%d] %d\n", j, avail->ring[j]); used = vrh->vring.used; seq_printf(s, "used flags 0x%x idx %d\n", - used->flags, used->idx & (num - 1)); + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx) & (num - 1)); seq_printf(s, "used flags 0x%x idx %d\n", - used->flags, used->idx); + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx)); for (j = 0; j < num; j++) seq_printf(s, "used ring[%d] id %d len %d\n", - j, used->ring[j].id, - used->ring[j].len); + j, vringh32_to_cpu(vrh, + used->ring[j].id), + vringh32_to_cpu(vrh, + used->ring[j].len)); } } mutex_unlock(&mdev->mic_mutex); diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index cced842..7a8f1c5 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -67,7 +67,7 @@ config XEN_PCIDEV_FRONTEND config HT_IRQ bool "Interrupts on hypertransport devices" default y - 
depends on PCI && X86_LOCAL_APIC && X86_IO_APIC + depends on PCI && X86_LOCAL_APIC help This allows native hypertransport devices to use interrupts. @@ -110,13 +110,6 @@ config PCI_PASID If unsure, say N. -config PCI_IOAPIC - bool "PCI IO-APIC hotplug support" if X86 - depends on PCI - depends on ACPI - depends on X86_IO_APIC - default !X86 - config PCI_LABEL def_bool y if (DMI || ACPI) select NLS diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index e04fe2d..73e4af4 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -13,8 +13,6 @@ obj-$(CONFIG_PCI_QUIRKS) += quirks.o # Build PCI Express stuff if needed obj-$(CONFIG_PCIEPORTBUS) += pcie/ -obj-$(CONFIG_PCI_IOAPIC) += ioapic.o - # Build the PCI Hotplug drivers if we were asked to obj-$(CONFIG_HOTPLUG_PCI) += hotplug/ ifdef CONFIG_HOTPLUG_PCI diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 3efaf4c..96c5c72 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -36,6 +36,7 @@ #include <linux/wait.h> #include "../pci.h" #include <asm/pci_x86.h> /* for struct irq_routing_table */ +#include <asm/io_apic.h> #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) @@ -155,13 +156,10 @@ int ibmphp_init_devno(struct slot **cur_slot) for (loop = 0; loop < len; loop++) { if ((*cur_slot)->number == rtable->slots[loop].slot && (*cur_slot)->bus == rtable->slots[loop].bus) { - struct io_apic_irq_attr irq_attr; - (*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn); for (i = 0; i < 4; i++) (*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus, - (int) (*cur_slot)->device, i, - &irq_attr); + (int) (*cur_slot)->device, i); debug("(*cur_slot)->irq[0] = %x\n", (*cur_slot)->irq[0]); diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c deleted file mode 100644 index f6219d3..0000000 --- a/drivers/pci/ioapic.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * IOAPIC/IOxAPIC/IOSAPIC driver - * - * 
Copyright (C) 2009 Fujitsu Limited. - * (c) Copyright 2009 Hewlett-Packard Development Company, L.P. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * This driver manages PCI I/O APICs added by hotplug after boot. We try to - * claim all I/O APIC PCI devices, but those present at boot were registered - * when we parsed the ACPI MADT, so we'll fail when we try to re-register - * them. - */ - -#include <linux/pci.h> -#include <linux/module.h> -#include <linux/acpi.h> -#include <linux/slab.h> - -struct ioapic { - acpi_handle handle; - u32 gsi_base; -}; - -static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent) -{ - acpi_handle handle; - acpi_status status; - unsigned long long gsb; - struct ioapic *ioapic; - int ret; - char *type; - struct resource *res; - - handle = ACPI_HANDLE(&dev->dev); - if (!handle) - return -EINVAL; - - status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb); - if (ACPI_FAILURE(status)) - return -EINVAL; - - /* - * The previous code in acpiphp evaluated _MAT if _GSB failed, but - * ACPI spec 4.0 sec 6.2.2 requires _GSB for hot-pluggable I/O APICs. 
- */ - - ioapic = kzalloc(sizeof(*ioapic), GFP_KERNEL); - if (!ioapic) - return -ENOMEM; - - ioapic->handle = handle; - ioapic->gsi_base = (u32) gsb; - - if (dev->class == PCI_CLASS_SYSTEM_PIC_IOAPIC) - type = "IOAPIC"; - else - type = "IOxAPIC"; - - ret = pci_enable_device(dev); - if (ret < 0) - goto exit_free; - - pci_set_master(dev); - - if (pci_request_region(dev, 0, type)) - goto exit_disable; - - res = &dev->resource[0]; - if (acpi_register_ioapic(ioapic->handle, res->start, ioapic->gsi_base)) - goto exit_release; - - pci_set_drvdata(dev, ioapic); - dev_info(&dev->dev, "%s at %pR, GSI %u\n", type, res, ioapic->gsi_base); - return 0; - -exit_release: - pci_release_region(dev, 0); -exit_disable: - pci_disable_device(dev); -exit_free: - kfree(ioapic); - return -ENODEV; -} - -static void ioapic_remove(struct pci_dev *dev) -{ - struct ioapic *ioapic = pci_get_drvdata(dev); - - acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base); - pci_release_region(dev, 0); - pci_disable_device(dev); - kfree(ioapic); -} - - -static const struct pci_device_id ioapic_devices[] = { - { PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOAPIC, ~0) }, - { PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOXAPIC, ~0) }, - { } -}; -MODULE_DEVICE_TABLE(pci, ioapic_devices); - -static struct pci_driver ioapic_driver = { - .name = "ioapic", - .id_table = ioapic_devices, - .probe = ioapic_probe, - .remove = ioapic_remove, -}; - -static int __init ioapic_init(void) -{ - return pci_register_driver(&ioapic_driver); -} -module_init(ioapic_init); - -MODULE_LICENSE("GPL"); diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h index 8156b4c..3925db1 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h @@ -42,28 +42,6 @@ #include "lustre_patchless_compat.h" -# define LOCK_FS_STRUCT(fs) spin_lock(&(fs)->lock) -# define UNLOCK_FS_STRUCT(fs) 
spin_unlock(&(fs)->lock) - -static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, - struct dentry *dentry) -{ - struct path path; - struct path old_pwd; - - path.mnt = mnt; - path.dentry = dentry; - LOCK_FS_STRUCT(fs); - old_pwd = fs->pwd; - path_get(&path); - fs->pwd = path; - UNLOCK_FS_STRUCT(fs); - - if (old_pwd.dentry) - path_put(&old_pwd); -} - - /* * set ATTR_BLOCKS to a high value to avoid any risk of collision with other * ATTR_* attributes (see bug 13828) @@ -110,8 +88,6 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, #define cfs_bio_io_error(a, b) bio_io_error((a)) #define cfs_bio_endio(a, b, c) bio_endio((a), (c)) -#define cfs_fs_pwd(fs) ((fs)->pwd.dentry) -#define cfs_fs_mnt(fs) ((fs)->pwd.mnt) #define cfs_path_put(nd) path_put(&(nd)->path) diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 407718a..1ac7a70 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -661,7 +661,7 @@ int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump, int mode; int err; - mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; + mode = (0755 & ~current_umask()) | S_IFDIR; op_data = ll_prep_md_op_data(NULL, dir, NULL, filename, strlen(filename), mode, LUSTRE_OPC_MKDIR, lump); diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 6e423aa..a3367bf 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -2372,21 +2372,6 @@ char *ll_get_fsname(struct super_block *sb, char *buf, int buflen) return buf; } -static char *ll_d_path(struct dentry *dentry, char *buf, int bufsize) -{ - char *path = NULL; - - struct path p; - - p.dentry = dentry; - p.mnt = current->fs->root.mnt; - path_get(&p); - path = d_path(&p, buf, bufsize); - path_put(&p); - - return path; -} - void 
ll_dirty_page_discard_warn(struct page *page, int ioret) { char *buf, *path = NULL; @@ -2398,7 +2383,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) if (buf != NULL) { dentry = d_find_alias(page->mapping->host); if (dentry != NULL) - path = ll_d_path(dentry, buf, PAGE_SIZE); + path = dentry_path_raw(dentry, buf, PAGE_SIZE); } CDEBUG(D_WARNING, diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 73e58d2..f8ec322 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -609,6 +609,7 @@ static int __init iscsi_target_init_module(void) return ret; r2t_out: + iscsit_unregister_transport(&iscsi_target_transport); kmem_cache_destroy(lio_r2t_cache); ooo_out: kmem_cache_destroy(lio_ooo_cache); diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 302eb3b..09a522b 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -790,7 +790,6 @@ struct iscsi_np { void *np_context; struct iscsit_transport *np_transport; struct list_head np_list; - struct iscsi_tpg_np *tpg_np; } ____cacheline_aligned; struct iscsi_tpg_np { diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 480f2e0..713c0c1 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -281,7 +281,6 @@ static int iscsi_login_zero_tsih_s1( { struct iscsi_session *sess = NULL; struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf; - enum target_prot_op sup_pro_ops; int ret; sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL); @@ -343,9 +342,8 @@ static int iscsi_login_zero_tsih_s1( kfree(sess); return -ENOMEM; } - sup_pro_ops = conn->conn_transport->iscsit_get_sup_prot_ops(conn); - sess->se_sess = transport_init_session(sup_pro_ops); + sess->se_sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(sess->se_sess)) { 
iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -1161,6 +1159,7 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn, } kfree(conn->sess->sess_ops); kfree(conn->sess); + conn->sess = NULL; old_sess_out: iscsi_stop_login_thread_timer(np); @@ -1204,6 +1203,9 @@ old_sess_out: conn->sock = NULL; } + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + if (conn->conn_transport->iscsit_free_conn) conn->conn_transport->iscsit_free_conn(conn); @@ -1364,6 +1366,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) } login->zero_tsih = zero_tsih; + conn->sess->se_sess->sup_prot_ops = + conn->conn_transport->iscsit_get_sup_prot_ops(conn); + tpg = conn->tpg; if (!tpg) { pr_err("Unable to locate struct iscsi_conn->tpg\n"); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index c3cb5c1..9053a3c 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -501,7 +501,6 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( init_completion(&tpg_np->tpg_np_comp); kref_init(&tpg_np->tpg_np_kref); tpg_np->tpg_np = np; - np->tpg_np = tpg_np; tpg_np->tpg = tpg; spin_lock(&tpg->tpg_np_lock); diff --git a/drivers/target/iscsi/iscsi_target_transport.c b/drivers/target/iscsi/iscsi_target_transport.c index 882728f..08217d6 100644 --- a/drivers/target/iscsi/iscsi_target_transport.c +++ b/drivers/target/iscsi/iscsi_target_transport.c @@ -26,8 +26,7 @@ struct iscsit_transport *iscsit_get_transport(int type) void iscsit_put_transport(struct iscsit_transport *t) { - if (t->owner) - module_put(t->owner); + module_put(t->owner); } int iscsit_register_transport(struct iscsit_transport *t) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 7c6a95b..bcd88ec 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ 
-1356,15 +1356,15 @@ static int iscsit_do_tx_data( struct iscsi_conn *conn, struct iscsi_data_count *count) { - int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len; + int ret, iov_len; struct kvec *iov_p; struct msghdr msg; if (!conn || !conn->sock || !conn->conn_ops) return -1; - if (data <= 0) { - pr_err("Data length is: %d\n", data); + if (count->data_length <= 0) { + pr_err("Data length is: %d\n", count->data_length); return -1; } @@ -1373,20 +1373,16 @@ static int iscsit_do_tx_data( iov_p = count->iov; iov_len = count->iov_count; - while (total_tx < data) { - tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, - (data - total_tx)); - if (tx_loop <= 0) { - pr_debug("tx_loop: %d total_tx %d\n", - tx_loop, total_tx); - return tx_loop; - } - total_tx += tx_loop; - pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", - tx_loop, total_tx, data); + ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, + count->data_length); + if (ret != count->data_length) { + pr_err("Unexpected ret: %d send data %d\n", + ret, count->data_length); + return -EPIPE; } + pr_debug("ret: %d, sent data: %d\n", ret, count->data_length); - return total_tx; + return ret; } int rx_data( diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 4d1b722..7172a71 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -138,7 +138,7 @@ static void tcm_loop_submission_work(struct work_struct *work) set_host_byte(sc, DID_TRANSPORT_DISRUPTED); goto out_done; } - tl_nexus = tl_hba->tl_nexus; + tl_nexus = tl_tpg->tl_nexus; if (!tl_nexus) { scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus" " does not exist\n"); @@ -218,16 +218,26 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) * to struct scsi_device */ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, - struct tcm_loop_nexus *tl_nexus, int lun, int task, enum tcm_tmreq_table tmr) { struct se_cmd *se_cmd = NULL; struct se_session 
*se_sess; struct se_portal_group *se_tpg; + struct tcm_loop_nexus *tl_nexus; struct tcm_loop_cmd *tl_cmd = NULL; struct tcm_loop_tmr *tl_tmr = NULL; int ret = TMR_FUNCTION_FAILED, rc; + /* + * Locate the tl_nexus and se_sess pointers + */ + tl_nexus = tl_tpg->tl_nexus; + if (!tl_nexus) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return ret; + } + tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL); if (!tl_cmd) { pr_err("Unable to allocate memory for tl_cmd\n"); @@ -243,7 +253,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, se_cmd = &tl_cmd->tl_se_cmd; se_tpg = &tl_tpg->tl_se_tpg; - se_sess = tl_nexus->se_sess; + se_sess = tl_tpg->tl_nexus->se_sess; /* * Initialize struct se_cmd descriptor from target_core_mod infrastructure */ @@ -288,7 +298,6 @@ release: static int tcm_loop_abort_task(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; - struct tcm_loop_nexus *tl_nexus; struct tcm_loop_tpg *tl_tpg; int ret = FAILED; @@ -296,21 +305,8 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc) * Locate the tcm_loop_hba_t pointer */ tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); - /* - * Locate the tl_nexus and se_sess pointers - */ - tl_nexus = tl_hba->tl_nexus; - if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); - return FAILED; - } - - /* - * Locate the tl_tpg pointer from TargetID in sc->device->id - */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, sc->request->tag, TMR_ABORT_TASK); return (ret == TMR_FUNCTION_COMPLETE) ? 
SUCCESS : FAILED; } @@ -322,7 +318,6 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc) static int tcm_loop_device_reset(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; - struct tcm_loop_nexus *tl_nexus; struct tcm_loop_tpg *tl_tpg; int ret = FAILED; @@ -330,20 +325,9 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) * Locate the tcm_loop_hba_t pointer */ tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); - /* - * Locate the tl_nexus and se_sess pointers - */ - tl_nexus = tl_hba->tl_nexus; - if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); - return FAILED; - } - /* - * Locate the tl_tpg pointer from TargetID in sc->device->id - */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET); return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } @@ -940,8 +924,8 @@ static int tcm_loop_make_nexus( struct tcm_loop_nexus *tl_nexus; int ret = -ENOMEM; - if (tl_tpg->tl_hba->tl_nexus) { - pr_debug("tl_tpg->tl_hba->tl_nexus already exists\n"); + if (tl_tpg->tl_nexus) { + pr_debug("tl_tpg->tl_nexus already exists\n"); return -EEXIST; } se_tpg = &tl_tpg->tl_se_tpg; @@ -976,7 +960,7 @@ static int tcm_loop_make_nexus( */ __transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl, tl_nexus->se_sess, tl_nexus); - tl_tpg->tl_hba->tl_nexus = tl_nexus; + tl_tpg->tl_nexus = tl_nexus; pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba), name); @@ -992,12 +976,8 @@ static int tcm_loop_drop_nexus( { struct se_session *se_sess; struct tcm_loop_nexus *tl_nexus; - struct tcm_loop_hba *tl_hba = tpg->tl_hba; - if (!tl_hba) - return -ENODEV; - - tl_nexus = tl_hba->tl_nexus; + tl_nexus = tpg->tl_nexus; if (!tl_nexus) return -ENODEV; @@ -1013,13 +993,13 @@ static int tcm_loop_drop_nexus( } pr_debug("TCM_Loop_ConfigFS: 
Removing I_T Nexus to emulated" - " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba), + " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba), tl_nexus->se_sess->se_node_acl->initiatorname); /* * Release the SCSI I_T Nexus to the emulated SAS Target Port */ transport_deregister_session(tl_nexus->se_sess); - tpg->tl_hba->tl_nexus = NULL; + tpg->tl_nexus = NULL; kfree(tl_nexus); return 0; } @@ -1035,7 +1015,7 @@ static ssize_t tcm_loop_tpg_show_nexus( struct tcm_loop_nexus *tl_nexus; ssize_t ret; - tl_nexus = tl_tpg->tl_hba->tl_nexus; + tl_nexus = tl_tpg->tl_nexus; if (!tl_nexus) return -ENODEV; diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index 54c59d0..6ae49f2 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -27,11 +27,6 @@ struct tcm_loop_tmr { }; struct tcm_loop_nexus { - int it_nexus_active; - /* - * Pointer to Linux/SCSI HBA from linux/include/scsi_host.h - */ - struct scsi_host *sh; /* * Pointer to TCM session for I_T Nexus */ @@ -51,6 +46,7 @@ struct tcm_loop_tpg { atomic_t tl_tpg_port_count; struct se_portal_group tl_se_tpg; struct tcm_loop_hba *tl_hba; + struct tcm_loop_nexus *tl_nexus; }; struct tcm_loop_hba { @@ -59,7 +55,6 @@ struct tcm_loop_hba { struct se_hba_s *se_hba; struct se_lun *tl_hba_lun; struct se_port *tl_hba_lun_sep; - struct tcm_loop_nexus *tl_nexus; struct device dev; struct Scsi_Host *sh; struct tcm_loop_tpg tl_hba_tpgs[TL_TPGS_PER_HBA]; diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 79f9296..75d89ad 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -50,6 +50,19 @@ #include "target_core_rd.h" #include "target_core_xcopy.h" +#define TB_CIT_SETUP(_name, _item_ops, _group_ops, _attrs) \ +static void target_core_setup_##_name##_cit(struct se_subsystem_api *sa) \ +{ \ + struct target_backend_cits *tbc = &sa->tb_cits; \ + struct config_item_type 
*cit = &tbc->tb_##_name##_cit; \ + \ + cit->ct_item_ops = _item_ops; \ + cit->ct_group_ops = _group_ops; \ + cit->ct_attrs = _attrs; \ + cit->ct_owner = sa->owner; \ + pr_debug("Setup generic %s\n", __stringify(_name)); \ +} + extern struct t10_alua_lu_gp *default_lu_gp; static LIST_HEAD(g_tf_list); @@ -126,48 +139,57 @@ static struct config_group *target_core_register_fabric( pr_debug("Target_Core_ConfigFS: REGISTER -> group: %p name:" " %s\n", group, name); - /* - * Below are some hardcoded request_module() calls to automatically - * local fabric modules when the following is called: - * - * mkdir -p /sys/kernel/config/target/$MODULE_NAME - * - * Note that this does not limit which TCM fabric module can be - * registered, but simply provids auto loading logic for modules with - * mkdir(2) system calls with known TCM fabric modules. - */ - if (!strncmp(name, "iscsi", 5)) { + + tf = target_core_get_fabric(name); + if (!tf) { + pr_err("target_core_register_fabric() trying autoload for %s\n", + name); + /* - * Automatically load the LIO Target fabric module when the - * following is called: + * Below are some hardcoded request_module() calls to automatically + * local fabric modules when the following is called: * - * mkdir -p $CONFIGFS/target/iscsi - */ - ret = request_module("iscsi_target_mod"); - if (ret < 0) { - pr_err("request_module() failed for" - " iscsi_target_mod.ko: %d\n", ret); - return ERR_PTR(-EINVAL); - } - } else if (!strncmp(name, "loopback", 8)) { - /* - * Automatically load the tcm_loop fabric module when the - * following is called: + * mkdir -p /sys/kernel/config/target/$MODULE_NAME * - * mkdir -p $CONFIGFS/target/loopback + * Note that this does not limit which TCM fabric module can be + * registered, but simply provids auto loading logic for modules with + * mkdir(2) system calls with known TCM fabric modules. 
*/ - ret = request_module("tcm_loop"); - if (ret < 0) { - pr_err("request_module() failed for" - " tcm_loop.ko: %d\n", ret); - return ERR_PTR(-EINVAL); + + if (!strncmp(name, "iscsi", 5)) { + /* + * Automatically load the LIO Target fabric module when the + * following is called: + * + * mkdir -p $CONFIGFS/target/iscsi + */ + ret = request_module("iscsi_target_mod"); + if (ret < 0) { + pr_err("request_module() failed for" + " iscsi_target_mod.ko: %d\n", ret); + return ERR_PTR(-EINVAL); + } + } else if (!strncmp(name, "loopback", 8)) { + /* + * Automatically load the tcm_loop fabric module when the + * following is called: + * + * mkdir -p $CONFIGFS/target/loopback + */ + ret = request_module("tcm_loop"); + if (ret < 0) { + pr_err("request_module() failed for" + " tcm_loop.ko: %d\n", ret); + return ERR_PTR(-EINVAL); + } } + + tf = target_core_get_fabric(name); } - tf = target_core_get_fabric(name); if (!tf) { pr_err("target_core_get_fabric() failed for %s\n", - name); + name); return ERR_PTR(-EINVAL); } pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:" @@ -562,198 +584,21 @@ EXPORT_SYMBOL(target_fabric_configfs_deregister); // Stop functions called by external Target Fabrics Modules //############################################################################*/ -/* Start functions for struct config_item_type target_core_dev_attrib_cit */ - -#define DEF_DEV_ATTRIB_SHOW(_name) \ -static ssize_t target_core_dev_show_attr_##_name( \ - struct se_dev_attrib *da, \ - char *page) \ -{ \ - return snprintf(page, PAGE_SIZE, "%u\n", \ - (u32)da->da_dev->dev_attrib._name); \ -} - -#define DEF_DEV_ATTRIB_STORE(_name) \ -static ssize_t target_core_dev_store_attr_##_name( \ - struct se_dev_attrib *da, \ - const char *page, \ - size_t count) \ -{ \ - unsigned long val; \ - int ret; \ - \ - ret = kstrtoul(page, 0, &val); \ - if (ret < 0) { \ - pr_err("kstrtoul() failed with" \ - " ret: %d\n", ret); \ - return -EINVAL; \ - } \ - ret = se_dev_set_##_name(da->da_dev, 
(u32)val); \ - \ - return (!ret) ? count : -EINVAL; \ -} - -#define DEF_DEV_ATTRIB(_name) \ -DEF_DEV_ATTRIB_SHOW(_name); \ -DEF_DEV_ATTRIB_STORE(_name); - -#define DEF_DEV_ATTRIB_RO(_name) \ -DEF_DEV_ATTRIB_SHOW(_name); +/* Start functions for struct config_item_type tb_dev_attrib_cit */ CONFIGFS_EATTR_STRUCT(target_core_dev_attrib, se_dev_attrib); -#define SE_DEV_ATTR(_name, _mode) \ -static struct target_core_dev_attrib_attribute \ - target_core_dev_attrib_##_name = \ - __CONFIGFS_EATTR(_name, _mode, \ - target_core_dev_show_attr_##_name, \ - target_core_dev_store_attr_##_name); - -#define SE_DEV_ATTR_RO(_name); \ -static struct target_core_dev_attrib_attribute \ - target_core_dev_attrib_##_name = \ - __CONFIGFS_EATTR_RO(_name, \ - target_core_dev_show_attr_##_name); - -DEF_DEV_ATTRIB(emulate_model_alias); -SE_DEV_ATTR(emulate_model_alias, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_dpo); -SE_DEV_ATTR(emulate_dpo, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_fua_write); -SE_DEV_ATTR(emulate_fua_write, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_fua_read); -SE_DEV_ATTR(emulate_fua_read, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_write_cache); -SE_DEV_ATTR(emulate_write_cache, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_ua_intlck_ctrl); -SE_DEV_ATTR(emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_tas); -SE_DEV_ATTR(emulate_tas, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_tpu); -SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_tpws); -SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_caw); -SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_3pc); -SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(pi_prot_type); -SE_DEV_ATTR(pi_prot_type, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB_RO(hw_pi_prot_type); -SE_DEV_ATTR_RO(hw_pi_prot_type); - -DEF_DEV_ATTRIB(pi_prot_format); -SE_DEV_ATTR(pi_prot_format, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(enforce_pr_isids); 
-SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(is_nonrot); -SE_DEV_ATTR(is_nonrot, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(emulate_rest_reord); -SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(force_pr_aptpl); -SE_DEV_ATTR(force_pr_aptpl, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB_RO(hw_block_size); -SE_DEV_ATTR_RO(hw_block_size); - -DEF_DEV_ATTRIB(block_size); -SE_DEV_ATTR(block_size, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB_RO(hw_max_sectors); -SE_DEV_ATTR_RO(hw_max_sectors); - -DEF_DEV_ATTRIB(fabric_max_sectors); -SE_DEV_ATTR(fabric_max_sectors, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(optimal_sectors); -SE_DEV_ATTR(optimal_sectors, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB_RO(hw_queue_depth); -SE_DEV_ATTR_RO(hw_queue_depth); - -DEF_DEV_ATTRIB(queue_depth); -SE_DEV_ATTR(queue_depth, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(max_unmap_lba_count); -SE_DEV_ATTR(max_unmap_lba_count, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(max_unmap_block_desc_count); -SE_DEV_ATTR(max_unmap_block_desc_count, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(unmap_granularity); -SE_DEV_ATTR(unmap_granularity, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(unmap_granularity_alignment); -SE_DEV_ATTR(unmap_granularity_alignment, S_IRUGO | S_IWUSR); - -DEF_DEV_ATTRIB(max_write_same_len); -SE_DEV_ATTR(max_write_same_len, S_IRUGO | S_IWUSR); - CONFIGFS_EATTR_OPS(target_core_dev_attrib, se_dev_attrib, da_group); -static struct configfs_attribute *target_core_dev_attrib_attrs[] = { - &target_core_dev_attrib_emulate_model_alias.attr, - &target_core_dev_attrib_emulate_dpo.attr, - &target_core_dev_attrib_emulate_fua_write.attr, - &target_core_dev_attrib_emulate_fua_read.attr, - &target_core_dev_attrib_emulate_write_cache.attr, - &target_core_dev_attrib_emulate_ua_intlck_ctrl.attr, - &target_core_dev_attrib_emulate_tas.attr, - &target_core_dev_attrib_emulate_tpu.attr, - &target_core_dev_attrib_emulate_tpws.attr, - &target_core_dev_attrib_emulate_caw.attr, - 
&target_core_dev_attrib_emulate_3pc.attr, - &target_core_dev_attrib_pi_prot_type.attr, - &target_core_dev_attrib_hw_pi_prot_type.attr, - &target_core_dev_attrib_pi_prot_format.attr, - &target_core_dev_attrib_enforce_pr_isids.attr, - &target_core_dev_attrib_force_pr_aptpl.attr, - &target_core_dev_attrib_is_nonrot.attr, - &target_core_dev_attrib_emulate_rest_reord.attr, - &target_core_dev_attrib_hw_block_size.attr, - &target_core_dev_attrib_block_size.attr, - &target_core_dev_attrib_hw_max_sectors.attr, - &target_core_dev_attrib_fabric_max_sectors.attr, - &target_core_dev_attrib_optimal_sectors.attr, - &target_core_dev_attrib_hw_queue_depth.attr, - &target_core_dev_attrib_queue_depth.attr, - &target_core_dev_attrib_max_unmap_lba_count.attr, - &target_core_dev_attrib_max_unmap_block_desc_count.attr, - &target_core_dev_attrib_unmap_granularity.attr, - &target_core_dev_attrib_unmap_granularity_alignment.attr, - &target_core_dev_attrib_max_write_same_len.attr, - NULL, -}; - static struct configfs_item_operations target_core_dev_attrib_ops = { .show_attribute = target_core_dev_attrib_attr_show, .store_attribute = target_core_dev_attrib_attr_store, }; -static struct config_item_type target_core_dev_attrib_cit = { - .ct_item_ops = &target_core_dev_attrib_ops, - .ct_attrs = target_core_dev_attrib_attrs, - .ct_owner = THIS_MODULE, -}; +TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL); -/* End functions for struct config_item_type target_core_dev_attrib_cit */ +/* End functions for struct config_item_type tb_dev_attrib_cit */ -/* Start functions for struct config_item_type target_core_dev_wwn_cit */ +/* Start functions for struct config_item_type tb_dev_wwn_cit */ CONFIGFS_EATTR_STRUCT(target_core_dev_wwn, t10_wwn); #define SE_DEV_WWN_ATTR(_name, _mode) \ @@ -984,15 +829,11 @@ static struct configfs_item_operations target_core_dev_wwn_ops = { .store_attribute = target_core_dev_wwn_attr_store, }; -static struct config_item_type target_core_dev_wwn_cit = { - 
.ct_item_ops = &target_core_dev_wwn_ops, - .ct_attrs = target_core_dev_wwn_attrs, - .ct_owner = THIS_MODULE, -}; +TB_CIT_SETUP(dev_wwn, &target_core_dev_wwn_ops, NULL, target_core_dev_wwn_attrs); -/* End functions for struct config_item_type target_core_dev_wwn_cit */ +/* End functions for struct config_item_type tb_dev_wwn_cit */ -/* Start functions for struct config_item_type target_core_dev_pr_cit */ +/* Start functions for struct config_item_type tb_dev_pr_cit */ CONFIGFS_EATTR_STRUCT(target_core_dev_pr, se_device); #define SE_DEV_PR_ATTR(_name, _mode) \ @@ -1453,15 +1294,11 @@ static struct configfs_item_operations target_core_dev_pr_ops = { .store_attribute = target_core_dev_pr_attr_store, }; -static struct config_item_type target_core_dev_pr_cit = { - .ct_item_ops = &target_core_dev_pr_ops, - .ct_attrs = target_core_dev_pr_attrs, - .ct_owner = THIS_MODULE, -}; +TB_CIT_SETUP(dev_pr, &target_core_dev_pr_ops, NULL, target_core_dev_pr_attrs); -/* End functions for struct config_item_type target_core_dev_pr_cit */ +/* End functions for struct config_item_type tb_dev_pr_cit */ -/* Start functions for struct config_item_type target_core_dev_cit */ +/* Start functions for struct config_item_type tb_dev_cit */ static ssize_t target_core_show_dev_info(void *p, char *page) { @@ -1925,7 +1762,7 @@ static struct target_core_configfs_attribute target_core_attr_dev_lba_map = { .store = target_core_store_dev_lba_map, }; -static struct configfs_attribute *lio_core_dev_attrs[] = { +static struct configfs_attribute *target_core_dev_attrs[] = { &target_core_attr_dev_info.attr, &target_core_attr_dev_control.attr, &target_core_attr_dev_alias.attr, @@ -1984,13 +1821,9 @@ static struct configfs_item_operations target_core_dev_item_ops = { .store_attribute = target_core_dev_store, }; -static struct config_item_type target_core_dev_cit = { - .ct_item_ops = &target_core_dev_item_ops, - .ct_attrs = lio_core_dev_attrs, - .ct_owner = THIS_MODULE, -}; +TB_CIT_SETUP(dev, 
&target_core_dev_item_ops, NULL, target_core_dev_attrs); -/* End functions for struct config_item_type target_core_dev_cit */ +/* End functions for struct config_item_type tb_dev_cit */ /* Start functions for struct config_item_type target_core_alua_lu_gp_cit */ @@ -2670,7 +2503,7 @@ static struct config_item_type target_core_alua_tg_pt_gp_cit = { /* End functions for struct config_item_type target_core_alua_tg_pt_gp_cit */ -/* Start functions for struct config_item_type target_core_alua_tg_pt_gps_cit */ +/* Start functions for struct config_item_type tb_alua_tg_pt_gps_cit */ static struct config_group *target_core_alua_create_tg_pt_gp( struct config_group *group, @@ -2721,12 +2554,9 @@ static struct configfs_group_operations target_core_alua_tg_pt_gps_group_ops = { .drop_item = &target_core_alua_drop_tg_pt_gp, }; -static struct config_item_type target_core_alua_tg_pt_gps_cit = { - .ct_group_ops = &target_core_alua_tg_pt_gps_group_ops, - .ct_owner = THIS_MODULE, -}; +TB_CIT_SETUP(dev_alua_tg_pt_gps, NULL, &target_core_alua_tg_pt_gps_group_ops, NULL); -/* End functions for struct config_item_type target_core_alua_tg_pt_gps_cit */ +/* End functions for struct config_item_type tb_alua_tg_pt_gps_cit */ /* Start functions for struct config_item_type target_core_alua_cit */ @@ -2744,7 +2574,7 @@ static struct config_item_type target_core_alua_cit = { /* End functions for struct config_item_type target_core_alua_cit */ -/* Start functions for struct config_item_type target_core_stat_cit */ +/* Start functions for struct config_item_type tb_dev_stat_cit */ static struct config_group *target_core_stat_mkdir( struct config_group *group, @@ -2765,12 +2595,9 @@ static struct configfs_group_operations target_core_stat_group_ops = { .drop_item = &target_core_stat_rmdir, }; -static struct config_item_type target_core_stat_cit = { - .ct_group_ops = &target_core_stat_group_ops, - .ct_owner = THIS_MODULE, -}; +TB_CIT_SETUP(dev_stat, NULL, &target_core_stat_group_ops, NULL); -/* End 
functions for struct config_item_type target_core_stat_cit */ +/* End functions for struct config_item_type tb_dev_stat_cit */ /* Start functions for struct config_item_type target_core_hba_cit */ @@ -2806,17 +2633,17 @@ static struct config_group *target_core_make_subdev( if (!dev_cg->default_groups) goto out_free_device; - config_group_init_type_name(dev_cg, name, &target_core_dev_cit); + config_group_init_type_name(dev_cg, name, &t->tb_cits.tb_dev_cit); config_group_init_type_name(&dev->dev_attrib.da_group, "attrib", - &target_core_dev_attrib_cit); + &t->tb_cits.tb_dev_attrib_cit); config_group_init_type_name(&dev->dev_pr_group, "pr", - &target_core_dev_pr_cit); + &t->tb_cits.tb_dev_pr_cit); config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn", - &target_core_dev_wwn_cit); + &t->tb_cits.tb_dev_wwn_cit); config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group, - "alua", &target_core_alua_tg_pt_gps_cit); + "alua", &t->tb_cits.tb_dev_alua_tg_pt_gps_cit); config_group_init_type_name(&dev->dev_stat_grps.stat_group, - "statistics", &target_core_stat_cit); + "statistics", &t->tb_cits.tb_dev_stat_cit); dev_cg->default_groups[0] = &dev->dev_attrib.da_group; dev_cg->default_groups[1] = &dev->dev_pr_group; @@ -3110,6 +2937,17 @@ static struct config_item_type target_core_cit = { /* Stop functions for struct config_item_type target_core_hba_cit */ +void target_core_setup_sub_cits(struct se_subsystem_api *sa) +{ + target_core_setup_dev_cit(sa); + target_core_setup_dev_attrib_cit(sa); + target_core_setup_dev_pr_cit(sa); + target_core_setup_dev_wwn_cit(sa); + target_core_setup_dev_alua_tg_pt_gps_cit(sa); + target_core_setup_dev_stat_cit(sa); +} +EXPORT_SYMBOL(target_core_setup_sub_cits); + static int __init target_core_init_configfs(void) { struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index c45f9e9..7653cfb 100644 --- 
a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -659,6 +659,7 @@ int se_dev_set_max_unmap_lba_count( dev, dev->dev_attrib.max_unmap_lba_count); return 0; } +EXPORT_SYMBOL(se_dev_set_max_unmap_lba_count); int se_dev_set_max_unmap_block_desc_count( struct se_device *dev, @@ -670,6 +671,7 @@ int se_dev_set_max_unmap_block_desc_count( dev, dev->dev_attrib.max_unmap_block_desc_count); return 0; } +EXPORT_SYMBOL(se_dev_set_max_unmap_block_desc_count); int se_dev_set_unmap_granularity( struct se_device *dev, @@ -680,6 +682,7 @@ int se_dev_set_unmap_granularity( dev, dev->dev_attrib.unmap_granularity); return 0; } +EXPORT_SYMBOL(se_dev_set_unmap_granularity); int se_dev_set_unmap_granularity_alignment( struct se_device *dev, @@ -690,6 +693,7 @@ int se_dev_set_unmap_granularity_alignment( dev, dev->dev_attrib.unmap_granularity_alignment); return 0; } +EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment); int se_dev_set_max_write_same_len( struct se_device *dev, @@ -700,6 +704,7 @@ int se_dev_set_max_write_same_len( dev, dev->dev_attrib.max_write_same_len); return 0; } +EXPORT_SYMBOL(se_dev_set_max_write_same_len); static void dev_set_t10_wwn_model_alias(struct se_device *dev) { @@ -738,6 +743,7 @@ int se_dev_set_emulate_model_alias(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_model_alias); int se_dev_set_emulate_dpo(struct se_device *dev, int flag) { @@ -753,6 +759,7 @@ int se_dev_set_emulate_dpo(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_dpo); int se_dev_set_emulate_fua_write(struct se_device *dev, int flag) { @@ -760,17 +767,12 @@ int se_dev_set_emulate_fua_write(struct se_device *dev, int flag) pr_err("Illegal value %d\n", flag); return -EINVAL; } - - if (flag && - dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) { - pr_err("emulate_fua_write not supported for pSCSI\n"); - return -EINVAL; - } dev->dev_attrib.emulate_fua_write = flag; pr_debug("dev[%p]: 
SE Device Forced Unit Access WRITEs: %d\n", dev, dev->dev_attrib.emulate_fua_write); return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_fua_write); int se_dev_set_emulate_fua_read(struct se_device *dev, int flag) { @@ -786,6 +788,7 @@ int se_dev_set_emulate_fua_read(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_fua_read); int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) { @@ -794,11 +797,6 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) return -EINVAL; } if (flag && - dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) { - pr_err("emulate_write_cache not supported for pSCSI\n"); - return -EINVAL; - } - if (flag && dev->transport->get_write_cache) { pr_err("emulate_write_cache not supported for this device\n"); return -EINVAL; @@ -809,6 +807,7 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) dev, dev->dev_attrib.emulate_write_cache); return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_write_cache); int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag) { @@ -829,6 +828,7 @@ int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_ua_intlck_ctrl); int se_dev_set_emulate_tas(struct se_device *dev, int flag) { @@ -849,6 +849,7 @@ int se_dev_set_emulate_tas(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_tas); int se_dev_set_emulate_tpu(struct se_device *dev, int flag) { @@ -870,6 +871,7 @@ int se_dev_set_emulate_tpu(struct se_device *dev, int flag) dev, flag); return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_tpu); int se_dev_set_emulate_tpws(struct se_device *dev, int flag) { @@ -891,6 +893,7 @@ int se_dev_set_emulate_tpws(struct se_device *dev, int flag) dev, flag); return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_tpws); int se_dev_set_emulate_caw(struct se_device *dev, int flag) { @@ -904,6 +907,7 @@ int se_dev_set_emulate_caw(struct se_device *dev, int flag) return 0; } 
+EXPORT_SYMBOL(se_dev_set_emulate_caw); int se_dev_set_emulate_3pc(struct se_device *dev, int flag) { @@ -917,6 +921,7 @@ int se_dev_set_emulate_3pc(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_3pc); int se_dev_set_pi_prot_type(struct se_device *dev, int flag) { @@ -970,6 +975,7 @@ int se_dev_set_pi_prot_type(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_pi_prot_type); int se_dev_set_pi_prot_format(struct se_device *dev, int flag) { @@ -1005,6 +1011,7 @@ int se_dev_set_pi_prot_format(struct se_device *dev, int flag) return 0; } +EXPORT_SYMBOL(se_dev_set_pi_prot_format); int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) { @@ -1017,6 +1024,7 @@ int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) (dev->dev_attrib.enforce_pr_isids) ? "Enabled" : "Disabled"); return 0; } +EXPORT_SYMBOL(se_dev_set_enforce_pr_isids); int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag) { @@ -1034,6 +1042,7 @@ int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag) pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag); return 0; } +EXPORT_SYMBOL(se_dev_set_force_pr_aptpl); int se_dev_set_is_nonrot(struct se_device *dev, int flag) { @@ -1046,6 +1055,7 @@ int se_dev_set_is_nonrot(struct se_device *dev, int flag) dev, flag); return 0; } +EXPORT_SYMBOL(se_dev_set_is_nonrot); int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag) { @@ -1058,6 +1068,7 @@ int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag) pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", dev, flag); return 0; } +EXPORT_SYMBOL(se_dev_set_emulate_rest_reord); /* * Note, this can only be called on unexported SE Device Object. 
@@ -1076,31 +1087,21 @@ int se_dev_set_queue_depth(struct se_device *dev, u32 queue_depth) return -EINVAL; } - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) { + if (queue_depth > dev->dev_attrib.queue_depth) { if (queue_depth > dev->dev_attrib.hw_queue_depth) { - pr_err("dev[%p]: Passed queue_depth: %u" - " exceeds TCM/SE_Device TCQ: %u\n", - dev, queue_depth, + pr_err("dev[%p]: Passed queue_depth:" + " %u exceeds TCM/SE_Device MAX" + " TCQ: %u\n", dev, queue_depth, dev->dev_attrib.hw_queue_depth); return -EINVAL; } - } else { - if (queue_depth > dev->dev_attrib.queue_depth) { - if (queue_depth > dev->dev_attrib.hw_queue_depth) { - pr_err("dev[%p]: Passed queue_depth:" - " %u exceeds TCM/SE_Device MAX" - " TCQ: %u\n", dev, queue_depth, - dev->dev_attrib.hw_queue_depth); - return -EINVAL; - } - } } - dev->dev_attrib.queue_depth = dev->queue_depth = queue_depth; pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n", dev, queue_depth); return 0; } +EXPORT_SYMBOL(se_dev_set_queue_depth); int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors) { @@ -1123,22 +1124,12 @@ int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors) DA_STATUS_MAX_SECTORS_MIN); return -EINVAL; } - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) { - if (fabric_max_sectors > dev->dev_attrib.hw_max_sectors) { - pr_err("dev[%p]: Passed fabric_max_sectors: %u" - " greater than TCM/SE_Device max_sectors:" - " %u\n", dev, fabric_max_sectors, - dev->dev_attrib.hw_max_sectors); - return -EINVAL; - } - } else { - if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) { - pr_err("dev[%p]: Passed fabric_max_sectors: %u" - " greater than DA_STATUS_MAX_SECTORS_MAX:" - " %u\n", dev, fabric_max_sectors, - DA_STATUS_MAX_SECTORS_MAX); - return -EINVAL; - } + if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) { + pr_err("dev[%p]: Passed fabric_max_sectors: %u" + " greater than DA_STATUS_MAX_SECTORS_MAX:" + " %u\n", dev, 
fabric_max_sectors, + DA_STATUS_MAX_SECTORS_MAX); + return -EINVAL; } /* * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks() @@ -1155,6 +1146,7 @@ int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors) dev, fabric_max_sectors); return 0; } +EXPORT_SYMBOL(se_dev_set_fabric_max_sectors); int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) { @@ -1164,11 +1156,6 @@ int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) dev, dev->export_count); return -EINVAL; } - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) { - pr_err("dev[%p]: Passed optimal_sectors cannot be" - " changed for TCM/pSCSI\n", dev); - return -EINVAL; - } if (optimal_sectors > dev->dev_attrib.fabric_max_sectors) { pr_err("dev[%p]: Passed optimal_sectors %u cannot be" " greater than fabric_max_sectors: %u\n", dev, @@ -1181,6 +1168,7 @@ int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) dev, optimal_sectors); return 0; } +EXPORT_SYMBOL(se_dev_set_optimal_sectors); int se_dev_set_block_size(struct se_device *dev, u32 block_size) { @@ -1201,13 +1189,6 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size) return -EINVAL; } - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) { - pr_err("dev[%p]: Not allowed to change block_size for" - " Physical Device, use for Linux/SCSI to change" - " block_size for underlying hardware\n", dev); - return -EINVAL; - } - dev->dev_attrib.block_size = block_size; pr_debug("dev[%p]: SE Device block_size changed to %u\n", dev, block_size); @@ -1218,6 +1199,7 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size) return 0; } +EXPORT_SYMBOL(se_dev_set_block_size); struct se_lun *core_dev_add_lun( struct se_portal_group *tpg, diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 72c83d9..c2aea09 100644 --- a/drivers/target/target_core_file.c +++ 
b/drivers/target/target_core_file.c @@ -37,6 +37,7 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> +#include <target/target_core_backend_configfs.h> #include "target_core_file.h" @@ -934,6 +935,42 @@ fd_parse_cdb(struct se_cmd *cmd) return sbc_parse_cdb(cmd, &fd_sbc_ops); } +DEF_TB_DEFAULT_ATTRIBS(fileio); + +static struct configfs_attribute *fileio_backend_dev_attrs[] = { + &fileio_dev_attrib_emulate_model_alias.attr, + &fileio_dev_attrib_emulate_dpo.attr, + &fileio_dev_attrib_emulate_fua_write.attr, + &fileio_dev_attrib_emulate_fua_read.attr, + &fileio_dev_attrib_emulate_write_cache.attr, + &fileio_dev_attrib_emulate_ua_intlck_ctrl.attr, + &fileio_dev_attrib_emulate_tas.attr, + &fileio_dev_attrib_emulate_tpu.attr, + &fileio_dev_attrib_emulate_tpws.attr, + &fileio_dev_attrib_emulate_caw.attr, + &fileio_dev_attrib_emulate_3pc.attr, + &fileio_dev_attrib_pi_prot_type.attr, + &fileio_dev_attrib_hw_pi_prot_type.attr, + &fileio_dev_attrib_pi_prot_format.attr, + &fileio_dev_attrib_enforce_pr_isids.attr, + &fileio_dev_attrib_is_nonrot.attr, + &fileio_dev_attrib_emulate_rest_reord.attr, + &fileio_dev_attrib_force_pr_aptpl.attr, + &fileio_dev_attrib_hw_block_size.attr, + &fileio_dev_attrib_block_size.attr, + &fileio_dev_attrib_hw_max_sectors.attr, + &fileio_dev_attrib_fabric_max_sectors.attr, + &fileio_dev_attrib_optimal_sectors.attr, + &fileio_dev_attrib_hw_queue_depth.attr, + &fileio_dev_attrib_queue_depth.attr, + &fileio_dev_attrib_max_unmap_lba_count.attr, + &fileio_dev_attrib_max_unmap_block_desc_count.attr, + &fileio_dev_attrib_unmap_granularity.attr, + &fileio_dev_attrib_unmap_granularity_alignment.attr, + &fileio_dev_attrib_max_write_same_len.attr, + NULL, +}; + static struct se_subsystem_api fileio_template = { .name = "fileio", .inquiry_prod = "FILEIO", @@ -957,6 +994,11 @@ static struct se_subsystem_api fileio_template = { static int __init fileio_module_init(void) { + struct target_backend_cits *tbc = &fileio_template.tb_cits; + + 
target_core_setup_sub_cits(&fileio_template); + tbc->tb_dev_attrib_cit.ct_attrs = fileio_backend_dev_attrs; + return transport_subsystem_register(&fileio_template); } diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index a25051a..ff95f95 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -36,6 +36,7 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> +#include <target/target_core_configfs.h> #include "target_core_internal.h" @@ -137,8 +138,7 @@ core_alloc_hba(const char *plugin_name, u32 plugin_dep_id, u32 hba_flags) return hba; out_module_put: - if (hba->transport->owner) - module_put(hba->transport->owner); + module_put(hba->transport->owner); hba->transport = NULL; out_free_hba: kfree(hba); @@ -159,8 +159,7 @@ core_delete_hba(struct se_hba *hba) pr_debug("CORE_HBA[%d] - Detached HBA from Generic Target" " Core\n", hba->hba_id); - if (hba->transport->owner) - module_put(hba->transport->owner); + module_put(hba->transport->owner); hba->transport = NULL; kfree(hba); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 7e6b857..3efff94 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -41,6 +41,7 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> +#include <target/target_core_backend_configfs.h> #include "target_core_iblock.h" @@ -858,6 +859,42 @@ static bool iblock_get_write_cache(struct se_device *dev) return q->flush_flags & REQ_FLUSH; } +DEF_TB_DEFAULT_ATTRIBS(iblock); + +static struct configfs_attribute *iblock_backend_dev_attrs[] = { + &iblock_dev_attrib_emulate_model_alias.attr, + &iblock_dev_attrib_emulate_dpo.attr, + &iblock_dev_attrib_emulate_fua_write.attr, + &iblock_dev_attrib_emulate_fua_read.attr, + &iblock_dev_attrib_emulate_write_cache.attr, + &iblock_dev_attrib_emulate_ua_intlck_ctrl.attr, + 
&iblock_dev_attrib_emulate_tas.attr, + &iblock_dev_attrib_emulate_tpu.attr, + &iblock_dev_attrib_emulate_tpws.attr, + &iblock_dev_attrib_emulate_caw.attr, + &iblock_dev_attrib_emulate_3pc.attr, + &iblock_dev_attrib_pi_prot_type.attr, + &iblock_dev_attrib_hw_pi_prot_type.attr, + &iblock_dev_attrib_pi_prot_format.attr, + &iblock_dev_attrib_enforce_pr_isids.attr, + &iblock_dev_attrib_is_nonrot.attr, + &iblock_dev_attrib_emulate_rest_reord.attr, + &iblock_dev_attrib_force_pr_aptpl.attr, + &iblock_dev_attrib_hw_block_size.attr, + &iblock_dev_attrib_block_size.attr, + &iblock_dev_attrib_hw_max_sectors.attr, + &iblock_dev_attrib_fabric_max_sectors.attr, + &iblock_dev_attrib_optimal_sectors.attr, + &iblock_dev_attrib_hw_queue_depth.attr, + &iblock_dev_attrib_queue_depth.attr, + &iblock_dev_attrib_max_unmap_lba_count.attr, + &iblock_dev_attrib_max_unmap_block_desc_count.attr, + &iblock_dev_attrib_unmap_granularity.attr, + &iblock_dev_attrib_unmap_granularity_alignment.attr, + &iblock_dev_attrib_max_write_same_len.attr, + NULL, +}; + static struct se_subsystem_api iblock_template = { .name = "iblock", .inquiry_prod = "IBLOCK", @@ -883,6 +920,11 @@ static struct se_subsystem_api iblock_template = { static int __init iblock_module_init(void) { + struct target_backend_cits *tbc = &iblock_template.tb_cits; + + target_core_setup_sub_cits(&iblock_template); + tbc->tb_dev_attrib_cit.ct_attrs = iblock_backend_dev_attrs; + return transport_subsystem_register(&iblock_template); } diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index e31f42f..60381db 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -18,34 +18,6 @@ int core_dev_export(struct se_device *, struct se_portal_group *, struct se_lun *); void core_dev_unexport(struct se_device *, struct se_portal_group *, struct se_lun *); -int se_dev_set_task_timeout(struct se_device *, u32); -int se_dev_set_max_unmap_lba_count(struct se_device *, 
u32); -int se_dev_set_max_unmap_block_desc_count(struct se_device *, u32); -int se_dev_set_unmap_granularity(struct se_device *, u32); -int se_dev_set_unmap_granularity_alignment(struct se_device *, u32); -int se_dev_set_max_write_same_len(struct se_device *, u32); -int se_dev_set_emulate_model_alias(struct se_device *, int); -int se_dev_set_emulate_dpo(struct se_device *, int); -int se_dev_set_emulate_fua_write(struct se_device *, int); -int se_dev_set_emulate_fua_read(struct se_device *, int); -int se_dev_set_emulate_write_cache(struct se_device *, int); -int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); -int se_dev_set_emulate_tas(struct se_device *, int); -int se_dev_set_emulate_tpu(struct se_device *, int); -int se_dev_set_emulate_tpws(struct se_device *, int); -int se_dev_set_emulate_caw(struct se_device *, int); -int se_dev_set_emulate_3pc(struct se_device *, int); -int se_dev_set_pi_prot_type(struct se_device *, int); -int se_dev_set_pi_prot_format(struct se_device *, int); -int se_dev_set_enforce_pr_isids(struct se_device *, int); -int se_dev_set_force_pr_aptpl(struct se_device *, int); -int se_dev_set_is_nonrot(struct se_device *, int); -int se_dev_set_emulate_rest_reord(struct se_device *dev, int); -int se_dev_set_queue_depth(struct se_device *, u32); -int se_dev_set_max_sectors(struct se_device *, u32); -int se_dev_set_fabric_max_sectors(struct se_device *, u32); -int se_dev_set_optimal_sectors(struct se_device *, u32); -int se_dev_set_block_size(struct se_device *, u32); struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32); void core_dev_del_lun(struct se_portal_group *, struct se_lun *); struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 4c261c3..d56f2aa 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -76,7 +76,7 @@ enum preempt_type { }; static void 
__core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *, - struct t10_pr_registration *, int); + struct t10_pr_registration *, int, int); static sense_reason_t target_scsi2_reservation_check(struct se_cmd *cmd) @@ -1177,7 +1177,7 @@ static int core_scsi3_check_implicit_release( * service action with the SERVICE ACTION RESERVATION KEY * field set to zero (see 5.7.11.3). */ - __core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0); + __core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0, 1); ret = 1; /* * For 'All Registrants' reservation types, all existing @@ -1219,7 +1219,8 @@ static void __core_scsi3_free_registration( pr_reg->pr_reg_deve->def_pr_registered = 0; pr_reg->pr_reg_deve->pr_res_key = 0; - list_del(&pr_reg->pr_reg_list); + if (!list_empty(&pr_reg->pr_reg_list)) + list_del(&pr_reg->pr_reg_list); /* * Caller accessing *pr_reg using core_scsi3_locate_pr_reg(), * so call core_scsi3_put_pr_reg() to decrement our reference. @@ -1271,6 +1272,7 @@ void core_scsi3_free_pr_reg_from_nacl( { struct t10_reservation *pr_tmpl = &dev->t10_pr; struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_res_holder; + bool free_reg = false; /* * If the passed se_node_acl matches the reservation holder, * release the reservation. @@ -1278,13 +1280,18 @@ void core_scsi3_free_pr_reg_from_nacl( spin_lock(&dev->dev_reservation_lock); pr_res_holder = dev->dev_pr_res_holder; if ((pr_res_holder != NULL) && - (pr_res_holder->pr_reg_nacl == nacl)) - __core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0); + (pr_res_holder->pr_reg_nacl == nacl)) { + __core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0, 1); + free_reg = true; + } spin_unlock(&dev->dev_reservation_lock); /* * Release any registration associated with the struct se_node_acl. 
*/ spin_lock(&pr_tmpl->registration_lock); + if (pr_res_holder && free_reg) + __core_scsi3_free_registration(dev, pr_res_holder, NULL, 0); + list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->registration_list, pr_reg_list) { @@ -1307,7 +1314,7 @@ void core_scsi3_free_all_registrations( if (pr_res_holder != NULL) { struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl; __core_scsi3_complete_pro_release(dev, pr_res_nacl, - pr_res_holder, 0); + pr_res_holder, 0, 0); } spin_unlock(&dev->dev_reservation_lock); @@ -1429,14 +1436,12 @@ core_scsi3_decode_spec_i_port( struct target_core_fabric_ops *tmp_tf_ops; unsigned char *buf; unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident; - char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN]; + char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; int dest_local_nexus; u32 dest_rtpi = 0; - memset(dest_iport, 0, 64); - local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; /* * Allocate a struct pr_transport_id_holder and setup the @@ -2105,13 +2110,13 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, /* * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus. 
*/ - pr_holder = core_scsi3_check_implicit_release( - cmd->se_dev, pr_reg); + type = pr_reg->pr_res_type; + pr_holder = core_scsi3_check_implicit_release(cmd->se_dev, + pr_reg); if (pr_holder < 0) { ret = TCM_RESERVATION_CONFLICT; goto out; } - type = pr_reg->pr_res_type; spin_lock(&pr_tmpl->registration_lock); /* @@ -2269,6 +2274,7 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) spin_lock(&dev->dev_reservation_lock); pr_res_holder = dev->dev_pr_res_holder; if (pr_res_holder) { + int pr_res_type = pr_res_holder->pr_res_type; /* * From spc4r17 Section 5.7.9: Reserving: * @@ -2279,7 +2285,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) * the logical unit, then the command shall be completed with * RESERVATION CONFLICT status. */ - if (pr_res_holder != pr_reg) { + if ((pr_res_holder != pr_reg) && + (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) && + (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) { struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl; pr_err("SPC-3 PR: Attempted RESERVE from" " [%s]: %s while reservation already held by" @@ -2385,23 +2393,59 @@ static void __core_scsi3_complete_pro_release( struct se_device *dev, struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, - int explicit) + int explicit, + int unreg) { struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo; char i_buf[PR_REG_ISID_ID_LEN]; + int pr_res_type = 0, pr_res_scope = 0; memset(i_buf, 0, PR_REG_ISID_ID_LEN); core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* * Go ahead and release the current PR reservation holder. + * If an All Registrants reservation is currently active and + * a unregister operation is requested, replace the current + * dev_pr_res_holder with another active registration. 
*/ - dev->dev_pr_res_holder = NULL; + if (dev->dev_pr_res_holder) { + pr_res_type = dev->dev_pr_res_holder->pr_res_type; + pr_res_scope = dev->dev_pr_res_holder->pr_res_scope; + dev->dev_pr_res_holder->pr_res_type = 0; + dev->dev_pr_res_holder->pr_res_scope = 0; + dev->dev_pr_res_holder->pr_res_holder = 0; + dev->dev_pr_res_holder = NULL; + } + if (!unreg) + goto out; - pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" - " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit", - core_scsi3_pr_dump_type(pr_reg->pr_res_type), - (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); + spin_lock(&dev->t10_pr.registration_lock); + list_del_init(&pr_reg->pr_reg_list); + /* + * If the I_T nexus is a reservation holder, the persistent reservation + * is of an all registrants type, and the I_T nexus is the last remaining + * registered I_T nexus, then the device server shall also release the + * persistent reservation. + */ + if (!list_empty(&dev->t10_pr.registration_list) && + ((pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) || + (pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))) { + dev->dev_pr_res_holder = + list_entry(dev->t10_pr.registration_list.next, + struct t10_pr_registration, pr_reg_list); + dev->dev_pr_res_holder->pr_res_type = pr_res_type; + dev->dev_pr_res_holder->pr_res_scope = pr_res_scope; + dev->dev_pr_res_holder->pr_res_holder = 1; + } + spin_unlock(&dev->t10_pr.registration_lock); +out: + if (!dev->dev_pr_res_holder) { + pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" + " reservation holder TYPE: %s ALL_TG_PT: %d\n", + tfo->get_fabric_name(), (explicit) ? "explicit" : + "implicit", core_scsi3_pr_dump_type(pr_res_type), + (pr_reg->pr_reg_all_tg_pt) ? 
1 : 0); + } pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n", tfo->get_fabric_name(), se_nacl->initiatorname, i_buf); @@ -2532,7 +2576,7 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope, * server shall not establish a unit attention condition. */ __core_scsi3_complete_pro_release(dev, se_sess->se_node_acl, - pr_reg, 1); + pr_reg, 1, 0); spin_unlock(&dev->dev_reservation_lock); @@ -2620,7 +2664,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) if (pr_res_holder) { struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl; __core_scsi3_complete_pro_release(dev, pr_res_nacl, - pr_res_holder, 0); + pr_res_holder, 0, 0); } spin_unlock(&dev->dev_reservation_lock); /* @@ -2679,7 +2723,7 @@ static void __core_scsi3_complete_pro_preempt( */ if (dev->dev_pr_res_holder) __core_scsi3_complete_pro_release(dev, nacl, - dev->dev_pr_res_holder, 0); + dev->dev_pr_res_holder, 0, 0); dev->dev_pr_res_holder = pr_reg; pr_reg->pr_res_holder = 1; @@ -2924,8 +2968,8 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, */ if (pr_reg_n != pr_res_holder) __core_scsi3_complete_pro_release(dev, - pr_res_holder->pr_reg_nacl, - dev->dev_pr_res_holder, 0); + pr_res_holder->pr_reg_nacl, + dev->dev_pr_res_holder, 0, 0); /* * b) Remove the registrations for all I_T nexuses identified * by the SERVICE ACTION RESERVATION KEY field, except the @@ -3059,7 +3103,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; unsigned char *initiator_str; - char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN]; + char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; u32 tid_len, tmp_tid_len; int new_reg = 0, type, scope, matching_iname; sense_reason_t ret; @@ -3071,7 +3115,6 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - memset(dest_iport, 0, 64); memset(i_buf, 0, 
PR_REG_ISID_ID_LEN); se_tpg = se_sess->se_tpg; tf_ops = se_tpg->se_tpg_tfo; @@ -3389,7 +3432,7 @@ after_iport_check: * holder (i.e., the I_T nexus on which the */ __core_scsi3_complete_pro_release(dev, pr_res_nacl, - dev->dev_pr_res_holder, 0); + dev->dev_pr_res_holder, 0, 0); /* * g) Move the persistent reservation to the specified I_T nexus using * the same scope and type as the persistent reservation released in @@ -3837,7 +3880,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) unsigned char *buf; u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len; u32 off = 8; /* off into first Full Status descriptor */ - int format_code = 0; + int format_code = 0, pr_res_type = 0, pr_res_scope = 0; + bool all_reg = false; if (cmd->data_length < 8) { pr_err("PRIN SA READ_FULL_STATUS SCSI Data Length: %u" @@ -3854,6 +3898,19 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff); buf[3] = (dev->t10_pr.pr_generation & 0xff); + spin_lock(&dev->dev_reservation_lock); + if (dev->dev_pr_res_holder) { + struct t10_pr_registration *pr_holder = dev->dev_pr_res_holder; + + if (pr_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG || + pr_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG) { + all_reg = true; + pr_res_type = pr_holder->pr_res_type; + pr_res_scope = pr_holder->pr_res_scope; + } + } + spin_unlock(&dev->dev_reservation_lock); + spin_lock(&pr_tmpl->registration_lock); list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->registration_list, pr_reg_list) { @@ -3901,14 +3958,20 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * reservation holder for PR_HOLDER bit. * * Also, if this registration is the reservation - * holder, fill in SCOPE and TYPE in the next byte. + * holder or there is an All Registrants reservation + * active, fill in SCOPE and TYPE in the next byte. 
*/ if (pr_reg->pr_res_holder) { buf[off++] |= 0x01; buf[off++] = (pr_reg->pr_res_scope & 0xf0) | (pr_reg->pr_res_type & 0x0f); - } else + } else if (all_reg) { + buf[off++] |= 0x01; + buf[off++] = (pr_res_scope & 0xf0) | + (pr_res_type & 0x0f); + } else { off += 2; + } off += 4; /* Skip over reserved area */ /* diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 7c8291f..74873e4 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -44,6 +44,7 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> +#include <target/target_core_backend_configfs.h> #include "target_core_alua.h" #include "target_core_pscsi.h" @@ -1165,6 +1166,26 @@ static void pscsi_req_done(struct request *req, int uptodate) kfree(pt); } +DEF_TB_DEV_ATTRIB_RO(pscsi, hw_pi_prot_type); +TB_DEV_ATTR_RO(pscsi, hw_pi_prot_type); + +DEF_TB_DEV_ATTRIB_RO(pscsi, hw_block_size); +TB_DEV_ATTR_RO(pscsi, hw_block_size); + +DEF_TB_DEV_ATTRIB_RO(pscsi, hw_max_sectors); +TB_DEV_ATTR_RO(pscsi, hw_max_sectors); + +DEF_TB_DEV_ATTRIB_RO(pscsi, hw_queue_depth); +TB_DEV_ATTR_RO(pscsi, hw_queue_depth); + +static struct configfs_attribute *pscsi_backend_dev_attrs[] = { + &pscsi_dev_attrib_hw_pi_prot_type.attr, + &pscsi_dev_attrib_hw_block_size.attr, + &pscsi_dev_attrib_hw_max_sectors.attr, + &pscsi_dev_attrib_hw_queue_depth.attr, + NULL, +}; + static struct se_subsystem_api pscsi_template = { .name = "pscsi", .owner = THIS_MODULE, @@ -1185,6 +1206,11 @@ static struct se_subsystem_api pscsi_template = { static int __init pscsi_module_init(void) { + struct target_backend_cits *tbc = &pscsi_template.tb_cits; + + target_core_setup_sub_cits(&pscsi_template); + tbc->tb_dev_attrib_cit.ct_attrs = pscsi_backend_dev_attrs; + return transport_subsystem_register(&pscsi_template); } diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index b920db3..60ebd17 100644 --- a/drivers/target/target_core_rd.c +++ 
b/drivers/target/target_core_rd.c @@ -34,6 +34,7 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> +#include <target/target_core_backend_configfs.h> #include "target_core_rd.h" @@ -632,6 +633,42 @@ rd_parse_cdb(struct se_cmd *cmd) return sbc_parse_cdb(cmd, &rd_sbc_ops); } +DEF_TB_DEFAULT_ATTRIBS(rd_mcp); + +static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = { + &rd_mcp_dev_attrib_emulate_model_alias.attr, + &rd_mcp_dev_attrib_emulate_dpo.attr, + &rd_mcp_dev_attrib_emulate_fua_write.attr, + &rd_mcp_dev_attrib_emulate_fua_read.attr, + &rd_mcp_dev_attrib_emulate_write_cache.attr, + &rd_mcp_dev_attrib_emulate_ua_intlck_ctrl.attr, + &rd_mcp_dev_attrib_emulate_tas.attr, + &rd_mcp_dev_attrib_emulate_tpu.attr, + &rd_mcp_dev_attrib_emulate_tpws.attr, + &rd_mcp_dev_attrib_emulate_caw.attr, + &rd_mcp_dev_attrib_emulate_3pc.attr, + &rd_mcp_dev_attrib_pi_prot_type.attr, + &rd_mcp_dev_attrib_hw_pi_prot_type.attr, + &rd_mcp_dev_attrib_pi_prot_format.attr, + &rd_mcp_dev_attrib_enforce_pr_isids.attr, + &rd_mcp_dev_attrib_is_nonrot.attr, + &rd_mcp_dev_attrib_emulate_rest_reord.attr, + &rd_mcp_dev_attrib_force_pr_aptpl.attr, + &rd_mcp_dev_attrib_hw_block_size.attr, + &rd_mcp_dev_attrib_block_size.attr, + &rd_mcp_dev_attrib_hw_max_sectors.attr, + &rd_mcp_dev_attrib_fabric_max_sectors.attr, + &rd_mcp_dev_attrib_optimal_sectors.attr, + &rd_mcp_dev_attrib_hw_queue_depth.attr, + &rd_mcp_dev_attrib_queue_depth.attr, + &rd_mcp_dev_attrib_max_unmap_lba_count.attr, + &rd_mcp_dev_attrib_max_unmap_block_desc_count.attr, + &rd_mcp_dev_attrib_unmap_granularity.attr, + &rd_mcp_dev_attrib_unmap_granularity_alignment.attr, + &rd_mcp_dev_attrib_max_write_same_len.attr, + NULL, +}; + static struct se_subsystem_api rd_mcp_template = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", @@ -653,8 +690,12 @@ static struct se_subsystem_api rd_mcp_template = { int __init rd_module_init(void) { + struct target_backend_cits *tbc = &rd_mcp_template.tb_cits; int ret; + 
target_core_setup_sub_cits(&rd_mcp_template); + tbc->tb_dev_attrib_cit.ct_attrs = rd_mcp_backend_dev_attrs; + ret = transport_subsystem_register(&rd_mcp_template); if (ret < 0) { return ret; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9a1b314..8bfa61c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -28,6 +28,8 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_backend.h> +#include <target/target_core_backend_configfs.h> + #include <linux/target_core_user.h> /* @@ -1092,6 +1094,42 @@ tcmu_parse_cdb(struct se_cmd *cmd) return ret; } +DEF_TB_DEFAULT_ATTRIBS(tcmu); + +static struct configfs_attribute *tcmu_backend_dev_attrs[] = { + &tcmu_dev_attrib_emulate_model_alias.attr, + &tcmu_dev_attrib_emulate_dpo.attr, + &tcmu_dev_attrib_emulate_fua_write.attr, + &tcmu_dev_attrib_emulate_fua_read.attr, + &tcmu_dev_attrib_emulate_write_cache.attr, + &tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr, + &tcmu_dev_attrib_emulate_tas.attr, + &tcmu_dev_attrib_emulate_tpu.attr, + &tcmu_dev_attrib_emulate_tpws.attr, + &tcmu_dev_attrib_emulate_caw.attr, + &tcmu_dev_attrib_emulate_3pc.attr, + &tcmu_dev_attrib_pi_prot_type.attr, + &tcmu_dev_attrib_hw_pi_prot_type.attr, + &tcmu_dev_attrib_pi_prot_format.attr, + &tcmu_dev_attrib_enforce_pr_isids.attr, + &tcmu_dev_attrib_is_nonrot.attr, + &tcmu_dev_attrib_emulate_rest_reord.attr, + &tcmu_dev_attrib_force_pr_aptpl.attr, + &tcmu_dev_attrib_hw_block_size.attr, + &tcmu_dev_attrib_block_size.attr, + &tcmu_dev_attrib_hw_max_sectors.attr, + &tcmu_dev_attrib_fabric_max_sectors.attr, + &tcmu_dev_attrib_optimal_sectors.attr, + &tcmu_dev_attrib_hw_queue_depth.attr, + &tcmu_dev_attrib_queue_depth.attr, + &tcmu_dev_attrib_max_unmap_lba_count.attr, + &tcmu_dev_attrib_max_unmap_block_desc_count.attr, + &tcmu_dev_attrib_unmap_granularity.attr, + &tcmu_dev_attrib_unmap_granularity_alignment.attr, + 
&tcmu_dev_attrib_max_write_same_len.attr, + NULL, +}; + static struct se_subsystem_api tcmu_template = { .name = "user", .inquiry_prod = "USER", @@ -1112,6 +1150,7 @@ static struct se_subsystem_api tcmu_template = { static int __init tcmu_module_init(void) { + struct target_backend_cits *tbc = &tcmu_template.tb_cits; int ret; BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); @@ -1134,6 +1173,9 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } + target_core_setup_sub_cits(&tcmu_template); + tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs; + ret = transport_subsystem_register(&tcmu_template); if (ret) goto out_unreg_genl; diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index b46c706..e98b424 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -435,7 +435,6 @@ static int clamp_thread(void *arg) * allowed. thus jiffies are updated properly. */ preempt_disable(); - tick_nohz_idle_enter(); /* mwait until target jiffies is reached */ while (time_before(jiffies, target_jiffies)) { unsigned long ecx = 1; @@ -451,7 +450,6 @@ static int clamp_thread(void *arg) start_critical_timings(); atomic_inc(&idle_wakeup_counter); } - tick_nohz_idle_exit(); preempt_enable(); } del_timer_sync(&wakeup_timer); diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 5174eba..3bb02c6 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -11,6 +11,7 @@ #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/export.h> +#include <uapi/linux/virtio_config.h> static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) { @@ -28,13 +29,14 @@ static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) /* Returns vring->num if empty, -ve on error. 
*/ static inline int __vringh_get_head(const struct vringh *vrh, - int (*getu16)(u16 *val, const u16 *p), + int (*getu16)(const struct vringh *vrh, + u16 *val, const __virtio16 *p), u16 *last_avail_idx) { u16 avail_idx, i, head; int err; - err = getu16(&avail_idx, &vrh->vring.avail->idx); + err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx); if (err) { vringh_bad("Failed to access avail idx at %p", &vrh->vring.avail->idx); @@ -49,7 +51,7 @@ static inline int __vringh_get_head(const struct vringh *vrh, i = *last_avail_idx & (vrh->vring.num - 1); - err = getu16(&head, &vrh->vring.avail->ring[i]); + err = getu16(vrh, &head, &vrh->vring.avail->ring[i]); if (err) { vringh_bad("Failed to read head: idx %d address %p", *last_avail_idx, &vrh->vring.avail->ring[i]); @@ -144,28 +146,32 @@ static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len, } /* No reason for this code to be inline. */ -static int move_to_indirect(int *up_next, u16 *i, void *addr, +static int move_to_indirect(const struct vringh *vrh, + int *up_next, u16 *i, void *addr, const struct vring_desc *desc, struct vring_desc **descs, int *desc_max) { + u32 len; + /* Indirect tables can't have indirect. */ if (*up_next != -1) { vringh_bad("Multilevel indirect %u->%u", *up_next, *i); return -EINVAL; } - if (unlikely(desc->len % sizeof(struct vring_desc))) { + len = vringh32_to_cpu(vrh, desc->len); + if (unlikely(len % sizeof(struct vring_desc))) { vringh_bad("Strange indirect len %u", desc->len); return -EINVAL; } /* We will check this when we follow it! */ - if (desc->flags & VRING_DESC_F_NEXT) - *up_next = desc->next; + if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) + *up_next = vringh16_to_cpu(vrh, desc->next); else *up_next = -2; *descs = addr; - *desc_max = desc->len / sizeof(struct vring_desc); + *desc_max = len / sizeof(struct vring_desc); /* Now, start at the first indirect. 
*/ *i = 0; @@ -287,22 +293,25 @@ __vringh_iov(struct vringh *vrh, u16 i, if (unlikely(err)) goto fail; - if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { + if (unlikely(desc.flags & + cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) { + u64 a = vringh64_to_cpu(vrh, desc.addr); + /* Make sure it's OK, and get offset. */ - len = desc.len; - if (!rcheck(vrh, desc.addr, &len, &range, getrange)) { + len = vringh32_to_cpu(vrh, desc.len); + if (!rcheck(vrh, a, &len, &range, getrange)) { err = -EINVAL; goto fail; } - if (unlikely(len != desc.len)) { + if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) { slow = true; /* We need to save this range to use offset */ slowrange = range; } - addr = (void *)(long)(desc.addr + range.offset); - err = move_to_indirect(&up_next, &i, addr, &desc, + addr = (void *)(long)(a + range.offset); + err = move_to_indirect(vrh, &up_next, &i, addr, &desc, &descs, &desc_max); if (err) goto fail; @@ -315,7 +324,7 @@ __vringh_iov(struct vringh *vrh, u16 i, goto fail; } - if (desc.flags & VRING_DESC_F_WRITE) + if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE)) iov = wiov; else { iov = riov; @@ -336,12 +345,14 @@ __vringh_iov(struct vringh *vrh, u16 i, again: /* Make sure it's OK, and get offset. 
*/ - len = desc.len; - if (!rcheck(vrh, desc.addr, &len, &range, getrange)) { + len = vringh32_to_cpu(vrh, desc.len); + if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range, + getrange)) { err = -EINVAL; goto fail; } - addr = (void *)(unsigned long)(desc.addr + range.offset); + addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) + + range.offset); if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) { err = resize_iovec(iov, gfp); @@ -353,14 +364,16 @@ __vringh_iov(struct vringh *vrh, u16 i, iov->iov[iov->used].iov_len = len; iov->used++; - if (unlikely(len != desc.len)) { - desc.len -= len; - desc.addr += len; + if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) { + desc.len = cpu_to_vringh32(vrh, + vringh32_to_cpu(vrh, desc.len) - len); + desc.addr = cpu_to_vringh64(vrh, + vringh64_to_cpu(vrh, desc.addr) + len); goto again; } - if (desc.flags & VRING_DESC_F_NEXT) { - i = desc.next; + if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) { + i = vringh16_to_cpu(vrh, desc.next); } else { /* Just in case we need to finish traversing above. */ if (unlikely(up_next > 0)) { @@ -387,7 +400,8 @@ fail: static inline int __vringh_complete(struct vringh *vrh, const struct vring_used_elem *used, unsigned int num_used, - int (*putu16)(u16 *p, u16 val), + int (*putu16)(const struct vringh *vrh, + __virtio16 *p, u16 val), int (*putused)(struct vring_used_elem *dst, const struct vring_used_elem *src, unsigned num)) @@ -420,7 +434,7 @@ static inline int __vringh_complete(struct vringh *vrh, /* Make sure buffer is written before we update index. 
*/ virtio_wmb(vrh->weak_barriers); - err = putu16(&vrh->vring.used->idx, used_idx + num_used); + err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used); if (err) { vringh_bad("Failed to update used index at %p", &vrh->vring.used->idx); @@ -433,7 +447,9 @@ static inline int __vringh_complete(struct vringh *vrh, static inline int __vringh_need_notify(struct vringh *vrh, - int (*getu16)(u16 *val, const u16 *p)) + int (*getu16)(const struct vringh *vrh, + u16 *val, + const __virtio16 *p)) { bool notify; u16 used_event; @@ -447,7 +463,7 @@ static inline int __vringh_need_notify(struct vringh *vrh, /* Old-style, without event indices. */ if (!vrh->event_indices) { u16 flags; - err = getu16(&flags, &vrh->vring.avail->flags); + err = getu16(vrh, &flags, &vrh->vring.avail->flags); if (err) { vringh_bad("Failed to get flags at %p", &vrh->vring.avail->flags); @@ -457,7 +473,7 @@ static inline int __vringh_need_notify(struct vringh *vrh, } /* Modern: we know when other side wants to know. */ - err = getu16(&used_event, &vring_used_event(&vrh->vring)); + err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring)); if (err) { vringh_bad("Failed to get used event idx at %p", &vring_used_event(&vrh->vring)); @@ -478,20 +494,22 @@ static inline int __vringh_need_notify(struct vringh *vrh, } static inline bool __vringh_notify_enable(struct vringh *vrh, - int (*getu16)(u16 *val, const u16 *p), - int (*putu16)(u16 *p, u16 val)) + int (*getu16)(const struct vringh *vrh, + u16 *val, const __virtio16 *p), + int (*putu16)(const struct vringh *vrh, + __virtio16 *p, u16 val)) { u16 avail; if (!vrh->event_indices) { /* Old-school; update flags. 
*/ - if (putu16(&vrh->vring.used->flags, 0) != 0) { + if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) { vringh_bad("Clearing used flags %p", &vrh->vring.used->flags); return true; } } else { - if (putu16(&vring_avail_event(&vrh->vring), + if (putu16(vrh, &vring_avail_event(&vrh->vring), vrh->last_avail_idx) != 0) { vringh_bad("Updating avail event index %p", &vring_avail_event(&vrh->vring)); @@ -503,7 +521,7 @@ static inline bool __vringh_notify_enable(struct vringh *vrh, * sure it's written, then check again. */ virtio_mb(vrh->weak_barriers); - if (getu16(&avail, &vrh->vring.avail->idx) != 0) { + if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) { vringh_bad("Failed to check avail idx at %p", &vrh->vring.avail->idx); return true; @@ -516,11 +534,13 @@ static inline bool __vringh_notify_enable(struct vringh *vrh, } static inline void __vringh_notify_disable(struct vringh *vrh, - int (*putu16)(u16 *p, u16 val)) + int (*putu16)(const struct vringh *vrh, + __virtio16 *p, u16 val)) { if (!vrh->event_indices) { /* Old-school; update flags. */ - if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) { + if (putu16(vrh, &vrh->vring.used->flags, + VRING_USED_F_NO_NOTIFY)) { vringh_bad("Setting used flags %p", &vrh->vring.used->flags); } @@ -528,14 +548,18 @@ static inline void __vringh_notify_disable(struct vringh *vrh, } /* Userspace access helpers: in this case, addresses are really userspace. 
*/ -static inline int getu16_user(u16 *val, const u16 *p) +static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p) { - return get_user(*val, (__force u16 __user *)p); + __virtio16 v = 0; + int rc = get_user(v, (__force __virtio16 __user *)p); + *val = vringh16_to_cpu(vrh, v); + return rc; } -static inline int putu16_user(u16 *p, u16 val) +static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val) { - return put_user(val, (__force u16 __user *)p); + __virtio16 v = cpu_to_vringh16(vrh, val); + return put_user(v, (__force __virtio16 __user *)p); } static inline int copydesc_user(void *dst, const void *src, size_t len) @@ -577,7 +601,7 @@ static inline int xfer_to_user(void *dst, void *src, size_t len) * Returns an error if num is invalid: you should check pointers * yourself! */ -int vringh_init_user(struct vringh *vrh, u32 features, +int vringh_init_user(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, struct vring_desc __user *desc, struct vring_avail __user *avail, @@ -589,6 +613,7 @@ int vringh_init_user(struct vringh *vrh, u32 features, return -EINVAL; } + vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1)); vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); vrh->weak_barriers = weak_barriers; vrh->completed = 0; @@ -729,8 +754,8 @@ int vringh_complete_user(struct vringh *vrh, u16 head, u32 len) { struct vring_used_elem used; - used.id = head; - used.len = len; + used.id = cpu_to_vringh32(vrh, head); + used.len = cpu_to_vringh32(vrh, len); return __vringh_complete(vrh, &used, 1, putu16_user, putused_user); } EXPORT_SYMBOL(vringh_complete_user); @@ -792,15 +817,16 @@ int vringh_need_notify_user(struct vringh *vrh) EXPORT_SYMBOL(vringh_need_notify_user); /* Kernelspace access helpers. 
*/ -static inline int getu16_kern(u16 *val, const u16 *p) +static inline int getu16_kern(const struct vringh *vrh, + u16 *val, const __virtio16 *p) { - *val = ACCESS_ONCE(*p); + *val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p)); return 0; } -static inline int putu16_kern(u16 *p, u16 val) +static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val) { - ACCESS_ONCE(*p) = val; + ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val); return 0; } @@ -836,7 +862,7 @@ static inline int xfer_kern(void *src, void *dst, size_t len) * * Returns an error if num is invalid. */ -int vringh_init_kern(struct vringh *vrh, u32 features, +int vringh_init_kern(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, struct vring_desc *desc, struct vring_avail *avail, @@ -848,6 +874,7 @@ int vringh_init_kern(struct vringh *vrh, u32 features, return -EINVAL; } + vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1)); vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); vrh->weak_barriers = weak_barriers; vrh->completed = 0; @@ -962,8 +989,8 @@ int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len) { struct vring_used_elem used; - used.id = head; - used.len = len; + used.id = cpu_to_vringh32(vrh, head); + used.len = cpu_to_vringh32(vrh, len); return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern); } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index f226658..b9f70df 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -162,6 +162,27 @@ static void virtio_config_enable(struct virtio_device *dev) spin_unlock_irq(&dev->config_lock); } +static int virtio_finalize_features(struct virtio_device *dev) +{ + int ret = dev->config->finalize_features(dev); + unsigned status; + + if (ret) + return ret; + + if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) + return 0; + + add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); + status = dev->config->get_status(dev); + if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { 
+ dev_err(&dev->dev, "virtio: device refuses features: %x\n", + status); + return -ENODEV; + } + return 0; +} + static int virtio_dev_probe(struct device *_d) { int err, i; @@ -170,7 +191,6 @@ static int virtio_dev_probe(struct device *_d) u64 device_features; u64 driver_features; u64 driver_features_legacy; - unsigned status; /* We have a driver! */ add_status(dev, VIRTIO_CONFIG_S_DRIVER); @@ -208,21 +228,10 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1ULL << i)) __virtio_set_bit(dev, i); - err = dev->config->finalize_features(dev); + err = virtio_finalize_features(dev); if (err) goto err; - if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) { - add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); - status = dev->config->get_status(dev); - if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { - dev_err(_d, "virtio: device refuses features: %x\n", - status); - err = -ENODEV; - goto err; - } - } - err = drv->probe(dev); if (err) goto err; @@ -372,7 +381,7 @@ int virtio_device_restore(struct virtio_device *dev) /* We have a driver! */ add_status(dev, VIRTIO_CONFIG_S_DRIVER); - ret = dev->config->finalize_features(dev); + ret = virtio_finalize_features(dev); if (ret) goto err; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 953057d..2ef9529 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -458,7 +458,44 @@ static int virtio_pci_restore(struct device *dev) return virtio_device_restore(&vp_dev->vdev); } -const struct dev_pm_ops virtio_pci_pm_ops = { +static const struct dev_pm_ops virtio_pci_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) }; #endif + + +/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. 
*/ +static const struct pci_device_id virtio_pci_id_table[] = { + { PCI_DEVICE(0x1af4, PCI_ANY_ID) }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); + +static int virtio_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + return virtio_pci_legacy_probe(pci_dev, id); +} + +static void virtio_pci_remove(struct pci_dev *pci_dev) +{ + virtio_pci_legacy_remove(pci_dev); +} + +static struct pci_driver virtio_pci_driver = { + .name = "virtio-pci", + .id_table = virtio_pci_id_table, + .probe = virtio_pci_probe, + .remove = virtio_pci_remove, +#ifdef CONFIG_PM_SLEEP + .driver.pm = &virtio_pci_pm_ops, +#endif +}; + +module_pci_driver(virtio_pci_driver); + +MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); +MODULE_DESCRIPTION("virtio-pci"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index d840dad..adddb64 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -27,7 +27,6 @@ #include <linux/virtio.h> #include <linux/virtio_config.h> #include <linux/virtio_ring.h> -#define VIRTIO_PCI_NO_LEGACY #include <linux/virtio_pci.h> #include <linux/highmem.h> #include <linux/spinlock.h> @@ -129,8 +128,8 @@ const char *vp_bus_name(struct virtio_device *vdev); int vp_set_vq_affinity(struct virtqueue *vq, int cpu); void virtio_pci_release_dev(struct device *); -#ifdef CONFIG_PM_SLEEP -extern const struct dev_pm_ops virtio_pci_pm_ops; -#endif +int virtio_pci_legacy_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id); +void virtio_pci_legacy_remove(struct pci_dev *pci_dev); #endif diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2588252..6c76f0f 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -19,14 +19,6 @@ #include "virtio_pci_common.h" -/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. 
*/ -static const struct pci_device_id virtio_pci_id_table[] = { - { PCI_DEVICE(0x1af4, PCI_ANY_ID) }, - { 0 } -}; - -MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); - /* virtio config->get_features() implementation */ static u64 vp_get_features(struct virtio_device *vdev) { @@ -220,7 +212,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { }; /* the PCI probing function */ -static int virtio_pci_probe(struct pci_dev *pci_dev, +int virtio_pci_legacy_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { struct virtio_pci_device *vp_dev; @@ -300,7 +292,7 @@ out: return err; } -static void virtio_pci_remove(struct pci_dev *pci_dev) +void virtio_pci_legacy_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); @@ -312,15 +304,3 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) pci_disable_device(pci_dev); kfree(vp_dev); } - -static struct pci_driver virtio_pci_driver = { - .name = "virtio-pci", - .id_table = virtio_pci_id_table, - .probe = virtio_pci_probe, - .remove = virtio_pci_remove, -#ifdef CONFIG_PM_SLEEP - .driver.pm = &virtio_pci_pm_ops, -#endif -}; - -module_pci_driver(virtio_pci_driver); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index c04ef1d..97aff28 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -254,6 +254,7 @@ static char *scanarg(char *s, char del) return NULL; } } + s[-1] ='\0'; return s; } @@ -378,8 +379,7 @@ static Node *create_entry(const char __user *buffer, size_t count) p = scanarg(p, del); if (!p) goto einval; - p[-1] = '\0'; - if (p == e->magic) + if (!e->magic[0]) goto einval; if (USE_DEBUG) print_hex_dump_bytes( @@ -391,8 +391,7 @@ static Node *create_entry(const char __user *buffer, size_t count) p = scanarg(p, del); if (!p) goto einval; - p[-1] = '\0'; - if (p == e->mask) { + if (!e->mask[0]) { e->mask = NULL; pr_debug("register: mask[raw]: none\n"); } else if (USE_DEBUG) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e6fbbd7..7e60741 100644 --- 
a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3481,8 +3481,8 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); -int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *actual_bytes); +int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3096512..8c63419 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4121,12 +4121,6 @@ again: if (ret) break; - /* opt_discard */ - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_error_discard_extent(root, start, - end + 1 - start, - NULL); - clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 222d6ae..a80b971 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1889,8 +1889,8 @@ static int btrfs_issue_discard(struct block_device *bdev, return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); } -static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *actual_bytes) +int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 *actual_bytes) { int ret; u64 discarded_bytes = 0; @@ -5727,7 +5727,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, update_global_block_rsv(fs_info); } -static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, + const bool return_free_space) { struct btrfs_fs_info *fs_info = root->fs_info; struct 
btrfs_block_group_cache *cache = NULL; @@ -5751,7 +5752,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (start < cache->last_byte_to_unpin) { len = min(len, cache->last_byte_to_unpin - start); - btrfs_add_free_space(cache, start, len); + if (return_free_space) + btrfs_add_free_space(cache, start, len); } start += len; @@ -5815,7 +5817,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); - unpin_extent_range(root, start, end); + unpin_extent_range(root, start, end, true); cond_resched(); } @@ -8872,6 +8874,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) cache_node); rb_erase(&block_group->cache_node, &info->block_group_cache_tree); + RB_CLEAR_NODE(&block_group->cache_node); spin_unlock(&info->block_group_cache_lock); down_write(&block_group->space_info->groups_sem); @@ -9130,6 +9133,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) spin_lock(&info->block_group_cache_lock); rb_erase(&cache->cache_node, &info->block_group_cache_tree); + RB_CLEAR_NODE(&cache->cache_node); spin_unlock(&info->block_group_cache_lock); btrfs_put_block_group(cache); goto error; @@ -9271,6 +9275,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->block_group_cache_lock); rb_erase(&cache->cache_node, &root->fs_info->block_group_cache_tree); + RB_CLEAR_NODE(&cache->cache_node); spin_unlock(&root->fs_info->block_group_cache_lock); btrfs_put_block_group(cache); return ret; @@ -9690,13 +9695,7 @@ out: int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - return unpin_extent_range(root, start, end); -} - -int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *actual_bytes) -{ - return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); + return unpin_extent_range(root, start, end, false); } int btrfs_trim_fs(struct btrfs_root *root, struct 
fstrim_range *range) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 030847b..d6c03f7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2966,8 +2966,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group, spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); - ret = btrfs_error_discard_extent(fs_info->extent_root, - start, bytes, &trimmed); + ret = btrfs_discard_extent(fs_info->extent_root, + start, bytes, &trimmed); if (!ret) *total_trimmed += trimmed; @@ -3185,16 +3185,18 @@ out: spin_unlock(&block_group->lock); + lock_chunks(block_group->fs_info->chunk_root); em_tree = &block_group->fs_info->mapping_tree.map_tree; write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, block_group->key.objectid, 1); BUG_ON(!em); /* logic error, can't happen */ + /* + * remove_extent_mapping() will delete us from the pinned_chunks + * list, which is protected by the chunk mutex. + */ remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); - - lock_chunks(block_group->fs_info->chunk_root); - list_del_init(&em->list); unlock_chunks(block_group->fs_info->chunk_root); /* once for us and once for the tree */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0144790..50c5a87 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1485,7 +1485,7 @@ static void update_dev_time(char *path_name) struct file *filp; filp = filp_open(path_name, O_RDWR, 0); - if (!filp) + if (IS_ERR(filp)) return; file_update_time(filp); filp_close(filp, NULL); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c2d6604..719e1ce 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, break; case 2: dst[dst_byte_offset++] |= (src_byte); - dst[dst_byte_offset] = 0; current_bit_offset = 0; break; } diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 80154ec..6f4e659 100644 --- 
a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file) { int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct dentry *ecryptfs_dentry = file->f_path.dentry; /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; - mount_crypt_stat = &ecryptfs_superblock_to_private( - ecryptfs_dentry->d_sb)->mount_crypt_stat; - if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR) - || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC) - || (file->f_flags & O_APPEND))) { - printk(KERN_WARNING "Mount has encrypted view enabled; " - "files may only be read\n"); - rc = -EPERM; - goto out; - } /* Released in ecryptfs_release or end of function if failure */ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); ecryptfs_set_file_private(file, file_info); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 635e8e1..917bd5c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -100,12 +100,12 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, (*size) = 0; if (data[0] < 192) { /* One-byte length */ - (*size) = (unsigned char)data[0]; + (*size) = data[0]; (*length_size) = 1; } else if (data[0] < 224) { /* Two-byte length */ - (*size) = (((unsigned char)(data[0]) - 192) * 256); - (*size) += ((unsigned char)(data[1]) + 192); + (*size) = (data[0] - 192) * 256; + (*size) += data[1] + 192; (*length_size) = 2; } else if (data[0] == 255) { /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */ diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index c4cd1fd..d9eb84b 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags { struct super_block *s; struct 
ecryptfs_sb_info *sbi; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct ecryptfs_dentry_info *root_info; const char *err = "Getting sb failed"; struct inode *inode; @@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags err = "Error parsing options"; goto out; } + mount_crypt_stat = &sbi->mount_crypt_stat; s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { @@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /** * Set the POSIX ACL flag based on whether they're enabled in the lower - * mount. Force a read-only eCryptfs mount if the lower mount is ro. - * Allow a ro eCryptfs mount even when the lower mount is rw. + * mount. */ s->s_flags = flags & ~MS_POSIXACL; - s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; + + /** + * Force a read-only eCryptfs mount when: + * 1) The lower mount is ro + * 2) The ecryptfs_encrypted_view mount option is specified + */ + if (path.dentry->d_sb->s_flags & MS_RDONLY || + mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + s->s_flags |= MS_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 503ea15..370420b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -267,7 +267,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, handle_t *handle; ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; - unsigned int w_flags = 0; unsigned int tmp_data_size, data_size, replaced_size; int err2, jblocks, retries = 0; int replaced_count = 0; @@ -288,9 +287,6 @@ again: return 0; } - if (segment_eq(get_fs(), KERNEL_DS)) - w_flags |= AOP_FLAG_UNINTERRUPTIBLE; - orig_blk_offset = orig_page_offset * blocks_per_page + data_offset_in_page; diff 
--git a/fs/kernfs/file.c b/fs/kernfs/file.c index 697390e..ddc9f96 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -448,27 +448,6 @@ static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, return pol; } -static int kernfs_vma_migrate(struct vm_area_struct *vma, - const nodemask_t *from, const nodemask_t *to, - unsigned long flags) -{ - struct file *file = vma->vm_file; - struct kernfs_open_file *of = kernfs_of(file); - int ret; - - if (!of->vm_ops) - return 0; - - if (!kernfs_get_active(of->kn)) - return 0; - - ret = 0; - if (of->vm_ops->migrate) - ret = of->vm_ops->migrate(vma, from, to, flags); - - kernfs_put_active(of->kn); - return ret; -} #endif static const struct vm_operations_struct kernfs_vm_ops = { @@ -479,7 +458,6 @@ static const struct vm_operations_struct kernfs_vm_ops = { #ifdef CONFIG_NUMA .set_policy = kernfs_vma_set_policy, .get_policy = kernfs_vma_get_policy, - .migrate = kernfs_vma_migrate, #endif }; diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf2d03f..510413eb 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? */ for_each_irq_nr(j) - seq_put_decimal_ull(p, ' ', kstat_irqs(j)); + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 73ca174..0f96f71 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -91,6 +91,7 @@ static void show_type(struct seq_file *m, struct super_block *sb) static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) { + struct proc_mounts *p = proc_mounts(m); struct mount *r = real_mount(mnt); int err = 0; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; @@ -104,7 +105,10 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) mangle(m, r->mnt_devname ? 
r->mnt_devname : "none"); } seq_putc(m, ' '); - seq_path(m, &mnt_path, " \t\n\\"); + /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ + err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\"); + if (err) + goto out; seq_putc(m, ' '); show_type(m, sb); seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); @@ -125,7 +129,6 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) struct mount *r = real_mount(mnt); struct super_block *sb = mnt->mnt_sb; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; - struct path root = p->root; int err = 0; seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id, @@ -139,7 +142,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) seq_putc(m, ' '); /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ - err = seq_path_root(m, &mnt_path, &root, " \t\n\\"); + err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\"); if (err) goto out; @@ -182,6 +185,7 @@ out: static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) { + struct proc_mounts *p = proc_mounts(m); struct mount *r = real_mount(mnt); struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct super_block *sb = mnt_path.dentry->d_sb; @@ -201,7 +205,10 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) /* mount point */ seq_puts(m, " mounted on "); - seq_path(m, &mnt_path, " \t\n\\"); + /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ + err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\"); + if (err) + goto out; seq_putc(m, ' '); /* file system type */ @@ -216,6 +223,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) } seq_putc(m, '\n'); +out: return err; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6bff83b..856d381 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -153,6 +153,7 @@ int acpi_unmap_lsapic(int cpu); int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 
gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); +int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index b9376cd..25a822f 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -68,6 +68,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) * Number of interrupts per specific IRQ source, since bootup */ extern unsigned int kstat_irqs(unsigned int irq); +extern unsigned int kstat_irqs_usr(unsigned int irq); /* * Number of interrupts per cpu, since bootup diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 01aad3e..fab9b32 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -36,9 +36,6 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, extern int migrate_prep(void); extern int migrate_prep_local(void); -extern int migrate_vmas(struct mm_struct *mm, - const nodemask_t *from, const nodemask_t *to, - unsigned long flags); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); @@ -57,13 +54,6 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } -static inline int migrate_vmas(struct mm_struct *mm, - const nodemask_t *from, const nodemask_t *to, - unsigned long flags) -{ - return -ENOSYS; -} - static inline void migrate_page_copy(struct page *newpage, struct page *page) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index c0a67b8..f80d019 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -286,8 +286,6 @@ struct vm_operations_struct { */ struct mempolicy *(*get_policy)(struct 
vm_area_struct *vma, unsigned long addr); - int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, - const nodemask_t *to, unsigned long flags); #endif /* called by sys_remap_file_pages() to populate non-linear mapping */ int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, diff --git a/include/linux/module.h b/include/linux/module.h index 71f282a..ebfb0e1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -210,20 +210,6 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; -/** - * struct module_ref - per cpu module reference counts - * @incs: number of module get on this cpu - * @decs: number of module put on this cpu - * - * We force an alignment on 8 or 16 bytes, so that alloc_percpu() - * put @incs/@decs in same cache line, with no extra memory cost, - * since alloc_percpu() is fine grained. - */ -struct module_ref { - unsigned long incs; - unsigned long decs; -} __attribute((aligned(2 * sizeof(unsigned long)))); - struct module { enum module_state state; @@ -367,7 +353,7 @@ struct module { /* Destruction function. 
*/ void (*exit)(void); - struct module_ref __percpu *refptr; + atomic_t refcnt; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/linux/pci.h b/include/linux/pci.h index 44a2769..360a966 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -349,6 +349,7 @@ struct pci_dev { unsigned int __aer_firmware_first:1; unsigned int broken_intx_masking:1; unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ + unsigned int irq_managed:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ diff --git a/include/linux/uio.h b/include/linux/uio.h index a41e252..1c5e453 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -101,6 +101,11 @@ static inline size_t iov_iter_count(struct iov_iter *i) return i->count; } +static inline bool iter_is_iovec(struct iov_iter *i) +{ + return !(i->type & (ITER_BVEC | ITER_KVEC)); +} + /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7979f85..ca3ed78 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -19,6 +19,9 @@ * offset: the offset of the configuration field * buf: the buffer to read the field value from. 
* len: the length of the buffer + * @generation: config generation counter + * vdev: the virtio_device + * Returns the config generation counter * @get_status: read the status byte * vdev: the virtio_device * Returns the status byte @@ -60,6 +63,7 @@ struct virtio_config_ops { void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len); + u32 (*generation)(struct virtio_device *vdev); u8 (*get_status)(struct virtio_device *vdev); void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); @@ -301,11 +305,33 @@ static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset) return ret; } +/* Read @count fields, @bytes each. */ +static inline void __virtio_cread_many(struct virtio_device *vdev, + unsigned int offset, + void *buf, size_t count, size_t bytes) +{ + u32 old, gen = vdev->config->generation ? + vdev->config->generation(vdev) : 0; + int i; + + do { + old = gen; + + for (i = 0; i < count; i++) + vdev->config->get(vdev, offset + bytes * i, + buf + i * bytes, bytes); + + gen = vdev->config->generation ? 
+ vdev->config->generation(vdev) : 0; + } while (gen != old); +} + + static inline void virtio_cread_bytes(struct virtio_device *vdev, unsigned int offset, void *buf, size_t len) { - vdev->config->get(vdev, offset, buf, len); + __virtio_cread_many(vdev, offset, buf, len, 1); } static inline void virtio_cwrite8(struct virtio_device *vdev, @@ -349,6 +375,7 @@ static inline u64 virtio_cread64(struct virtio_device *vdev, { u64 ret; vdev->config->get(vdev, offset, &ret, sizeof(ret)); + __virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret)); return virtio64_to_cpu(vdev, (__force __virtio64)ret); } diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 749cde2..a3fa537 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -24,12 +24,16 @@ #ifndef _LINUX_VRINGH_H #define _LINUX_VRINGH_H #include <uapi/linux/virtio_ring.h> +#include <linux/virtio_byteorder.h> #include <linux/uio.h> #include <linux/slab.h> #include <asm/barrier.h> /* virtio_ring with information needed for host access. */ struct vringh { + /* Everything is little endian */ + bool little_endian; + /* Guest publishes used event idx (note: we always do). */ bool event_indices; @@ -105,7 +109,7 @@ struct vringh_kiov { #define VRINGH_IOV_ALLOCATED 0x8000000 /* Helpers for userspace vrings. */ -int vringh_init_user(struct vringh *vrh, u32 features, +int vringh_init_user(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, struct vring_desc __user *desc, struct vring_avail __user *avail, @@ -167,7 +171,7 @@ bool vringh_notify_enable_user(struct vringh *vrh); void vringh_notify_disable_user(struct vringh *vrh); /* Helpers for kernelspace vrings. 
*/ -int vringh_init_kern(struct vringh *vrh, u32 features, +int vringh_init_kern(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, struct vring_desc *desc, struct vring_avail *avail, @@ -222,4 +226,33 @@ static inline void vringh_notify(struct vringh *vrh) vrh->notify(vrh); } +static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) +{ + return __virtio16_to_cpu(vrh->little_endian, val); +} + +static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val) +{ + return __cpu_to_virtio16(vrh->little_endian, val); +} + +static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val) +{ + return __virtio32_to_cpu(vrh->little_endian, val); +} + +static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val) +{ + return __cpu_to_virtio32(vrh->little_endian, val); +} + +static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val) +{ + return __virtio64_to_cpu(vrh->little_endian, val); +} + +static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) +{ + return __cpu_to_virtio64(vrh->little_endian, val); +} #endif /* _LINUX_VRINGH_H */ diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 9adc1bc..430cfaf 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -5,6 +5,15 @@ #define TRANSPORT_PLUGIN_VHBA_PDEV 2 #define TRANSPORT_PLUGIN_VHBA_VDEV 3 +struct target_backend_cits { + struct config_item_type tb_dev_cit; + struct config_item_type tb_dev_attrib_cit; + struct config_item_type tb_dev_pr_cit; + struct config_item_type tb_dev_wwn_cit; + struct config_item_type tb_dev_alua_tg_pt_gps_cit; + struct config_item_type tb_dev_stat_cit; +}; + struct se_subsystem_api { struct list_head sub_api_list; @@ -44,6 +53,8 @@ struct se_subsystem_api { int (*init_prot)(struct se_device *); int (*format_prot)(struct se_device *); void (*free_prot)(struct se_device *); + + struct target_backend_cits 
tb_cits; }; struct sbc_ops { @@ -96,4 +107,36 @@ sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, void array_free(void *array, int n); +/* From target_core_configfs.c to setup default backend config_item_types */ +void target_core_setup_sub_cits(struct se_subsystem_api *); + +/* attribute helpers from target_core_device.c for backend drivers */ +int se_dev_set_max_unmap_lba_count(struct se_device *, u32); +int se_dev_set_max_unmap_block_desc_count(struct se_device *, u32); +int se_dev_set_unmap_granularity(struct se_device *, u32); +int se_dev_set_unmap_granularity_alignment(struct se_device *, u32); +int se_dev_set_max_write_same_len(struct se_device *, u32); +int se_dev_set_emulate_model_alias(struct se_device *, int); +int se_dev_set_emulate_dpo(struct se_device *, int); +int se_dev_set_emulate_fua_write(struct se_device *, int); +int se_dev_set_emulate_fua_read(struct se_device *, int); +int se_dev_set_emulate_write_cache(struct se_device *, int); +int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); +int se_dev_set_emulate_tas(struct se_device *, int); +int se_dev_set_emulate_tpu(struct se_device *, int); +int se_dev_set_emulate_tpws(struct se_device *, int); +int se_dev_set_emulate_caw(struct se_device *, int); +int se_dev_set_emulate_3pc(struct se_device *, int); +int se_dev_set_pi_prot_type(struct se_device *, int); +int se_dev_set_pi_prot_format(struct se_device *, int); +int se_dev_set_enforce_pr_isids(struct se_device *, int); +int se_dev_set_force_pr_aptpl(struct se_device *, int); +int se_dev_set_is_nonrot(struct se_device *, int); +int se_dev_set_emulate_rest_reord(struct se_device *dev, int); +int se_dev_set_queue_depth(struct se_device *, u32); +int se_dev_set_max_sectors(struct se_device *, u32); +int se_dev_set_fabric_max_sectors(struct se_device *, u32); +int se_dev_set_optimal_sectors(struct se_device *, u32); +int se_dev_set_block_size(struct se_device *, u32); + #endif /* TARGET_CORE_BACKEND_H */ diff --git 
a/include/target/target_core_backend_configfs.h b/include/target/target_core_backend_configfs.h new file mode 100644 index 0000000..3247d75 --- /dev/null +++ b/include/target/target_core_backend_configfs.h @@ -0,0 +1,120 @@ +#ifndef TARGET_CORE_BACKEND_CONFIGFS_H +#define TARGET_CORE_BACKEND_CONFIGFS_H + +#include <target/configfs_macros.h> + +#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name) \ +static ssize_t _backend##_dev_show_attr_##_name( \ + struct se_dev_attrib *da, \ + char *page) \ +{ \ + return snprintf(page, PAGE_SIZE, "%u\n", \ + (u32)da->da_dev->dev_attrib._name); \ +} + +#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name) \ +static ssize_t _backend##_dev_store_attr_##_name( \ + struct se_dev_attrib *da, \ + const char *page, \ + size_t count) \ +{ \ + unsigned long val; \ + int ret; \ + \ + ret = kstrtoul(page, 0, &val); \ + if (ret < 0) { \ + pr_err("kstrtoul() failed with ret: %d\n", ret); \ + return -EINVAL; \ + } \ + ret = se_dev_set_##_name(da->da_dev, (u32)val); \ + \ + return (!ret) ? 
count : -EINVAL; \ +} + +#define DEF_TB_DEV_ATTRIB(_backend, _name) \ +DEF_TB_DEV_ATTRIB_SHOW(_backend, _name); \ +DEF_TB_DEV_ATTRIB_STORE(_backend, _name); + +#define DEF_TB_DEV_ATTRIB_RO(_backend, name) \ +DEF_TB_DEV_ATTRIB_SHOW(_backend, name); + +CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib); +#define TB_DEV_ATTR(_backend, _name, _mode) \ +static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + _backend##_dev_show_attr_##_name, \ + _backend##_dev_store_attr_##_name); + +#define TB_DEV_ATTR_RO(_backend, _name) \ +static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + _backend##_dev_show_attr_##_name); + +/* + * Default list of target backend device attributes as defined by + * struct se_dev_attrib + */ + +#define DEF_TB_DEFAULT_ATTRIBS(_backend) \ + DEF_TB_DEV_ATTRIB(_backend, emulate_model_alias); \ + TB_DEV_ATTR(_backend, emulate_model_alias, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_dpo); \ + TB_DEV_ATTR(_backend, emulate_dpo, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_fua_write); \ + TB_DEV_ATTR(_backend, emulate_fua_write, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_fua_read); \ + TB_DEV_ATTR(_backend, emulate_fua_read, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_write_cache); \ + TB_DEV_ATTR(_backend, emulate_write_cache, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_ua_intlck_ctrl); \ + TB_DEV_ATTR(_backend, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_tas); \ + TB_DEV_ATTR(_backend, emulate_tas, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_tpu); \ + TB_DEV_ATTR(_backend, emulate_tpu, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_tpws); \ + TB_DEV_ATTR(_backend, emulate_tpws, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_caw); \ + TB_DEV_ATTR(_backend, 
emulate_caw, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_3pc); \ + TB_DEV_ATTR(_backend, emulate_3pc, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, pi_prot_type); \ + TB_DEV_ATTR(_backend, pi_prot_type, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB_RO(_backend, hw_pi_prot_type); \ + TB_DEV_ATTR_RO(_backend, hw_pi_prot_type); \ + DEF_TB_DEV_ATTRIB(_backend, pi_prot_format); \ + TB_DEV_ATTR(_backend, pi_prot_format, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, enforce_pr_isids); \ + TB_DEV_ATTR(_backend, enforce_pr_isids, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, is_nonrot); \ + TB_DEV_ATTR(_backend, is_nonrot, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, emulate_rest_reord); \ + TB_DEV_ATTR(_backend, emulate_rest_reord, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, force_pr_aptpl); \ + TB_DEV_ATTR(_backend, force_pr_aptpl, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB_RO(_backend, hw_block_size); \ + TB_DEV_ATTR_RO(_backend, hw_block_size); \ + DEF_TB_DEV_ATTRIB(_backend, block_size); \ + TB_DEV_ATTR(_backend, block_size, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB_RO(_backend, hw_max_sectors); \ + TB_DEV_ATTR_RO(_backend, hw_max_sectors); \ + DEF_TB_DEV_ATTRIB(_backend, fabric_max_sectors); \ + TB_DEV_ATTR(_backend, fabric_max_sectors, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, optimal_sectors); \ + TB_DEV_ATTR(_backend, optimal_sectors, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB_RO(_backend, hw_queue_depth); \ + TB_DEV_ATTR_RO(_backend, hw_queue_depth); \ + DEF_TB_DEV_ATTRIB(_backend, queue_depth); \ + TB_DEV_ATTR(_backend, queue_depth, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, max_unmap_lba_count); \ + TB_DEV_ATTR(_backend, max_unmap_lba_count, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, max_unmap_block_desc_count); \ + TB_DEV_ATTR(_backend, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, unmap_granularity); \ + TB_DEV_ATTR(_backend, unmap_granularity, S_IRUGO | 
S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, unmap_granularity_alignment); \ + TB_DEV_ATTR(_backend, unmap_granularity_alignment, S_IRUGO | S_IWUSR); \ + DEF_TB_DEV_ATTRIB(_backend, max_write_same_len); \ + TB_DEV_ATTR(_backend, max_write_same_len, S_IRUGO | S_IWUSR); + +#endif /* TARGET_CORE_BACKEND_CONFIGFS_H */ diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 7c5cbfe..81c4c18 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs); + __entry->refcnt = atomic_read(&mod->refcnt); __assign_str(name, mod->name); ), diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 12e2668..d3475e1 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -371,7 +371,9 @@ enum { #define AUDIT_ARCH_PARISC (EM_PARISC) #define AUDIT_ARCH_PARISC64 (EM_PARISC|__AUDIT_ARCH_64BIT) #define AUDIT_ARCH_PPC (EM_PPC) +/* do not define AUDIT_ARCH_PPCLE since it is not supported by audit */ #define AUDIT_ARCH_PPC64 (EM_PPC64|__AUDIT_ARCH_64BIT) +#define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_S390 (EM_S390) #define AUDIT_ARCH_S390X (EM_S390|__AUDIT_ARCH_64BIT) #define AUDIT_ARCH_SH (EM_SH) diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 7dcfbe6..b483d19 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -6,10 +6,6 @@ #include <linux/types.h> #include <linux/uio.h> -#ifndef __packed -#define __packed __attribute__((packed)) -#endif - #define TCMU_VERSION "1.0" /* diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index e5ec1ca..35b552c7 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -41,6 +41,8 @@ #include 
<linux/virtio_config.h> +#ifndef VIRTIO_PCI_NO_LEGACY + /* A 32-bit r/o bitmask of the features supported by the host */ #define VIRTIO_PCI_HOST_FEATURES 0 @@ -67,16 +69,11 @@ * a read-and-acknowledge. */ #define VIRTIO_PCI_ISR 19 -/* The bit of the ISR which indicates a device configuration change. */ -#define VIRTIO_PCI_ISR_CONFIG 0x2 - /* MSI-X registers: only enabled if MSI-X is enabled. */ /* A 16-bit vector for configuration changes. */ #define VIRTIO_MSI_CONFIG_VECTOR 20 /* A 16-bit vector for selected queue notifications. */ #define VIRTIO_MSI_QUEUE_VECTOR 22 -/* Vector value used to disable MSI for queue */ -#define VIRTIO_MSI_NO_VECTOR 0xffff /* The remaining space is defined by each driver as the per-driver * configuration space */ @@ -94,4 +91,12 @@ /* The alignment to use between consumer and producer parts of vring. * x86 pagesize again. */ #define VIRTIO_PCI_VRING_ALIGN 4096 + +#endif /* VIRTIO_PCI_NO_LEGACY */ + +/* The bit of the ISR which indicates a device configuration change. 
*/ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff + #endif diff --git a/init/do_mounts.c b/init/do_mounts.c index 9b3565c..eb41008 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -395,8 +395,6 @@ retry: case 0: goto out; case -EACCES: - flags |= MS_RDONLY; - goto retry; case -EINVAL: continue; } @@ -419,6 +417,10 @@ retry: #endif panic("VFS: Unable to mount root fs on %s", b); } + if (!(flags & MS_RDONLY)) { + flags |= MS_RDONLY; + goto retry; + } printk("List of all partitions:\n"); printk_all_partitions(); diff --git a/kernel/events/core.c b/kernel/events/core.c index 113b837..4c1ee7f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7477,11 +7477,11 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { synchronize_rcu(); - perf_install_in_context(ctx, group_leader, event->cpu); + perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_install_in_context(ctx, sibling, event->cpu); + perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); } } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4332d76..df553b0 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -78,8 +78,12 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a1782f8..99793b9 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -132,6 +132,16 @@ static void free_masks(struct irq_desc *desc) static inline void 
free_masks(struct irq_desc *desc) { } #endif +void irq_lock_sparse(void) +{ + mutex_lock(&sparse_irq_lock); +} + +void irq_unlock_sparse(void) +{ + mutex_unlock(&sparse_irq_lock); +} + static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; @@ -168,6 +178,12 @@ static void free_desc(unsigned int irq) unregister_irq_proc(irq, desc); + /* + * sparse_irq_lock protects also show_interrupts() and + * kstat_irq_usr(). Once we deleted the descriptor from the + * sparse tree we can free it. Access in proc will fail to + * lookup the descriptor. + */ mutex_lock(&sparse_irq_lock); delete_irq_desc(irq); mutex_unlock(&sparse_irq_lock); @@ -574,6 +590,15 @@ void kstat_incr_irq_this_cpu(unsigned int irq) kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); } +/** + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu + * @irq: The interrupt number + * @cpu: The cpu number + * + * Returns the sum of interrupt counts on @cpu since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); @@ -582,6 +607,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } +/** + * kstat_irqs - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -594,3 +627,22 @@ unsigned int kstat_irqs(unsigned int irq) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } + +/** + * kstat_irqs_usr - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. 
Contrary to kstat_irqs() this can be called from any + * preemptible context. It's protected against concurrent removal of + * an interrupt descriptor when sparse irqs are enabled. + */ +unsigned int kstat_irqs_usr(unsigned int irq) +{ + int sum; + + irq_lock_sparse(); + sum = kstat_irqs(irq); + irq_unlock_sparse(); + return sum; +} diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index ac1ba2f..9dc9bfd 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -15,6 +15,23 @@ #include "internals.h" +/* + * Access rules: + * + * procfs protects read/write of /proc/irq/N/ files against a + * concurrent free of the interrupt descriptor. remove_proc_entry() + * immediately prevents new read/writes to happen and waits for + * already running read/write functions to complete. + * + * We remove the proc entries first and then delete the interrupt + * descriptor from the radix tree and free it. So it is guaranteed + * that irq_to_desc(N) is valid as long as the read/writes are + * permitted by procfs. + * + * The read from /proc/interrupts is a different problem because there + * is no protection. So the lookup and the access to irqdesc + * information must be protected by sparse_irq_lock. 
+ */ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP @@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } + irq_lock_sparse(); desc = irq_to_desc(i); if (!desc) - return 0; + goto outsparse; raw_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) @@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: raw_spin_unlock_irqrestore(&desc->lock, flags); +outsparse: + irq_unlock_sparse(); return 0; } #endif diff --git a/kernel/module.c b/kernel/module.c index e52a873..3965511 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -42,7 +42,6 @@ #include <linux/vermagic.h> #include <linux/notifier.h> #include <linux/sched.h> -#include <linux/stop_machine.h> #include <linux/device.h> #include <linux/string.h> #include <linux/mutex.h> @@ -98,7 +97,7 @@ * 1) List of modules (also safely readable with preempt_disable), * 2) module_use links, * 3) module_addr_min/module_addr_max. - * (delete uses stop_machine/add uses RCU list operations). */ + * (delete and add uses RCU list operations). */ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); @@ -158,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list); * Protected by module_mutex. */ static unsigned long module_addr_min = -1UL, module_addr_max = 0; -int register_module_notifier(struct notifier_block * nb) +int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); } EXPORT_SYMBOL(register_module_notifier); -int unregister_module_notifier(struct notifier_block * nb) +int unregister_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&module_notify_list, nb); } @@ -628,18 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; EXPORT_TRACEPOINT_SYMBOL(module_get); +/* MODULE_REF_BASE is the base reference count by kmodule loader. 
*/ +#define MODULE_REF_BASE 1 + /* Init the unload section of the module. */ static int module_unload_init(struct module *mod) { - mod->refptr = alloc_percpu(struct module_ref); - if (!mod->refptr) - return -ENOMEM; + /* + * Initialize reference counter to MODULE_REF_BASE. + * refcnt == 0 means module is going. + */ + atomic_set(&mod->refcnt, MODULE_REF_BASE); INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); /* Hold reference count during initialization. */ - raw_cpu_write(mod->refptr->incs, 1); + atomic_inc(&mod->refcnt); return 0; } @@ -721,8 +725,6 @@ static void module_unload_free(struct module *mod) kfree(use); } mutex_unlock(&module_mutex); - - free_percpu(mod->refptr); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -740,60 +742,39 @@ static inline int try_force_unload(unsigned int flags) } #endif /* CONFIG_MODULE_FORCE_UNLOAD */ -struct stopref +/* Try to release refcount of module, 0 means success. */ +static int try_release_module_ref(struct module *mod) { - struct module *mod; - int flags; - int *forced; -}; + int ret; -/* Whole machine is stopped with interrupts off when this runs. */ -static int __try_stop_module(void *_sref) -{ - struct stopref *sref = _sref; + /* Try to decrement refcnt which we set at loading */ + ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt); + BUG_ON(ret < 0); + if (ret) + /* Someone can put this right now, recover with checking */ + ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0); + + return ret; +} +static int try_stop_module(struct module *mod, int flags, int *forced) +{ /* If it's not unused, quit unless we're forcing. */ - if (module_refcount(sref->mod) != 0) { - if (!(*sref->forced = try_force_unload(sref->flags))) + if (try_release_module_ref(mod) != 0) { + *forced = try_force_unload(flags); + if (!(*forced)) return -EWOULDBLOCK; } /* Mark it as dying. 
*/ - sref->mod->state = MODULE_STATE_GOING; - return 0; -} - -static int try_stop_module(struct module *mod, int flags, int *forced) -{ - struct stopref sref = { mod, flags, forced }; + mod->state = MODULE_STATE_GOING; - return stop_machine(__try_stop_module, &sref, NULL); + return 0; } unsigned long module_refcount(struct module *mod) { - unsigned long incs = 0, decs = 0; - int cpu; - - for_each_possible_cpu(cpu) - decs += per_cpu_ptr(mod->refptr, cpu)->decs; - /* - * ensure the incs are added up after the decs. - * module_put ensures incs are visible before decs with smp_wmb. - * - * This 2-count scheme avoids the situation where the refcount - * for CPU0 is read, then CPU0 increments the module refcount, - * then CPU1 drops that refcount, then the refcount for CPU1 is - * read. We would record a decrement but not its corresponding - * increment so we would see a low count (disaster). - * - * Rare situation? But module_refcount can be preempted, and we - * might be tallying up 4096+ CPUs. So it is not impossible. - */ - smp_rmb(); - for_each_possible_cpu(cpu) - incs += per_cpu_ptr(mod->refptr, cpu)->incs; - return incs - decs; + return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE; } EXPORT_SYMBOL(module_refcount); @@ -877,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) seq_printf(m, " %lu ", module_refcount(mod)); - /* Always include a trailing , so userspace can differentiate - between this and the old multi-field proc format. */ + /* + * Always include a trailing , so userspace can differentiate + * between this and the old multi-field proc format. 
+ */ list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; seq_printf(m, "%s,", use->source->name); @@ -886,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) if (mod->init != NULL && mod->exit == NULL) { printed_something = 1; - seq_printf(m, "[permanent],"); + seq_puts(m, "[permanent],"); } if (!printed_something) - seq_printf(m, "-"); + seq_puts(m, "-"); } void __symbol_put(const char *symbol) @@ -935,7 +918,7 @@ void __module_get(struct module *module) { if (module) { preempt_disable(); - __this_cpu_inc(module->refptr->incs); + atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); preempt_enable(); } @@ -948,11 +931,11 @@ bool try_module_get(struct module *module) if (module) { preempt_disable(); - - if (likely(module_is_live(module))) { - __this_cpu_inc(module->refptr->incs); + /* Note: here, we can fail to get a reference */ + if (likely(module_is_live(module) && + atomic_inc_not_zero(&module->refcnt) != 0)) trace_module_get(module, _RET_IP_); - } else + else ret = false; preempt_enable(); @@ -963,11 +946,12 @@ EXPORT_SYMBOL(try_module_get); void module_put(struct module *module) { + int ret; + if (module) { preempt_disable(); - smp_wmb(); /* see comment in module_refcount */ - __this_cpu_inc(module->refptr->decs); - + ret = atomic_dec_if_positive(&module->refcnt); + WARN_ON(ret < 0); /* Failed to put refcount */ trace_module_put(module, _RET_IP_); preempt_enable(); } @@ -978,7 +962,7 @@ EXPORT_SYMBOL(module_put); static inline void print_unload_info(struct seq_file *m, struct module *mod) { /* We don't know the usage count, or what modules are using. 
*/ - seq_printf(m, " - -"); + seq_puts(m, " - -"); } static inline void module_unload_free(struct module *mod) @@ -1131,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc, static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, - struct module *mod, + struct module *mod, const unsigned long *crc, const struct module *crc_owner) { @@ -1165,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs, return 0; bad_version: - printk("%s: disagrees about version of symbol %s\n", + pr_warn("%s: disagrees about version of symbol %s\n", mod->name, symname); return 0; } @@ -1200,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic, static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, - struct module *mod, + struct module *mod, const unsigned long *crc, const struct module *crc_owner) { @@ -1288,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect) return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; } -struct module_sect_attr -{ +struct module_sect_attr { struct module_attribute mattr; char *name; unsigned long address; }; -struct module_sect_attrs -{ +struct module_sect_attrs { struct attribute_group grp; unsigned int nsections; struct module_sect_attr attrs[0]; @@ -1550,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod) (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); sysfs_attr_init(&temp_attr->attr); - error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); + error = sysfs_create_file(&mod->mkobj.kobj, + &temp_attr->attr); ++temp_attr; } } @@ -1566,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod) /* pick a field to test for end of list */ if (!attr->attr.name) break; - sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); + sysfs_remove_file(&mod->mkobj.kobj, &attr->attr); if (attr->free) attr->free(mod); } @@ -1697,18 +1680,6 @@ static void 
mod_sysfs_teardown(struct module *mod) mod_sysfs_fini(mod); } -/* - * unlink the module with the whole machine is stopped with interrupts off - * - this defends against kallsyms not taking locks - */ -static int __unlink_module(void *_mod) -{ - struct module *mod = _mod; - list_del(&mod->list); - module_bug_cleanup(mod); - return 0; -} - #ifdef CONFIG_DEBUG_SET_MODULE_RONX /* * LKM RO/NX protection: protect module's text/ro-data @@ -1860,7 +1831,12 @@ static void free_module(struct module *mod) /* Now we can delete it from the lists */ mutex_lock(&module_mutex); - stop_machine(__unlink_module, mod, NULL); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + /* Remove this module from bug list, this uses list_del_rcu */ + module_bug_cleanup(mod); + /* Wait for RCU synchronizing before releasing mod->list and buglist. */ + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ @@ -1955,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) /* We compiled with -fno-common. These are not supposed to happen. */ pr_debug("Common symbol: %s\n", name); - printk("%s: please compile with -fno-common\n", + pr_warn("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; break; @@ -2259,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, - unsigned int shnum) + unsigned int shnum) { const Elf_Shdr *sec; @@ -2735,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) * This shouldn't happen with same compiler and binutils * building all parts of the module. 
*/ - printk(KERN_WARNING "%s: has both .ctors and .init_array.\n", + pr_warn("%s: has both .ctors and .init_array.\n", mod->name); return -EINVAL; } @@ -3023,8 +2999,10 @@ static int do_init_module(struct module *mod) if (mod->init != NULL) ret = do_one_initcall(mod->init); if (ret < 0) { - /* Init routine failed: abort. Try to protect us from - buggy refcounters. */ + /* + * Init routine failed: abort. Try to protect us from + * buggy refcounters. + */ mod->state = MODULE_STATE_GOING; synchronize_sched(); module_put(mod); @@ -3202,7 +3180,7 @@ out: static int unknown_module_param_cb(char *param, char *val, const char *modname) { - /* Check for magic 'dyndbg' arg */ + /* Check for magic 'dyndbg' arg */ int ret = ddebug_dyndbg_module_param_cb(param, val, modname); if (ret != 0) pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); @@ -3352,6 +3330,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); wake_up_all(&module_wq); + /* Wait for RCU synchronizing before releasing mod->list. */ + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: module_deallocate(mod, info); @@ -3685,8 +3665,8 @@ static int m_show(struct seq_file *m, void *p) /* Informative for users. */ seq_printf(m, " %s", - mod->state == MODULE_STATE_GOING ? "Unloading": - mod->state == MODULE_STATE_COMING ? "Loading": + mod->state == MODULE_STATE_GOING ? "Unloading" : + mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. 
*/ seq_printf(m, " 0x%pK", mod->module_core); @@ -3695,7 +3675,7 @@ static int m_show(struct seq_file *m, void *p) if (mod->taints) seq_printf(m, " %s", module_flags(mod, buf)); - seq_printf(m, "\n"); + seq_puts(m, "\n"); return 0; } diff --git a/kernel/params.c b/kernel/params.c index db97b79..0af9b2c 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -603,74 +603,67 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, const struct kernel_param *kp, const char *name) { - struct module_param_attrs *new; - struct attribute **attrs; - int err, num; + struct module_param_attrs *new_mp; + struct attribute **new_attrs; + unsigned int i; /* We don't bother calling this with invisible parameters. */ BUG_ON(!kp->perm); if (!mk->mp) { - num = 0; - attrs = NULL; - } else { - num = mk->mp->num; - attrs = mk->mp->grp.attrs; + /* First allocation. */ + mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL); + if (!mk->mp) + return -ENOMEM; + mk->mp->grp.name = "parameters"; + /* NULL-terminated attribute array. */ + mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]), + GFP_KERNEL); + /* Caller will cleanup via free_module_param_attrs */ + if (!mk->mp->grp.attrs) + return -ENOMEM; } - /* Enlarge. */ - new = krealloc(mk->mp, - sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), - GFP_KERNEL); - if (!new) { - kfree(attrs); - err = -ENOMEM; - goto fail; - } - /* Despite looking like the typical realloc() bug, this is safe. - * We *want* the old 'attrs' to be freed either way, and we'll store - * the new one in the success case. */ - attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); - if (!attrs) { - err = -ENOMEM; - goto fail_free_new; - } + /* Enlarge allocations. */ + new_mp = krealloc(mk->mp, + sizeof(*mk->mp) + + sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1), + GFP_KERNEL); + if (!new_mp) + return -ENOMEM; + mk->mp = new_mp; - /* Sysfs wants everything zeroed. 
*/ - memset(new, 0, sizeof(*new)); - memset(&new->attrs[num], 0, sizeof(new->attrs[num])); - memset(&attrs[num], 0, sizeof(attrs[num])); - new->grp.name = "parameters"; - new->grp.attrs = attrs; + /* Extra pointer for NULL terminator */ + new_attrs = krealloc(mk->mp->grp.attrs, + sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2), + GFP_KERNEL); + if (!new_attrs) + return -ENOMEM; + mk->mp->grp.attrs = new_attrs; /* Tack new one on the end. */ - sysfs_attr_init(&new->attrs[num].mattr.attr); - new->attrs[num].param = kp; - new->attrs[num].mattr.show = param_attr_show; - new->attrs[num].mattr.store = param_attr_store; - new->attrs[num].mattr.attr.name = (char *)name; - new->attrs[num].mattr.attr.mode = kp->perm; - new->num = num+1; + sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); + mk->mp->attrs[mk->mp->num].param = kp; + mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; + /* Do not allow runtime DAC changes to make param writable. */ + if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) + mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; + mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; + mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; + mk->mp->num++; /* Fix up all the pointers, since krealloc can move us */ - for (num = 0; num < new->num; num++) - new->grp.attrs[num] = &new->attrs[num].mattr.attr; - new->grp.attrs[num] = NULL; - - mk->mp = new; + for (i = 0; i < mk->mp->num; i++) + mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr; + mk->mp->grp.attrs[mk->mp->num] = NULL; return 0; - -fail_free_new: - kfree(new); -fail: - mk->mp = NULL; - return err; } #ifdef CONFIG_MODULES static void free_module_param_attrs(struct module_kobject *mk) { - kfree(mk->mp->grp.attrs); + if (mk->mp) + kfree(mk->mp->grp.attrs); kfree(mk->mp); mk->mp = NULL; } @@ -695,8 +688,10 @@ int module_param_sysfs_setup(struct module *mod, if (kparam[i].perm == 0) continue; err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name); - if (err) + if 
(err) { + free_module_param_attrs(&mod->mkobj); return err; + } params = true; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4d54b75..1363d58 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -847,7 +847,6 @@ void tick_nohz_idle_enter(void) local_irq_enable(); } -EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); /** * tick_nohz_irq_exit - update next tick event from interrupt exit @@ -974,7 +973,6 @@ void tick_nohz_idle_exit(void) local_irq_enable(); } -EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) { @@ -64,16 +64,22 @@ static LIST_HEAD(module_bug_list); static const struct bug_entry *module_find_bug(unsigned long bugaddr) { struct module *mod; + const struct bug_entry *bug = NULL; - list_for_each_entry(mod, &module_bug_list, bug_list) { - const struct bug_entry *bug = mod->bug_table; + rcu_read_lock(); + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { unsigned i; + bug = mod->bug_table; for (i = 0; i < mod->num_bugs; ++i, ++bug) if (bugaddr == bug_addr(bug)) - return bug; + goto out; } - return NULL; + bug = NULL; +out: + rcu_read_unlock(); + + return bug; } void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, @@ -99,13 +105,15 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * Strictly speaking this should have a spinlock to protect against * traversals, but since we only traverse on BUG()s, a spinlock * could potentially lead to deadlock and thus be counter-productive. + * Thus, this uses RCU to safely manipulate the bug list, since BUG + * must run in non-interruptive state. 
*/ - list_add(&mod->bug_list, &module_bug_list); + list_add_rcu(&mod->bug_list, &module_bug_list); } void module_bug_cleanup(struct module *mod) { - list_del(&mod->bug_list); + list_del_rcu(&mod->bug_list); } #else diff --git a/mm/filemap.c b/mm/filemap.c index e8905bc..bd8543c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2464,7 +2464,7 @@ ssize_t generic_perform_write(struct file *file, /* * Copies from kernel address space cannot fail (NFSD is a big user). */ - if (segment_eq(get_fs(), KERNEL_DS)) + if (!iter_is_iovec(i)) flags |= AOP_FLAG_UNINTERRUPTIBLE; do { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f22c559..0e0961b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1041,10 +1041,6 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, down_read(&mm->mmap_sem); - err = migrate_vmas(mm, from, to, flags); - if (err) - goto out; - /* * Find a 'source' bit set in 'tmp' whose corresponding 'dest' * bit in 'to' is not also set in 'tmp'. Clear the found 'source' @@ -1124,7 +1120,6 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, if (err < 0) break; } -out: up_read(&mm->mmap_sem); if (err < 0) return err; diff --git a/mm/migrate.c b/mm/migrate.c index b1d0212..344cdf6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1536,27 +1536,6 @@ out: return err; } -/* - * Call migration functions in the vma_ops that may prepare - * memory in a vm for migration. migration functions may perform - * the migration for vmas that do not have an underlying page struct. 
- */ -int migrate_vmas(struct mm_struct *mm, const nodemask_t *to, - const nodemask_t *from, unsigned long flags) -{ - struct vm_area_struct *vma; - int err = 0; - - for (vma = mm->mmap; vma && !err; vma = vma->vm_next) { - if (vma->vm_ops && vma->vm_ops->migrate) { - err = vma->vm_ops->migrate(vma, to, from, flags); - if (err) - break; - } - } - return err; -} - #ifdef CONFIG_NUMA_BALANCING /* * Returns true if this is a safe migration target node for misplaced NUMA @@ -1536,7 +1536,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * holes of a sparse file, we actually need to allocate those pages, * and even mark them dirty, so it cannot exceed the max_blocks limit. */ - if (segment_eq(get_fs(), KERNEL_DS)) + if (!iter_is_iovec(to)) sgp = SGP_DIRTY; index = *ppos >> PAGE_CACHE_SHIFT; diff --git a/net/socket.c b/net/socket.c index 70bbde6..a2c33a4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -372,7 +372,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); - SOCK_INODE(sock)->i_fop = &socket_file_ops; file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 5374b1b..edd2794 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -185,6 +185,18 @@ modbuiltin := -f $(srctree)/scripts/Makefile.modbuiltin obj # $(Q)$(MAKE) $(dtbinst)=dir dtbinst := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.dtbinst obj +### +# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj= +# Usage: +# $(Q)$(MAKE) $(clean)=dir +clean := -f $(srctree)/scripts/Makefile.clean obj + +### +# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.headersinst obj= +# Usage: +# $(Q)$(MAKE) $(hdr-inst)=dir +hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj + # Prefix -I with $(srctree) if it is not an absolute path. 
# skip if -I has no parameter addtree = $(if $(patsubst -I%,%,$(1)), \ diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index b1c668d..1bca180 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -7,10 +7,7 @@ src := $(obj) PHONY := __clean __clean: -# Shorthand for $(Q)$(MAKE) scripts/Makefile.clean obj=dir -# Usage: -# $(Q)$(MAKE) $(clean)=dir -clean := -f $(srctree)/scripts/Makefile.clean obj +include scripts/Kbuild.include # The filename Kbuild has precedence over Makefile kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) @@ -91,11 +88,6 @@ PHONY += $(subdir-ymn) $(subdir-ymn): $(Q)$(MAKE) $(clean)=$@ -# If quiet is set, only print short version of command - -cmd = @$(if $($(quiet)cmd_$(1)),echo ' $($(quiet)cmd_$(1))' &&) $(cmd_$(1)) - - # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable se we can use it in if_changed and friends. diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index 8ccf830..1106d6c 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -122,7 +122,6 @@ $(check-file): scripts/headers_check.pl $(output-files) FORCE endif # Recursion -hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj .PHONY: $(subdirs) $(subdirs): $(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@ diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci index 556456c..3b7eec2 100644 --- a/scripts/coccinelle/misc/bugon.cocci +++ b/scripts/coccinelle/misc/bugon.cocci @@ -8,7 +8,7 @@ // Confidence: High // Copyright: (C) 2014 Himangi Saraogi. GPLv2. 
// Comments: -// Options: --no-includes, --include-headers +// Options: --no-includes --include-headers virtual patch virtual context diff --git a/scripts/headers.sh b/scripts/headers.sh index 95ece06..d4dc4de 100755 --- a/scripts/headers.sh +++ b/scripts/headers.sh @@ -19,8 +19,6 @@ for arch in ${archs}; do case ${arch} in um) # no userspace export ;; - cris) # headers export are known broken - ;; *) if [ -d ${srctree}/arch/${arch} ]; then do_command $1 ${arch} diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 14cea74..4dd3755 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -330,10 +330,10 @@ static void set_subtitle(void) list_for_each_entry(sp, &trail, entries) { if (sp->text) { if (pos) { - pos->next = xcalloc(sizeof(*pos), 1); + pos->next = xcalloc(1, sizeof(*pos)); pos = pos->next; } else { - subtitles = pos = xcalloc(sizeof(*pos), 1); + subtitles = pos = xcalloc(1, sizeof(*pos)); } pos->text = sp->text; } diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index a26cc5d..72c9dba 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -548,7 +548,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, { int i, j; struct menu *submenu[8], *menu, *location = NULL; - struct jump_key *jump; + struct jump_key *jump = NULL; str_printf(r, _("Prompt: %s\n"), _(prop->text)); menu = prop->menu->parent; @@ -586,7 +586,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, str_printf(r, _(" Location:\n")); for (j = 4; --i >= 0; j += 2) { menu = submenu[i]; - if (head && location && menu == location) + if (jump && menu == location) jump->offset = strlen(r->s); str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu))); diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 1395760..d9ab94b 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -117,6 +117,7 @@ echo 'mv vmlinux.bz2 $RPM_BUILD_ROOT'"/boot/vmlinux-$KERNELRELEASE.bz2" echo 'mv vmlinux.orig 
vmlinux' echo "%endif" +if ! $PREBUILT; then echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/{build,source}" echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE" echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\"" @@ -124,6 +125,7 @@ echo "tar "'$EXCLUDES'" -cf- . | (cd "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNEL echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE" echo "ln -sf /usr/src/kernels/$KERNELRELEASE build" echo "ln -sf /usr/src/kernels/$KERNELRELEASE source" +fi echo "" echo "%clean" @@ -151,9 +153,11 @@ echo "%files headers" echo '%defattr (-, root, root)' echo "/usr/include" echo "" +if ! $PREBUILT; then echo "%files devel" echo '%defattr (-, root, root)' echo "/usr/src/kernels/$KERNELRELEASE" echo "/lib/modules/$KERNELRELEASE/build" echo "/lib/modules/$KERNELRELEASE/source" echo "" +fi diff --git a/sound/firewire/oxfw/oxfw-pcm.c b/sound/firewire/oxfw/oxfw-pcm.c index 9bc556b..67ade07 100644 --- a/sound/firewire/oxfw/oxfw-pcm.c +++ b/sound/firewire/oxfw/oxfw-pcm.c @@ -19,7 +19,7 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params, .min = UINT_MAX, .max = 0, .integer = 1 }; struct snd_oxfw_stream_formation formation; - unsigned int i, err; + int i, err; for (i = 0; i < SND_OXFW_STREAM_FORMAT_ENTRIES; i++) { if (formats[i] == NULL) @@ -47,7 +47,7 @@ static int hw_rule_channels(struct snd_pcm_hw_params *params, const struct snd_interval *r = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); struct snd_oxfw_stream_formation formation; - unsigned int i, j, err; + int i, j, err; unsigned int count, list[SND_OXFW_STREAM_FORMAT_ENTRIES] = {0}; count = 0; @@ -80,7 +80,7 @@ static int hw_rule_channels(struct snd_pcm_hw_params *params, static void limit_channels_and_rates(struct snd_pcm_hardware *hw, u8 **formats) { struct snd_oxfw_stream_formation formation; - 
unsigned int i, err; + int i, err; hw->channels_min = UINT_MAX; hw->channels_max = 0; diff --git a/sound/firewire/oxfw/oxfw-proc.c b/sound/firewire/oxfw/oxfw-proc.c index 604808e..8ba4f9f 100644 --- a/sound/firewire/oxfw/oxfw-proc.c +++ b/sound/firewire/oxfw/oxfw-proc.c @@ -15,7 +15,7 @@ static void proc_read_formation(struct snd_info_entry *entry, struct snd_oxfw_stream_formation formation, curr; u8 *format; char flag; - unsigned int i, err; + int i, err; /* Show input. */ err = snd_oxfw_stream_get_current_formation(oxfw, diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c index b77cf80..bda845a 100644 --- a/sound/firewire/oxfw/oxfw-stream.c +++ b/sound/firewire/oxfw/oxfw-stream.c @@ -61,7 +61,8 @@ static int set_stream_format(struct snd_oxfw *oxfw, struct amdtp_stream *s, u8 **formats; struct snd_oxfw_stream_formation formation; enum avc_general_plug_dir dir; - unsigned int i, err, len; + unsigned int len; + int i, err; if (s == &oxfw->tx_stream) { formats = oxfw->tx_stream_formats; diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index cf1d0b5..60e5cad 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -43,7 +43,7 @@ static bool detect_loud_models(struct fw_unit *unit) err = fw_csr_string(unit->directory, CSR_MODEL, model, sizeof(model)); if (err < 0) - return err; + return false; for (i = 0; i < ARRAY_SIZE(models); i++) { if (strcmp(models[i], model) == 0) diff --git a/sound/pci/asihpi/hpi_internal.h b/sound/pci/asihpi/hpi_internal.h index 48380ce..aeea679 100644 --- a/sound/pci/asihpi/hpi_internal.h +++ b/sound/pci/asihpi/hpi_internal.h @@ -1367,9 +1367,9 @@ struct hpi_control_cache_single { struct hpi_control_cache_pad { struct hpi_control_cache_info i; u32 field_valid_flags; - u8 c_channel[8]; - u8 c_artist[40]; - u8 c_title[40]; + u8 c_channel[40]; + u8 c_artist[100]; + u8 c_title[100]; u8 c_comment[200]; u32 pTY; u32 pI; diff --git a/sound/pci/asihpi/hpi_version.h 
b/sound/pci/asihpi/hpi_version.h index e9146e5..6623ab1 100644 --- a/sound/pci/asihpi/hpi_version.h +++ b/sound/pci/asihpi/hpi_version.h @@ -11,13 +11,13 @@ Production releases have even minor version. /* Use single digits for versions less that 10 to avoid octal. */ /* *** HPI_VER is the only edit required to update version *** */ /** HPI version */ -#define HPI_VER HPI_VERSION_CONSTRUCTOR(4, 10, 1) +#define HPI_VER HPI_VERSION_CONSTRUCTOR(4, 14, 3) /** HPI version string in dotted decimal format */ -#define HPI_VER_STRING "4.10.01" +#define HPI_VER_STRING "4.14.03" /** Library version as documented in hpi-api-versions.txt */ -#define HPI_LIB_VER HPI_VERSION_CONSTRUCTOR(10, 2, 0) +#define HPI_LIB_VER HPI_VERSION_CONSTRUCTOR(10, 4, 0) /** Construct hpi version number from major, minor, release numbers */ #define HPI_VERSION_CONSTRUCTOR(maj, min, r) ((maj << 16) + (min << 8) + r) diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c index ac91637..3603c24 100644 --- a/sound/pci/asihpi/hpidspcd.c +++ b/sound/pci/asihpi/hpidspcd.c @@ -1,8 +1,9 @@ -/***********************************************************************/ -/** +/*********************************************************************** AudioScience HPI driver - Copyright (C) 1997-2011 AudioScience Inc. <support@audioscience.com> + Functions for reading DSP code using hotplug firmware loader + + Copyright (C) 1997-2014 AudioScience Inc. 
<support@audioscience.com> This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -17,11 +18,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -\file -Functions for reading DSP code using -hotplug firmware loader from individual dsp code files -*/ -/***********************************************************************/ +***********************************************************************/ #define SOURCEFILE_NAME "hpidspcd.c" #include "hpidspcd.h" #include "hpidebug.h" @@ -68,17 +65,18 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, goto error2; } - if ((header.version >> 9) != (HPI_VER >> 9)) { - /* Consider even and subsequent odd minor versions to be compatible */ - dev_err(&dev->dev, "Incompatible firmware version DSP image %X != Driver %X\n", + if (HPI_VER_MAJOR(header.version) != HPI_VER_MAJOR(HPI_VER)) { + /* Major version change probably means Host-DSP protocol change */ + dev_err(&dev->dev, + "Incompatible firmware version DSP image %X != Driver %X\n", header.version, HPI_VER); goto error2; } if (header.version != HPI_VER) { - dev_info(&dev->dev, - "Firmware: release version mismatch DSP image %X != Driver %X\n", - header.version, HPI_VER); + dev_warn(&dev->dev, + "Firmware version mismatch: DSP image %X != Driver %X\n", + header.version, HPI_VER); } HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name); diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 63b69f7..b680b4e 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3218,12 +3218,13 @@ static int create_input_ctls(struct hda_codec *codec) } /* add stereo mix when explicitly enabled via hint */ - if (mixer && spec->add_stereo_mix_input && - snd_hda_get_bool_hint(codec, "add_stereo_mix_input") > 0) { + if (mixer && spec->add_stereo_mix_input 
== HDA_HINT_STEREO_MIX_ENABLE) { err = parse_capture_source(codec, mixer, CFG_IDX_MIX, num_adcs, "Stereo Mix", 0); if (err < 0) return err; + else + spec->suppress_auto_mic = 1; } return 0; @@ -4542,9 +4543,8 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec, /* add stereo mix if available and not enabled yet */ if (!spec->auto_mic && spec->mixer_nid && - spec->add_stereo_mix_input && - spec->input_mux.num_items > 1 && - snd_hda_get_bool_hint(codec, "add_stereo_mix_input") < 0) { + spec->add_stereo_mix_input == HDA_HINT_STEREO_MIX_AUTO && + spec->input_mux.num_items > 1) { err = parse_capture_source(codec, spec->mixer_nid, CFG_IDX_MIX, spec->num_all_adcs, "Stereo Mix", 0); diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 61dd515..3d85266 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -222,7 +222,7 @@ struct hda_gen_spec { unsigned int vmaster_mute_enum:1; /* add vmaster mute mode enum */ unsigned int indep_hp:1; /* independent HP supported */ unsigned int prefer_hp_amp:1; /* enable HP amp for speaker if any */ - unsigned int add_stereo_mix_input:1; /* add aamix as a capture src */ + unsigned int add_stereo_mix_input:2; /* add aamix as a capture src */ unsigned int add_jack_modes:1; /* add i/o jack mode enum ctls */ unsigned int power_down_unused:1; /* power down unused widgets */ unsigned int dac_min_mute:1; /* minimal = mute for DACs */ @@ -291,6 +291,13 @@ struct hda_gen_spec { struct hda_jack_callback *cb); }; +/* values for add_stereo_mix_input flag */ +enum { + HDA_HINT_STEREO_MIX_DISABLE, /* No stereo mix input */ + HDA_HINT_STEREO_MIX_ENABLE, /* Add stereo mix input */ + HDA_HINT_STEREO_MIX_AUTO, /* Add only if auto-mic is disabled */ +}; + int snd_hda_gen_spec_init(struct hda_gen_spec *spec); int snd_hda_gen_init(struct hda_codec *codec); diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index bef7215..ccc962a 100644 --- a/sound/pci/hda/hda_sysfs.c +++ 
b/sound/pci/hda/hda_sysfs.c @@ -468,7 +468,7 @@ int snd_hda_get_bool_hint(struct hda_codec *codec, const char *key) EXPORT_SYMBOL_GPL(snd_hda_get_bool_hint); /** - * snd_hda_get_bool_hint - Get a boolean hint value + * snd_hda_get_int_hint - Get an integer hint value * @codec: the HDA codec * @key: the hint key string * @valp: pointer to store a value diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index c81b715..a9d78e2 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -195,7 +195,8 @@ static int ad198x_parse_auto_config(struct hda_codec *codec, bool indep_hp) codec->no_sticky_stream = 1; spec->gen.indep_hp = indep_hp; - spec->gen.add_stereo_mix_input = 1; + if (!spec->gen.add_stereo_mix_input) + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; err = snd_hda_parse_pin_defcfg(codec, cfg, NULL, 0); if (err < 0) @@ -256,6 +257,18 @@ static void ad1986a_fixup_eapd(struct hda_codec *codec, } } +/* enable stereo-mix input for avoiding regression on KDE (bko#88251) */ +static void ad1986a_fixup_eapd_mix_in(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct ad198x_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + ad1986a_fixup_eapd(codec, fix, action); + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_ENABLE; + } +} + enum { AD1986A_FIXUP_INV_JACK_DETECT, AD1986A_FIXUP_ULTRA, @@ -264,6 +277,8 @@ enum { AD1986A_FIXUP_LAPTOP, AD1986A_FIXUP_LAPTOP_IMIC, AD1986A_FIXUP_EAPD, + AD1986A_FIXUP_EAPD_MIX_IN, + AD1986A_FIXUP_EASYNOTE, }; static const struct hda_fixup ad1986a_fixups[] = { @@ -328,6 +343,30 @@ static const struct hda_fixup ad1986a_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = ad1986a_fixup_eapd, }, + [AD1986A_FIXUP_EAPD_MIX_IN] = { + .type = HDA_FIXUP_FUNC, + .v.func = ad1986a_fixup_eapd_mix_in, + }, + [AD1986A_FIXUP_EASYNOTE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x0421402f }, /* headphone */ + { 0x1b, 
0x90170110 }, /* speaker */ + { 0x1c, 0x411111f0 }, /* N/A */ + { 0x1d, 0x90a70130 }, /* int mic */ + { 0x1e, 0x411111f0 }, /* N/A */ + { 0x1f, 0x04a19040 }, /* mic */ + { 0x20, 0x411111f0 }, /* N/A */ + { 0x21, 0x411111f0 }, /* N/A */ + { 0x22, 0x411111f0 }, /* N/A */ + { 0x23, 0x411111f0 }, /* N/A */ + { 0x24, 0x411111f0 }, /* N/A */ + { 0x25, 0x411111f0 }, /* N/A */ + {} + }, + .chained = true, + .chain_id = AD1986A_FIXUP_EAPD_MIX_IN, + }, }; static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { @@ -341,6 +380,7 @@ static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc01e, "FSC V2060", AD1986A_FIXUP_LAPTOP), SND_PCI_QUIRK_MASK(0x144d, 0xff00, 0xc000, "Samsung", AD1986A_FIXUP_SAMSUNG), SND_PCI_QUIRK(0x144d, 0xc027, "Samsung Q1", AD1986A_FIXUP_ULTRA), + SND_PCI_QUIRK(0x1631, 0xc022, "PackardBell EasyNote MX65", AD1986A_FIXUP_EASYNOTE), SND_PCI_QUIRK(0x17aa, 0x2066, "Lenovo N100", AD1986A_FIXUP_INV_JACK_DETECT), SND_PCI_QUIRK(0x17aa, 0x1011, "Lenovo M55", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK(0x17aa, 0x1017, "Lenovo A60", AD1986A_FIXUP_3STACK), diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e9ebc7b..fd3ed18 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -855,14 +855,14 @@ static int patch_conexant_auto(struct hda_codec *codec) case 0x14f15045: codec->single_adc_amp = 1; spec->gen.mixer_nid = 0x17; - spec->gen.add_stereo_mix_input = 1; + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; snd_hda_pick_fixup(codec, cxt5045_fixup_models, cxt5045_fixups, cxt_fixups); break; case 0x14f15047: codec->pin_amp_workaround = 1; spec->gen.mixer_nid = 0x19; - spec->gen.add_stereo_mix_input = 1; + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; snd_hda_pick_fixup(codec, cxt5047_fixup_models, cxt5047_fixups, cxt_fixups); break; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 9dc9cf8..5f13d2d 100644 --- a/sound/pci/hda/patch_hdmi.c 
+++ b/sound/pci/hda/patch_hdmi.c @@ -47,7 +47,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); #define is_haswell(codec) ((codec)->vendor_id == 0x80862807) #define is_broadwell(codec) ((codec)->vendor_id == 0x80862808) -#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec)) +#define is_skylake(codec) ((codec)->vendor_id == 0x80862809) +#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ + || is_skylake(codec)) #define is_valleyview(codec) ((codec)->vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->vendor_id == 0x80862883) @@ -3365,6 +3367,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x80862806, .name = "PantherPoint HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862807, .name = "Haswell HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862808, .name = "Broadwell HDMI", .patch = patch_generic_hdmi }, +{ .id = 0x80862809, .name = "Skylake HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862880, .name = "CedarTrail HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862882, .name = "Valleyview2 HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862883, .name = "Braswell HDMI", .patch = patch_generic_hdmi }, @@ -3425,6 +3428,7 @@ MODULE_ALIAS("snd-hda-codec-id:80862805"); MODULE_ALIAS("snd-hda-codec-id:80862806"); MODULE_ALIAS("snd-hda-codec-id:80862807"); MODULE_ALIAS("snd-hda-codec-id:80862808"); +MODULE_ALIAS("snd-hda-codec-id:80862809"); MODULE_ALIAS("snd-hda-codec-id:80862880"); MODULE_ALIAS("snd-hda-codec-id:80862882"); MODULE_ALIAS("snd-hda-codec-id:80862883"); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a722067..65f1f4e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -321,10 +321,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) break; case 0x10ec0233: case 0x10ec0255: + case 0x10ec0256: case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: + 
case 0x10ec0298: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0285: @@ -2659,7 +2661,9 @@ enum { ALC269_TYPE_ALC284, ALC269_TYPE_ALC285, ALC269_TYPE_ALC286, + ALC269_TYPE_ALC298, ALC269_TYPE_ALC255, + ALC269_TYPE_ALC256, }; /* @@ -2686,7 +2690,9 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC282: case ALC269_TYPE_ALC283: case ALC269_TYPE_ALC286: + case ALC269_TYPE_ALC298: case ALC269_TYPE_ALC255: + case ALC269_TYPE_ALC256: ssids = alc269_ssids; break; default: @@ -4829,6 +4835,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -5417,9 +5424,15 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC286; spec->shutup = alc286_shutup; break; + case 0x10ec0298: + spec->codec_variant = ALC269_TYPE_ALC298; + break; case 0x10ec0255: spec->codec_variant = ALC269_TYPE_ALC255; break; + case 0x10ec0256: + spec->codec_variant = ALC269_TYPE_ALC256; + break; } if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) { @@ -6341,6 +6354,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 }, { .id = 0x10ec0235, .name = "ALC233", .patch = patch_alc269 }, { .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 }, + { .id = 0x10ec0256, .name = "ALC256", .patch = patch_alc269 }, { .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 }, { .id = 
0x10ec0262, .name = "ALC262", .patch = patch_alc262 }, { .id = 0x10ec0267, .name = "ALC267", .patch = patch_alc268 }, @@ -6360,6 +6374,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0290, .name = "ALC290", .patch = patch_alc269 }, { .id = 0x10ec0292, .name = "ALC292", .patch = patch_alc269 }, { .id = 0x10ec0293, .name = "ALC293", .patch = patch_alc269 }, + { .id = 0x10ec0298, .name = "ALC298", .patch = patch_alc269 }, { .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660", .patch = patch_alc861 }, { .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd }, diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 6c206b6..3de6d3d 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -137,7 +137,7 @@ static struct via_spec *via_new_spec(struct hda_codec *codec) spec->gen.indep_hp = 1; spec->gen.keep_eapd_on = 1; spec->gen.pcm_playback_hook = via_playback_pcm_hook; - spec->gen.add_stereo_mix_input = 1; + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; return spec; } diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index b1cc2a4..99ff35e 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -267,7 +267,7 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream, if (!ssc_p->dir_mask) { if (ssc_p->initialized) { /* Shutdown the SSC clock. 
*/ - pr_debug("atmel_ssc_dau: Stopping clock\n"); + pr_debug("atmel_ssc_dai: Stopping clock\n"); clk_disable(ssc_p->ssc->clk); free_irq(ssc_p->ssc->irq, ssc_p); diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 883c577..8349f98 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -520,6 +520,8 @@ config SND_SOC_RT5670 config SND_SOC_RT5677 tristate + select REGMAP_I2C + select REGMAP_IRQ config SND_SOC_RT5677_SPI tristate diff --git a/sound/soc/codecs/pcm512x-i2c.c b/sound/soc/codecs/pcm512x-i2c.c index 4d62230..d0547fa 100644 --- a/sound/soc/codecs/pcm512x-i2c.c +++ b/sound/soc/codecs/pcm512x-i2c.c @@ -24,8 +24,13 @@ static int pcm512x_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct regmap *regmap; + struct regmap_config config = pcm512x_regmap; - regmap = devm_regmap_init_i2c(i2c, &pcm512x_regmap); + /* msb needs to be set to enable auto-increment of addresses */ + config.read_flag_mask = 0x80; + config.write_flag_mask = 0x80; + + regmap = devm_regmap_init_i2c(i2c, &config); if (IS_ERR(regmap)) return PTR_ERR(regmap); diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index a7789a8..27141e2 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2209,6 +2209,10 @@ static int rt5645_jack_detect(struct snd_soc_codec *codec) int gpio_state, jack_type = 0; unsigned int val; + if (!gpio_is_valid(rt5645->pdata.hp_det_gpio)) { + dev_err(codec->dev, "invalid gpio\n"); + return -EINVAL; + } gpio_state = gpio_get_value(rt5645->pdata.hp_det_gpio); dev_dbg(codec->dev, "gpio = %d(%d)\n", rt5645->pdata.hp_det_gpio, diff --git a/sound/soc/intel/sst/sst_acpi.c b/sound/soc/intel/sst/sst_acpi.c index 31124aa..3abc29e 100644 --- a/sound/soc/intel/sst/sst_acpi.c +++ b/sound/soc/intel/sst/sst_acpi.c @@ -43,7 +43,7 @@ #include "sst.h" struct sst_machines { - char codec_id[32]; + char *codec_id; char board[32]; char machine[32]; void (*machine_quirk)(void); @@ -277,16 +277,16 @@ int 
sst_acpi_probe(struct platform_device *pdev) dev_dbg(dev, "ACPI device id: %x\n", dev_id); plat_dev = platform_device_register_data(dev, mach->pdata->platform, -1, NULL, 0); - if (plat_dev == NULL) { + if (IS_ERR(plat_dev)) { dev_err(dev, "Failed to create machine device: %s\n", mach->pdata->platform); - return -ENODEV; + return PTR_ERR(plat_dev); } /* Create platform device for sst machine driver */ mdev = platform_device_register_data(dev, mach->machine, -1, NULL, 0); - if (mdev == NULL) { + if (IS_ERR(mdev)) { dev_err(dev, "Failed to create machine device: %s\n", mach->machine); - return -ENODEV; + return PTR_ERR(mdev); } ret = sst_alloc_drv_context(&ctx, dev, dev_id); diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index b1a7c5b..b5a80c5 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1261,6 +1261,8 @@ static int samsung_i2s_probe(struct platform_device *pdev) ret = -ENOMEM; goto err; } + + sec_dai->variant_regs = pri_dai->variant_regs; sec_dai->dma_playback.dma_addr = regs_base + I2STXDS; sec_dai->dma_playback.ch_name = "tx-sec"; diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 1994d41..b703cb3 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -333,8 +333,11 @@ static struct usbmix_name_map gamecom780_map[] = { {} }; -static const struct usbmix_name_map kef_x300a_map[] = { - { 10, NULL }, /* firmware locks up (?) when we try to access this FU */ +/* some (all?) 
SCMS USB3318 devices are affected by a firmware lock up + * when anything attempts to access FU 10 (control) + */ +static const struct usbmix_name_map scms_usb3318_map[] = { + { 10, NULL }, { 0 } }; @@ -434,8 +437,14 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = ebox44_map, }, { + /* KEF X300A */ .id = USB_ID(0x27ac, 0x1000), - .map = kef_x300a_map, + .map = scms_usb3318_map, + }, + { + /* Arcam rPAC */ + .id = USB_ID(0x25c4, 0x0003), + .map = scms_usb3318_map, }, { 0 } /* terminator */ }; diff --git a/sound/usb/mixer_scarlett.c b/sound/usb/mixer_scarlett.c index 9109652..7438e7c 100644 --- a/sound/usb/mixer_scarlett.c +++ b/sound/usb/mixer_scarlett.c @@ -655,7 +655,7 @@ static struct scarlett_device_info s6i6_info = { .names = NULL }, - .num_controls = 0, + .num_controls = 9, .controls = { { .num = 0, .type = SCARLETT_OUTPUTS, .name = "Monitor" }, { .num = 1, .type = SCARLETT_OUTPUTS, .name = "Headphone" }, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4dbfb3d..a739841 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1245,8 +1245,9 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, /* XMOS based USB DACs */ switch (chip->usb_id) { - /* iFi Audio micro/nano iDSD */ - case USB_ID(0x20b1, 0x3008): + case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */ + case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ + case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h new file mode 100644 index 0000000..6eedba1 --- /dev/null +++ b/tools/include/asm-generic/bitops.h @@ -0,0 +1,27 @@ +#ifndef __TOOLS_ASM_GENERIC_BITOPS_H +#define __TOOLS_ASM_GENERIC_BITOPS_H + +/* + * tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed + * some functions. 
+ * + * For the benefit of those who are trying to port Linux to another + * architecture, here are some C-language equivalents. You should + * recode these in the native assembly language, if at all possible. + * + * C language equivalents written by Theodore Ts'o, 9/26/92 + */ + +#include <asm-generic/bitops/__ffs.h> +#include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/find.h> + +#ifndef _TOOLS_LINUX_BITOPS_H_ +#error only <linux/bitops.h> can be included directly +#endif + +#include <asm-generic/bitops/atomic.h> + +#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */ diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h new file mode 100644 index 0000000..c941750 --- /dev/null +++ b/tools/include/asm-generic/bitops/__ffs.h @@ -0,0 +1,43 @@ +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ + +#include <asm/types.h> + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. 
+ */ +static __always_inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if __BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */ diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h new file mode 100644 index 0000000..2218b9a --- /dev/null +++ b/tools/include/asm-generic/bitops/__fls.h @@ -0,0 +1 @@ +#include <../../../../include/asm-generic/bitops/__fls.h> diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h new file mode 100644 index 0000000..4bccd7c3 --- /dev/null +++ b/tools/include/asm-generic/bitops/atomic.h @@ -0,0 +1,22 @@ +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ + +#include <asm/types.h> + +static inline void set_bit(int nr, unsigned long *addr) +{ + addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG); +} + +static inline void clear_bit(int nr, unsigned long *addr) +{ + addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG)); +} + +static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) +{ + return ((1UL << (nr % __BITS_PER_LONG)) & + (((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0; +} + +#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h new file mode 100644 index 0000000..31f5154 --- /dev/null +++ b/tools/include/asm-generic/bitops/find.h @@ -0,0 +1,33 @@ +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ + +#ifndef 
find_next_bit +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_bit(const unsigned long *addr, unsigned long + size, unsigned long offset); +#endif + +#ifndef find_first_bit + +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first set bit. + * If no bits are set, returns @size. + */ +extern unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); + +#endif /* find_first_bit */ + +#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h new file mode 100644 index 0000000..dbf711a --- /dev/null +++ b/tools/include/asm-generic/bitops/fls.h @@ -0,0 +1 @@ +#include <../../../../include/asm-generic/bitops/fls.h> diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h new file mode 100644 index 0000000..980b1f6 --- /dev/null +++ b/tools/include/asm-generic/bitops/fls64.h @@ -0,0 +1 @@ +#include <../../../../include/asm-generic/bitops/fls64.h> diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h new file mode 100644 index 0000000..26005a15 --- /dev/null +++ b/tools/include/linux/bitops.h @@ -0,0 +1,53 @@ +#ifndef _TOOLS_LINUX_BITOPS_H_ +#define _TOOLS_LINUX_BITOPS_H_ + +#include <linux/kernel.h> +#include <linux/compiler.h> +#include <asm/hweight.h> + +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) +#endif + +#define BITS_PER_LONG __WORDSIZE + +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_BYTE 8 +#define 
BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) +#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) + +/* + * Include this here because some architectures need generic_ffs/fls in + * scope + * + * XXX: this needs to be asm/bitops.h, when we get to per arch optimizations + */ +#include <asm-generic/bitops.h> + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + +static inline unsigned fls_long(unsigned long l) +{ + if (sizeof(l) == 4) + return fls(l); + return fls64(l); +} + +#endif diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h new file mode 100644 index 0000000..4144666 --- /dev/null +++ b/tools/include/linux/log2.h @@ -0,0 +1,185 @@ +/* Integer base 2 logarithm calculation + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _TOOLS_LINUX_LOG2_H +#define _TOOLS_LINUX_LOG2_H + +/* + * deal with unrepresentable constant logarithms + */ +extern __attribute__((const, noreturn)) +int ____ilog2_NaN(void); + +/* + * non-constant log of base 2 calculators + * - the arch may override these in asm/bitops.h if they can be implemented + * more efficiently than using fls() and fls64() + * - the arch is not required to handle n==0 if implementing the fallback + */ +static inline __attribute__((const)) +int __ilog2_u32(u32 n) +{ + return fls(n) - 1; +} + +static inline __attribute__((const)) +int __ilog2_u64(u64 n) +{ + return fls64(n) - 1; +} + +/* + * Determine whether some value is a power of two, where zero is + * *not* considered a power of two. + */ + +static inline __attribute__((const)) +bool is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +/* + * round up to nearest power of two + */ +static inline __attribute__((const)) +unsigned long __roundup_pow_of_two(unsigned long n) +{ + return 1UL << fls_long(n - 1); +} + +/* + * round down to nearest power of two + */ +static inline __attribute__((const)) +unsigned long __rounddown_pow_of_two(unsigned long n) +{ + return 1UL << (fls_long(n) - 1); +} + +/** + * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value + * @n - parameter + * + * constant-capable log of base 2 calculation + * - this can be used to initialise global variables from constant data, hence + * the massive ternary operator construction + * + * selects the appropriately-sized optimised version depending on sizeof(n) + */ +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n) < 1 ? ____ilog2_NaN() : \ + (n) & (1ULL << 63) ? 63 : \ + (n) & (1ULL << 62) ? 62 : \ + (n) & (1ULL << 61) ? 61 : \ + (n) & (1ULL << 60) ? 60 : \ + (n) & (1ULL << 59) ? 59 : \ + (n) & (1ULL << 58) ? 58 : \ + (n) & (1ULL << 57) ? 57 : \ + (n) & (1ULL << 56) ? 56 : \ + (n) & (1ULL << 55) ? 55 : \ + (n) & (1ULL << 54) ? 54 : \ + (n) & (1ULL << 53) ? 
53 : \ + (n) & (1ULL << 52) ? 52 : \ + (n) & (1ULL << 51) ? 51 : \ + (n) & (1ULL << 50) ? 50 : \ + (n) & (1ULL << 49) ? 49 : \ + (n) & (1ULL << 48) ? 48 : \ + (n) & (1ULL << 47) ? 47 : \ + (n) & (1ULL << 46) ? 46 : \ + (n) & (1ULL << 45) ? 45 : \ + (n) & (1ULL << 44) ? 44 : \ + (n) & (1ULL << 43) ? 43 : \ + (n) & (1ULL << 42) ? 42 : \ + (n) & (1ULL << 41) ? 41 : \ + (n) & (1ULL << 40) ? 40 : \ + (n) & (1ULL << 39) ? 39 : \ + (n) & (1ULL << 38) ? 38 : \ + (n) & (1ULL << 37) ? 37 : \ + (n) & (1ULL << 36) ? 36 : \ + (n) & (1ULL << 35) ? 35 : \ + (n) & (1ULL << 34) ? 34 : \ + (n) & (1ULL << 33) ? 33 : \ + (n) & (1ULL << 32) ? 32 : \ + (n) & (1ULL << 31) ? 31 : \ + (n) & (1ULL << 30) ? 30 : \ + (n) & (1ULL << 29) ? 29 : \ + (n) & (1ULL << 28) ? 28 : \ + (n) & (1ULL << 27) ? 27 : \ + (n) & (1ULL << 26) ? 26 : \ + (n) & (1ULL << 25) ? 25 : \ + (n) & (1ULL << 24) ? 24 : \ + (n) & (1ULL << 23) ? 23 : \ + (n) & (1ULL << 22) ? 22 : \ + (n) & (1ULL << 21) ? 21 : \ + (n) & (1ULL << 20) ? 20 : \ + (n) & (1ULL << 19) ? 19 : \ + (n) & (1ULL << 18) ? 18 : \ + (n) & (1ULL << 17) ? 17 : \ + (n) & (1ULL << 16) ? 16 : \ + (n) & (1ULL << 15) ? 15 : \ + (n) & (1ULL << 14) ? 14 : \ + (n) & (1ULL << 13) ? 13 : \ + (n) & (1ULL << 12) ? 12 : \ + (n) & (1ULL << 11) ? 11 : \ + (n) & (1ULL << 10) ? 10 : \ + (n) & (1ULL << 9) ? 9 : \ + (n) & (1ULL << 8) ? 8 : \ + (n) & (1ULL << 7) ? 7 : \ + (n) & (1ULL << 6) ? 6 : \ + (n) & (1ULL << 5) ? 5 : \ + (n) & (1ULL << 4) ? 4 : \ + (n) & (1ULL << 3) ? 3 : \ + (n) & (1ULL << 2) ? 2 : \ + (n) & (1ULL << 1) ? 1 : \ + (n) & (1ULL << 0) ? 0 : \ + ____ilog2_NaN() \ + ) : \ + (sizeof(n) <= 4) ? 
\ + __ilog2_u32(n) : \ + __ilog2_u64(n) \ + ) + +/** + * roundup_pow_of_two - round the given value up to nearest power of two + * @n - parameter + * + * round the given value up to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define roundup_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 1) ? 1 : \ + (1UL << (ilog2((n) - 1) + 1)) \ + ) : \ + __roundup_pow_of_two(n) \ + ) + +/** + * rounddown_pow_of_two - round the given value down to nearest power of two + * @n - parameter + * + * round the given value down to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define rounddown_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (1UL << ilog2(n))) : \ + __rounddown_pow_of_two(n) \ + ) + +#endif /* _TOOLS_LINUX_LOG2_H */ diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index c1b49c3..65d9be3 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -7,6 +7,10 @@ #include <stdlib.h> #include <string.h> #include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> #include "debugfs.h" #include "fs.h" @@ -163,3 +167,33 @@ const char *name##__mountpoint(void) \ FS__MOUNTPOINT(sysfs, FS__SYSFS); FS__MOUNTPOINT(procfs, FS__PROCFS); + +int filename__read_int(const char *filename, int *value) +{ + char line[64]; + int fd = open(filename, O_RDONLY), err = -1; + + if (fd < 0) + return -1; + + if (read(fd, line, sizeof(line)) > 0) { + *value = atoi(line); + err = 0; + } + + close(fd); + return err; +} + +int sysctl__read_int(const char *sysctl, int *value) +{ + char path[PATH_MAX]; + const char *procfs = procfs__mountpoint(); + + if (!procfs) + return -1; + + snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl); + + return filename__read_int(path, value); +} diff --git a/tools/lib/api/fs/fs.h 
b/tools/lib/api/fs/fs.h index cb70495..6caa2bb 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -11,4 +11,7 @@ const char *sysfs__mountpoint(void); const char *procfs__mountpoint(void); + +int filename__read_int(const char *filename, int *value); +int sysctl__read_int(const char *sysctl, int *value); #endif /* __API_FS__ */ diff --git a/tools/lib/util/find_next_bit.c b/tools/lib/util/find_next_bit.c new file mode 100644 index 0000000..41b44f6 --- /dev/null +++ b/tools/lib/util/find_next_bit.c @@ -0,0 +1,89 @@ +/* find_next_bit.c: fallback find next bit implementation + * + * Copied from lib/find_next_bit.c to tools/lib/next_bit.c + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <asm/types.h> +#include <asm/byteorder.h> + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +#ifndef find_next_bit +/* + * Find the next set bit in a memory region. 
+ */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} +#endif + +#ifndef find_first_bit +/* + * Find the first set bit in a memory region. + */ +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + __ffs(tmp); +} +#endif diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index d240bb2..1e8e400 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -18,6 +18,10 @@ OPTIONS --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 +--buildid-dir:: + Setup buildid cache directory. It has higher priority than + buildid.dir config file option. 
+ DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 344c4d3..83e2887 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -4,17 +4,31 @@ tools/lib/traceevent tools/lib/api tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h +tools/lib/util/find_next_bit.c tools/include/asm/bug.h +tools/include/asm-generic/bitops/atomic.h +tools/include/asm-generic/bitops/__ffs.h +tools/include/asm-generic/bitops/__fls.h +tools/include/asm-generic/bitops/find.h +tools/include/asm-generic/bitops/fls64.h +tools/include/asm-generic/bitops/fls.h +tools/include/asm-generic/bitops.h +tools/include/linux/bitops.h tools/include/linux/compiler.h -tools/include/linux/hash.h tools/include/linux/export.h +tools/include/linux/hash.h +tools/include/linux/log2.h tools/include/linux/types.h +include/asm-generic/bitops/fls64.h +include/asm-generic/bitops/__fls.h +include/asm-generic/bitops/fls.h include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h +lib/find_next_bit.c lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 478efa9..67a03a82 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -231,8 +231,16 @@ LIB_H += ../../include/uapi/linux/const.h LIB_H += ../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h -LIB_H += util/include/linux/bitops.h +LIB_H += ../include/linux/bitops.h +LIB_H += ../include/asm-generic/bitops/atomic.h +LIB_H += ../include/asm-generic/bitops/find.h +LIB_H += ../include/asm-generic/bitops/fls64.h +LIB_H += ../include/asm-generic/bitops/fls.h +LIB_H += ../include/asm-generic/bitops/__ffs.h +LIB_H += ../include/asm-generic/bitops/__fls.h +LIB_H += ../include/asm-generic/bitops.h LIB_H += ../include/linux/compiler.h +LIB_H += 
../include/linux/log2.h LIB_H += util/include/linux/const.h LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h @@ -335,6 +343,7 @@ LIB_OBJS += $(OUTPUT)util/event.o LIB_OBJS += $(OUTPUT)util/evlist.o LIB_OBJS += $(OUTPUT)util/evsel.o LIB_OBJS += $(OUTPUT)util/exec_cmd.o +LIB_OBJS += $(OUTPUT)util/find_next_bit.o LIB_OBJS += $(OUTPUT)util/help.o LIB_OBJS += $(OUTPUT)util/kallsyms.o LIB_OBJS += $(OUTPUT)util/levenshtein.o @@ -458,7 +467,6 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o @@ -735,6 +743,9 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 2465141..6c14afe 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -13,6 +13,7 @@ #include "../util/cloexec.h" #include "bench.h" #include "mem-memcpy-arch.h" +#include "mem-memset-arch.h" #include <stdio.h> #include <stdlib.h> @@ -48,20 +49,24 @@ static const struct option options[] = { }; typedef void *(*memcpy_t)(void *, const void *, size_t); +typedef void *(*memset_t)(void *, int, size_t); struct routine { const char *name; const char *desc; - memcpy_t fn; + union { + memcpy_t memcpy; + 
memset_t memset; + } fn; }; -struct routine routines[] = { - { "default", - "Default memcpy() provided by glibc", - memcpy }, +struct routine memcpy_routines[] = { + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, #ifdef HAVE_ARCH_X86_64_SUPPORT -#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, +#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, #include "mem-memcpy-x86-64-asm-def.h" #undef MEMCPY_FN @@ -69,7 +74,7 @@ struct routine routines[] = { { NULL, NULL, - NULL } + {NULL} } }; static const char * const bench_mem_memcpy_usage[] = { @@ -110,63 +115,6 @@ static double timeval2double(struct timeval *ts) (double)ts->tv_usec / (double)1000000; } -static void alloc_mem(void **dst, void **src, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); - - *src = zalloc(length); - if (!*src) - die("memory allocation failed - maybe length is too large?\n"); - /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ - memset(*src, 0, length); -} - -static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *src = NULL, *dst = NULL; - int i; - - alloc_mem(&src, &dst, len); - - if (prefault) - fn(dst, src, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - cycle_end = get_cycle(); - - free(src); - free(dst); - return cycle_end - cycle_start; -} - -static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - void *src = NULL, *dst = NULL; - int i; - - alloc_mem(&src, &dst, len); - - if (prefault) - fn(dst, src, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(src); - free(dst); - return 
(double)((double)len / timeval2double(&tv_diff)); -} - #define pf (no_prefault ? 0 : 1) #define print_bps(x) do { \ @@ -180,16 +128,25 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) printf(" %14lf GB/Sec", x / K / K / K); \ } while (0) -int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused) +struct bench_mem_info { + const struct routine *routines; + u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); + double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); + const char *const *usage; +}; + +static int bench_mem_common(int argc, const char **argv, + const char *prefix __maybe_unused, + struct bench_mem_info *info) { int i; size_t len; + double totallen; double result_bps[2]; u64 result_cycle[2]; argc = parse_options(argc, argv, options, - bench_mem_memcpy_usage, 0); + info->usage, 0); if (no_prefault && only_prefault) { fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); @@ -200,6 +157,7 @@ int bench_mem_memcpy(int argc, const char **argv, init_cycle(); len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; @@ -213,16 +171,16 @@ int bench_mem_memcpy(int argc, const char **argv, if (only_prefault && no_prefault) only_prefault = no_prefault = false; - for (i = 0; routines[i].name; i++) { - if (!strcmp(routines[i].name, routine)) + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) break; } - if (!routines[i].name) { + if (!info->routines[i].name) { printf("Unknown routine:%s\n", routine); printf("Available routines...\n"); - for (i = 0; routines[i].name; i++) { + for (i = 0; info->routines[i].name; i++) { printf("\t%s ... 
%s\n", - routines[i].name, routines[i].desc); + info->routines[i].name, info->routines[i].desc); } return 1; } @@ -234,25 +192,25 @@ int bench_mem_memcpy(int argc, const char **argv, /* show both of results */ if (use_cycle) { result_cycle[0] = - do_memcpy_cycle(routines[i].fn, len, false); + info->do_cycle(&info->routines[i], len, false); result_cycle[1] = - do_memcpy_cycle(routines[i].fn, len, true); + info->do_cycle(&info->routines[i], len, true); } else { result_bps[0] = - do_memcpy_gettimeofday(routines[i].fn, + info->do_gettimeofday(&info->routines[i], len, false); result_bps[1] = - do_memcpy_gettimeofday(routines[i].fn, + info->do_gettimeofday(&info->routines[i], len, true); } } else { if (use_cycle) { result_cycle[pf] = - do_memcpy_cycle(routines[i].fn, + info->do_cycle(&info->routines[i], len, only_prefault); } else { result_bps[pf] = - do_memcpy_gettimeofday(routines[i].fn, + info->do_gettimeofday(&info->routines[i], len, only_prefault); } } @@ -263,10 +221,10 @@ int bench_mem_memcpy(int argc, const char **argv, if (use_cycle) { printf(" %14lf Cycle/Byte\n", (double)result_cycle[0] - / (double)len); + / totallen); printf(" %14lf Cycle/Byte (with prefault)\n", (double)result_cycle[1] - / (double)len); + / totallen); } else { print_bps(result_bps[0]); printf("\n"); @@ -277,7 +235,7 @@ int bench_mem_memcpy(int argc, const char **argv, if (use_cycle) { printf(" %14lf Cycle/Byte", (double)result_cycle[pf] - / (double)len); + / totallen); } else print_bps(result_bps[pf]); @@ -288,8 +246,8 @@ int bench_mem_memcpy(int argc, const char **argv, if (!only_prefault && !no_prefault) { if (use_cycle) { printf("%lf %lf\n", - (double)result_cycle[0] / (double)len, - (double)result_cycle[1] / (double)len); + (double)result_cycle[0] / totallen, + (double)result_cycle[1] / totallen); } else { printf("%lf %lf\n", result_bps[0], result_bps[1]); @@ -297,7 +255,7 @@ int bench_mem_memcpy(int argc, const char **argv, } else { if (use_cycle) { printf("%lf\n", 
(double)result_cycle[pf] - / (double)len); + / totallen); } else printf("%lf\n", result_bps[pf]); } @@ -310,3 +268,163 @@ int bench_mem_memcpy(int argc, const char **argv, return 0; } + +static void memcpy_alloc_mem(void **dst, void **src, size_t length) +{ + *dst = zalloc(length); + if (!*dst) + die("memory allocation failed - maybe length is too large?\n"); + + *src = zalloc(length); + if (!*src) + die("memory allocation failed - maybe length is too large?\n"); + /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ + memset(*src, 0, length); +} + +static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *src = NULL, *dst = NULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + cycle_end = get_cycle(); + + free(src); + free(dst); + return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(const struct routine *r, size_t len, + bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + memcpy_t fn = r->fn.memcpy; + void *src = NULL, *dst = NULL; + int i; + + memcpy_alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); +} + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .routines = memcpy_routines, + .do_cycle = do_memcpy_cycle, + .do_gettimeofday = do_memcpy_gettimeofday, + .usage = bench_mem_memcpy_usage, + }; + + return bench_mem_common(argc, argv, prefix, &info); +} + +static void memset_alloc_mem(void **dst, size_t 
length) +{ + *dst = zalloc(length); + if (!*dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + cycle_end = get_cycle(); + + free(dst); + return cycle_end - cycle_start; +} + +static double do_memset_gettimeofday(const struct routine *r, size_t len, + bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)(((double)len * iterations) / timeval2double(&tv_diff)); +} + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset <options>", + NULL +}; + +static const struct routine memset_routines[] = { + { .name ="default", + .desc = "Default memset() provided by glibc", + .fn.memset = memset }, +#ifdef HAVE_ARCH_X86_64_SUPPORT + +#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { .name = NULL, + .desc = NULL, + .fn.memset = NULL } +}; + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .routines = memset_routines, + .do_cycle = do_memset_cycle, + .do_gettimeofday = do_memset_gettimeofday, + .usage = bench_mem_memset_usage, + }; + + return bench_mem_common(argc, argv, prefix, &info); +} diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c 
deleted file mode 100644 index 75fc3e6..0000000 --- a/tools/perf/bench/mem-memset.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * mem-memset.c - * - * memset: Simple memory set in various ways - * - * Trivial clone of mem-memcpy.c. - */ - -#include "../perf.h" -#include "../util/util.h" -#include "../util/parse-options.h" -#include "../util/header.h" -#include "../util/cloexec.h" -#include "bench.h" -#include "mem-memset-arch.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <errno.h> - -#define K 1024 - -static const char *length_str = "1MB"; -static const char *routine = "default"; -static int iterations = 1; -static bool use_cycle; -static int cycle_fd; -static bool only_prefault; -static bool no_prefault; - -static const struct option options[] = { - OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to set. " - "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "default", - "Specify routine to set"), - OPT_INTEGER('i', "iterations", &iterations, - "repeat memset() invocation this number of times"), - OPT_BOOLEAN('c', "cycle", &use_cycle, - "Use cycles event instead of gettimeofday() for measuring"), - OPT_BOOLEAN('o', "only-prefault", &only_prefault, - "Show only the result with page faults before memset()"), - OPT_BOOLEAN('n', "no-prefault", &no_prefault, - "Show only the result without page faults before memset()"), - OPT_END() -}; - -typedef void *(*memset_t)(void *, int, size_t); - -struct routine { - const char *name; - const char *desc; - memset_t fn; -}; - -static const struct routine routines[] = { - { "default", - "Default memset() provided by glibc", - memset }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMSET_FN(fn, name, desc) { name, desc, fn }, -#include "mem-memset-x86-64-asm-def.h" -#undef MEMSET_FN - -#endif - - { NULL, - NULL, - NULL } -}; - -static const char * const bench_mem_memset_usage[] = { - "perf bench mem memset 
<options>", - NULL -}; - -static struct perf_event_attr cycle_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES -}; - -static void init_cycle(void) -{ - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, - perf_event_open_cloexec_flag()); - - if (cycle_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycle_fd < 0); -} - -static u64 get_cycle(void) -{ - int ret; - u64 clk; - - ret = read(cycle_fd, &clk, sizeof(u64)); - BUG_ON(ret != sizeof(u64)); - - return clk; -} - -static double timeval2double(struct timeval *ts) -{ - return (double)ts->tv_sec + - (double)ts->tv_usec / (double)1000000; -} - -static void alloc_mem(void **dst, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); -} - -static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *dst = NULL; - int i; - - alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - cycle_end = get_cycle(); - - free(dst); - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - void *dst = NULL; - int i; - - alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(dst); - return (double)((double)len / timeval2double(&tv_diff)); -} - -#define pf (no_prefault ? 
0 : 1) - -#define print_bps(x) do { \ - if (x < K) \ - printf(" %14lf B/Sec", x); \ - else if (x < K * K) \ - printf(" %14lfd KB/Sec", x / K); \ - else if (x < K * K * K) \ - printf(" %14lf MB/Sec", x / K / K); \ - else \ - printf(" %14lf GB/Sec", x / K / K / K); \ - } while (0) - -int bench_mem_memset(int argc, const char **argv, - const char *prefix __maybe_unused) -{ - int i; - size_t len; - double result_bps[2]; - u64 result_cycle[2]; - - argc = parse_options(argc, argv, options, - bench_mem_memset_usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - - result_cycle[0] = result_cycle[1] = 0ULL; - result_bps[0] = result_bps[1] = 0.0; - - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - for (i = 0; routines[i].name; i++) { - if (!strcmp(routines[i].name, routine)) - break; - } - if (!routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; routines[i].name; i++) { - printf("\t%s ... 
%s\n", - routines[i].name, routines[i].desc); - } - return 1; - } - - if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", length_str); - - if (!only_prefault && !no_prefault) { - /* show both of results */ - if (use_cycle) { - result_cycle[0] = - do_memset_cycle(routines[i].fn, len, false); - result_cycle[1] = - do_memset_cycle(routines[i].fn, len, true); - } else { - result_bps[0] = - do_memset_gettimeofday(routines[i].fn, - len, false); - result_bps[1] = - do_memset_gettimeofday(routines[i].fn, - len, true); - } - } else { - if (use_cycle) { - result_cycle[pf] = - do_memset_cycle(routines[i].fn, - len, only_prefault); - } else { - result_bps[pf] = - do_memset_gettimeofday(routines[i].fn, - len, only_prefault); - } - } - - switch (bench_format) { - case BENCH_FORMAT_DEFAULT: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", - (double)result_cycle[0] - / (double)len); - printf(" %14lf Cycle/Byte (with prefault)\n ", - (double)result_cycle[1] - / (double)len); - } else { - print_bps(result_bps[0]); - printf("\n"); - print_bps(result_bps[1]); - printf(" (with prefault)\n"); - } - } else { - if (use_cycle) { - printf(" %14lf Cycle/Byte", - (double)result_cycle[pf] - / (double)len); - } else - print_bps(result_bps[pf]); - - printf("%s\n", only_prefault ? 
" (with prefault)" : ""); - } - break; - case BENCH_FORMAT_SIMPLE: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf("%lf %lf\n", - (double)result_cycle[0] / (double)len, - (double)result_cycle[1] / (double)len); - } else { - printf("%lf %lf\n", - result_bps[0], result_bps[1]); - } - } else { - if (use_cycle) { - printf("%lf\n", (double)result_cycle[pf] - / (double)len); - } else - printf("%lf\n", result_bps[pf]); - } - break; - default: - /* reaching this means there's some disaster: */ - die("unknown format: %d\n", bench_format); - break; - } - - return 0; -} diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 7038575..77d5cae 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -285,12 +285,11 @@ int cmd_buildid_cache(int argc, const char **argv, struct str_node *pos; int ret = 0; bool force = false; - char debugdir[PATH_MAX]; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename; + *kcore_filename = NULL; char sbuf[STRERR_BUFSIZE]; struct perf_data_file file = { @@ -335,13 +334,11 @@ int cmd_buildid_cache(int argc, const char **argv, setup_pager(); - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); - if (add_name_list_str) { list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, debugdir)) { + if (build_id_cache__add_file(pos->s, buildid_dir)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -359,7 +356,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, debugdir)) { + if (build_id_cache__remove_file(pos->s, buildid_dir)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -380,7 +377,7 @@ int cmd_buildid_cache(int argc, 
const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, debugdir)) { + if (build_id_cache__update_file(pos->s, buildid_dir)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -395,7 +392,7 @@ int cmd_buildid_cache(int argc, const char **argv, } if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, debugdir, force)) + build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 3c0f3d4..0894a81 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1293,7 +1293,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, OPT_UINTEGER('d', "display", &kvm->display_time, "time in seconds between display updates"), OPT_STRING(0, "event", &kvm->report_event, "report event", - "event for reporting: vmexit, mmio, ioport"), + "event for reporting: " + "vmexit, mmio (x86 only), ioport (x86 only)"), OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu, "vcpu id to report"), OPT_STRING('k', "key", &kvm->sort_key, "sort-key", diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 83a4835..badfabc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2045,7 +2045,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) unsigned long before; const bool forks = argc > 0; bool draining = false; - char sbuf[STRERR_BUFSIZE]; trace->live = true; @@ -2106,11 +2105,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_error_open; err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); - if (err < 0) { - fprintf(trace->output, "Couldn't mmap the events: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - goto out_delete_evlist; - } + if (err < 0) + goto out_error_mmap; perf_evlist__enable(evlist); @@ -2210,6 +2206,10 @@ 
out_error_tp: perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); goto out_error; +out_error_mmap: + perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); + goto out_error; + out_error_open: perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); @@ -2485,7 +2485,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .no_buffering = true, - .mmap_pages = 1024, + .mmap_pages = UINT_MAX, }, .output = stdout, .show_comm = true, diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 452a847..3700a7f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -200,6 +200,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) *envchanged = 1; (*argv)++; (*argc)--; + } else if (!strcmp(cmd, "--buildid-dir")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --buildid-dir.\n"); + usage(perf_usage_string); + } + set_buildid_dir((*argv)[1]); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); fprintf(stderr, "dir: %s\n", debugfs_mountpoint); @@ -499,7 +509,7 @@ int main(int argc, const char **argv) } if (!prefixcmp(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT - set_buildid_dir(); + set_buildid_dir(NULL); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); @@ -514,7 +524,7 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); - set_buildid_dir(); + set_buildid_dir(NULL); if (argc > 0) { if (!prefixcmp(argv[0], "--")) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index f710b92..d3095da 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=96 +size=104 config=0 sample_period=4000 sample_type=263 diff --git 
a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index dc3ada2..872ed7e 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=96 +size=104 config=0 sample_period=0 sample_type=0 diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 502daff..e6bb04b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1252,7 +1252,7 @@ static int hists__browser_title(struct hists *hists, nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, - "Samples: %lu%c of event '%s', Event count (approx.): %lu", + "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, nr_samples, unit, ev_name, nr_events); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 2af1837..dc0d095 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -162,8 +162,8 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; nr_members = evsel->nr_members; - fields_a = calloc(sizeof(*fields_a), nr_members); - fields_b = calloc(sizeof(*fields_b), nr_members); + fields_a = calloc(nr_members, sizeof(*fields_a)); + fields_b = calloc(nr_members, sizeof(*fields_b)); if (!fields_a || !fields_b) goto out; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e8d79e5..0c72680 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -410,21 +410,18 @@ int perf_session__cache_build_ids(struct perf_session *session) { struct rb_node *nd; int ret; - char debugdir[PATH_MAX]; if (no_buildid_cache) return 0; - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); - - if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) + if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, debugdir); + ret = machine__cache_build_ids(&session->machines.host, buildid_dir); for (nd = 
rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, debugdir); + ret |= machine__cache_build_ids(pos, buildid_dir); } return ret ? -1 : 0; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index cf524a3..64b377e 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -77,7 +77,7 @@ int parse_callchain_record_opt(const char *arg) ret = 0; } else pr_err("callchain: No more arguments " - "needed for -g fp\n"); + "needed for --call-graph fp\n"); break; #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 57ff826..e18f653 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -522,7 +522,7 @@ static int buildid_dir_command_config(const char *var, const char *value, const char *v; /* same dir for all commands */ - if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) { + if (!strcmp(var, "buildid.dir")) { v = perf_config_dirname(var, value); if (!v) return -1; @@ -539,12 +539,14 @@ static void check_buildid_dir_config(void) perf_config(buildid_dir_command_config, &c); } -void set_buildid_dir(void) +void set_buildid_dir(const char *dir) { - buildid_dir[0] = '\0'; + if (dir) + scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir); /* try config file */ - check_buildid_dir_config(); + if (buildid_dir[0] == '\0') + check_buildid_dir_config(); /* default to $HOME/.debug */ if (buildid_dir[0] == '\0') { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index cfbe2b9..cbab1fb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -8,6 +8,7 @@ */ #include "util.h" #include <api/fs/debugfs.h> +#include <api/fs/fs.h> #include <poll.h> #include "cpumap.h" #include "thread_map.h" @@ -24,6 +25,7 @@ #include <linux/bitops.h> #include <linux/hash.h> +#include <linux/log2.h> static void perf_evlist__mmap_put(struct perf_evlist *evlist, 
int idx); static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); @@ -892,10 +894,24 @@ out_unmap: static size_t perf_evlist__mmap_size(unsigned long pages) { - /* 512 kiB: default amount of unprivileged mlocked memory */ - if (pages == UINT_MAX) - pages = (512 * 1024) / page_size; - else if (!is_power_of_2(pages)) + if (pages == UINT_MAX) { + int max; + + if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { + /* + * Pick a once upon a time good value, i.e. things look + * strange since we can't read a sysctl value, but lets not + * die yet... + */ + max = 512; + } else { + max -= (page_size / 1024); + } + + pages = (max * 1024) / page_size; + if (!is_power_of_2(pages)) + pages = rounddown_pow_of_two(pages); + } else if (!is_power_of_2(pages)) return 0; return (pages + 1) * page_size; @@ -932,7 +948,7 @@ static long parse_pages_arg(const char *str, unsigned long min, /* leave number of pages at 0 */ } else if (!is_power_of_2(pages)) { /* round pages up to next power of 2 */ - pages = next_pow2_l(pages); + pages = roundup_pow_of_two(pages); if (!pages) return -EINVAL; pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", @@ -1483,6 +1499,37 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, return 0; } +int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) +{ + char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); + int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; + + switch (err) { + case EPERM: + sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); + printed += scnprintf(buf + printed, size - printed, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" + "Hint:\tTried using %zd kB.\n", + emsg, pages_max_per_user, pages_attempted); + + if (pages_attempted >= pages_max_per_user) { + printed += scnprintf(buf + printed, size - printed, + "Hint:\tTry 'sudo sh -c \"echo 
%d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", + pages_max_per_user + pages_attempted); + } + + printed += scnprintf(buf + printed, size - printed, + "Hint:\tTry using a smaller -m/--mmap-pages value."); + break; + default: + scnprintf(buf, size, "%s", emsg); + break; + } + + return 0; +} + void perf_evlist__to_front(struct perf_evlist *evlist, struct perf_evsel *move_evsel) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 649b0c5..0ba93f6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -185,6 +185,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); +int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h deleted file mode 100644 index c329416..0000000 --- a/tools/perf/util/include/linux/bitops.h +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef _PERF_LINUX_BITOPS_H_ -#define _PERF_LINUX_BITOPS_H_ - -#include <linux/kernel.h> -#include <linux/compiler.h> -#include <asm/hweight.h> - -#ifndef __WORDSIZE -#define __WORDSIZE (__SIZEOF_LONG__ * 8) -#endif - -#define BITS_PER_LONG __WORDSIZE -#define BITS_PER_BYTE 8 -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) -#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) -#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) - -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), 
(size), (bit) + 1)) - -/* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -static inline void set_bit(int nr, unsigned long *addr) -{ - addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); -} - -static inline void clear_bit(int nr, unsigned long *addr) -{ - addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); -} - -static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) -{ - return ((1UL << (nr % BITS_PER_LONG)) & - (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; -} - -static inline unsigned long hweight_long(unsigned long w) -{ - return sizeof(w) == 4 ? hweight32(w) : hweight64(w); -} - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static __always_inline unsigned long __ffs(unsigned long word) -{ - int num = 0; - -#if BITS_PER_LONG == 64 - if ((word & 0xffffffff) == 0) { - num += 32; - word >>= 32; - } -#endif - if ((word & 0xffff) == 0) { - num += 16; - word >>= 16; - } - if ((word & 0xff) == 0) { - num += 8; - word >>= 8; - } - if ((word & 0xf) == 0) { - num += 4; - word >>= 4; - } - if ((word & 0x3) == 0) { - num += 2; - word >>= 2; - } - if ((word & 0x1) == 0) - num += 1; - return num; -} - -typedef const unsigned long __attribute__((__may_alias__)) long_alias_t; - -/* - * Find the first set bit in a memory region. 
- */ -static inline unsigned long -find_first_bit(const unsigned long *addr, unsigned long size) -{ - long_alias_t *p = (long_alias_t *) addr; - unsigned long result = 0; - unsigned long tmp; - - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found: - return result + __ffs(tmp); -} - -/* - * Find the next set bit in a memory region. - */ -static inline unsigned long -find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. 
*/ -found_middle: - return result + __ffs(tmp); -} - -#endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 15dd0a9..94de3e4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1385,19 +1385,46 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, - int cpumode, + bool branch_history, u64 ip) { struct addr_location al; al.filtered = 0; al.sym = NULL; - if (cpumode == -1) + if (branch_history) thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); - else + else { + u8 cpumode = PERF_RECORD_MISC_USER; + + if (ip >= PERF_CONTEXT_MAX) { + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. 
+ */ + callchain_cursor_reset(&callchain_cursor); + return 1; + } + return 0; + } thread__find_addr_location(thread, cpumode, MAP__FUNCTION, ip, &al); + } + if (al.sym != NULL) { if (sort__has_parent && !*parent && symbol__match_regex(al.sym, &parent_regex)) @@ -1480,11 +1507,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct addr_location *root_al, int max_stack) { - u8 cpumode = PERF_RECORD_MISC_USER; int chain_nr = min(max_stack, (int)chain->nr); - int i; - int j; - int err; + int i, j, err; int skip_idx = -1; int first_call = 0; @@ -1542,10 +1566,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { err = add_callchain_ip(thread, parent, root_al, - -1, be[i].to); + true, be[i].to); if (!err) err = add_callchain_ip(thread, parent, root_al, - -1, be[i].from); + true, be[i].from); if (err == -EINVAL) break; if (err) @@ -1574,36 +1598,10 @@ check_calls: #endif ip = chain->ips[j]; - if (ip >= PERF_CONTEXT_MAX) { - switch (ip) { - case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; - break; - case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; - break; - case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; - break; - default: - pr_debug("invalid callchain context: " - "%"PRId64"\n", (s64) ip); - /* - * It seems the callchain is corrupted. - * Discard all. - */ - callchain_cursor_reset(&callchain_cursor); - return 0; - } - continue; - } + err = add_callchain_ip(thread, parent, root_al, false, ip); - err = add_callchain_ip(thread, parent, root_al, - cpumode, ip); - if (err == -EINVAL) - break; if (err) - return err; + return (err < 0) ? 
err : 0; } return 0; diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index cf69325..8acd0df 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -137,16 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) static int get_max_rate(unsigned int *rate) { - char path[PATH_MAX]; - const char *procfs = procfs__mountpoint(); - - if (!procfs) - return -1; - - snprintf(path, PATH_MAX, - "%s/sys/kernel/perf_event_max_sample_rate", procfs); - - return filename__read_int(path, (int *) rate); + return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate); } static int record_opts__config_freq(struct record_opts *opts) diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index e73b6a5..c93fb0c 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -20,7 +20,7 @@ struct a2l_data { const char *input; - unsigned long addr; + u64 addr; bool found; const char *filename; @@ -147,7 +147,7 @@ static void addr2line_cleanup(struct a2l_data *a2l) free(a2l); } -static int addr2line(const char *dso_name, unsigned long addr, +static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line, struct dso *dso) { int ret = 0; @@ -193,7 +193,7 @@ void dso__free_a2l(struct dso *dso) #else /* HAVE_LIBBFD_SUPPORT */ -static int addr2line(const char *dso_name, unsigned long addr, +static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused) { @@ -252,7 +252,7 @@ void dso__free_a2l(struct dso *dso __maybe_unused) */ #define A2L_FAIL_LIMIT 123 -char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym, +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym) { char *file = NULL; @@ -293,10 +293,10 @@ out: dso__free_a2l(dso); } if (sym) { - if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "", + if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? 
sym->name : "", addr - sym->start) < 0) return SRCLINE_UNKNOWN; - } else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) + } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; return srcline; } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index fa585c6..d7efb03 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -129,6 +129,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { void *tmp; + long offset; if (need_swap) { phdr->p_type = bswap_32(phdr->p_type); @@ -140,12 +141,13 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) continue; buf_size = phdr->p_filesz; + offset = phdr->p_offset; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; buf = tmp; - fseek(fp, phdr->p_offset, SEEK_SET); + fseek(fp, offset, SEEK_SET); if (fread(buf, buf_size, 1, fp) != 1) goto out_free; @@ -178,6 +180,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { void *tmp; + long offset; if (need_swap) { phdr->p_type = bswap_32(phdr->p_type); @@ -189,12 +192,13 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) continue; buf_size = phdr->p_filesz; + offset = phdr->p_offset; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; buf = tmp; - fseek(fp, phdr->p_offset, SEEK_SET); + fseek(fp, offset, SEEK_SET); if (fread(buf, buf_size, 1, fp) != 1) goto out_free; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d5eab3f3..b86744f 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -442,23 +442,6 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } -int filename__read_int(const char *filename, int *value) -{ - char line[64]; - int fd = open(filename, 
O_RDONLY), err = -1; - - if (fd < 0) - return -1; - - if (read(fd, line, sizeof(line)) > 0) { - *value = atoi(line); - err = 0; - } - - close(fd); - return err; -} - int filename__read_str(const char *filename, char **buf, size_t *sizep) { size_t size = 0, alloc_size = 0; @@ -523,16 +506,9 @@ const char *get_filename_for_perf_kvm(void) int perf_event_paranoid(void) { - char path[PATH_MAX]; - const char *procfs = procfs__mountpoint(); int value; - if (!procfs) - return INT_MAX; - - scnprintf(path, PATH_MAX, "%s/sys/kernel/perf_event_paranoid", procfs); - - if (filename__read_int(path, &value)) + if (sysctl__read_int("kernel/perf_event_paranoid", &value)) return INT_MAX; return value; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 419bee0..027a515 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -153,7 +153,7 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))) extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); extern int prefixcmp(const char *str, const char *prefix); -extern void set_buildid_dir(void); +extern void set_buildid_dir(const char *dir); static inline const char *skip_prefix(const char *str, const char *prefix) { @@ -269,35 +269,6 @@ void event_attr_init(struct perf_event_attr *attr); #define _STR(x) #x #define STR(x) _STR(x) -/* - * Determine whether some value is a power of two, where zero is - * *not* considered a power of two. 
- */ - -static inline __attribute__((const)) -bool is_power_of_2(unsigned long n) -{ - return (n != 0 && ((n & (n - 1)) == 0)); -} - -static inline unsigned next_pow2(unsigned x) -{ - if (!x) - return 1; - return 1ULL << (32 - __builtin_clz(x - 1)); -} - -static inline unsigned long next_pow2_l(unsigned long x) -{ -#if BITS_PER_LONG == 64 - if (x <= (1UL << 31)) - return next_pow2(x); - return (unsigned long)next_pow2(x >> 32) << 32; -#else - return next_pow2(x); -#endif -} - size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); @@ -339,11 +310,10 @@ static inline int path__join3(char *bf, size_t size, struct dso; struct symbol; -char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym, +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym); void free_srcline(char *srcline); -int filename__read_int(const char *filename, int *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c index dfe4548..1c12536 100644 --- a/tools/thermal/tmon/sysfs.c +++ b/tools/thermal/tmon/sysfs.c @@ -446,7 +446,7 @@ int probe_thermal_sysfs(void) return -1; } - ptdata.tzi = calloc(sizeof(struct tz_info), ptdata.max_tz_instance+1); + ptdata.tzi = calloc(ptdata.max_tz_instance+1, sizeof(struct tz_info)); if (!ptdata.tzi) { fprintf(stderr, "Err: allocate tz_info\n"); return -1; @@ -454,8 +454,8 @@ int probe_thermal_sysfs(void) /* we still show thermal zone information if there is no cdev */ if (ptdata.nr_cooling_dev) { - ptdata.cdi = calloc(sizeof(struct cdev_info), - ptdata.max_cdev_instance + 1); + ptdata.cdi = calloc(ptdata.max_cdev_instance + 1, + sizeof(struct cdev_info)); if (!ptdata.cdi) { free(ptdata.tzi); fprintf(stderr, "Err: allocate cdev_info\n"); diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 9325f46..505ad51 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -3,7 +3,7 
@@ test: virtio_test vringh_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o -CFLAGS += -g -O2 -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE +CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 8eb6421..a3e0701 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -6,6 +6,7 @@ /* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ #define list_add_tail(a, b) do {} while (0) #define list_del(a) do {} while (0) +#define list_for_each_entry(a, b, c) while (0) /* end of stubs */ struct virtio_device { diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h new file mode 100644 index 0000000..9de9e6a --- /dev/null +++ b/tools/virtio/linux/virtio_byteorder.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H +#define _LINUX_VIRTIO_BYTEORDER_STUB_H + +#include <asm/byteorder.h> +#include "../../include/linux/byteorder/generic.h" +#include "../../include/linux/virtio_byteorder.h" + +#endif diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 83b27e8..806d683 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -1,6 +1,72 @@ -#define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 32 +#include <linux/virtio_byteorder.h> +#include <linux/virtio.h> +#include <uapi/linux/virtio_config.h> + +/* + * __virtio_test_bit - helper to test feature bits. For use by transports. + * Devices should normally use virtio_has_feature, + * which includes more checks. 
+ * @vdev: the device + * @fbit: the feature bit + */ +static inline bool __virtio_test_bit(const struct virtio_device *vdev, + unsigned int fbit) +{ + return vdev->features & (1ULL << fbit); +} + +/** + * __virtio_set_bit - helper to set feature bits. For use by transports. + * @vdev: the device + * @fbit: the feature bit + */ +static inline void __virtio_set_bit(struct virtio_device *vdev, + unsigned int fbit) +{ + vdev->features |= (1ULL << fbit); +} + +/** + * __virtio_clear_bit - helper to clear feature bits. For use by transports. + * @vdev: the device + * @fbit: the feature bit + */ +static inline void __virtio_clear_bit(struct virtio_device *vdev, + unsigned int fbit) +{ + vdev->features &= ~(1ULL << fbit); +} #define virtio_has_feature(dev, feature) \ (__virtio_test_bit((dev), feature)) +static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) +{ + return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); +} + +static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) +{ + return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); +} + +static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) +{ + return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); +} + +static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) +{ + return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); +} + +static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) +{ + return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); +} + +static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) +{ + return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); +} + diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h new file mode 100644 index 0000000..e7a1096 --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_types.h 
@@ -0,0 +1 @@ +#include "../../include/uapi/linux/virtio_types.h" diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index db3437c..e044589 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -11,6 +11,7 @@ #include <sys/types.h> #include <fcntl.h> #include <stdbool.h> +#include <linux/virtio_types.h> #include <linux/vhost.h> #include <linux/virtio.h> #include <linux/virtio_ring.h> @@ -227,6 +228,14 @@ const struct option longopts[] = { .val = 'i', }, { + .name = "virtio-1", + .val = '1', + }, + { + .name = "no-virtio-1", + .val = '0', + }, + { .name = "delayed-interrupt", .val = 'D', }, @@ -243,6 +252,7 @@ static void help(void) fprintf(stderr, "Usage: virtio_test [--help]" " [--no-indirect]" " [--no-event-idx]" + " [--no-virtio-1]" " [--delayed-interrupt]" "\n"); } @@ -251,7 +261,7 @@ int main(int argc, char **argv) { struct vdev_info dev; unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | - (1ULL << VIRTIO_RING_F_EVENT_IDX); + (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); int o; bool delayed = false; @@ -272,6 +282,9 @@ int main(int argc, char **argv) case 'i': features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); break; + case '0': + features &= ~(1ULL << VIRTIO_F_VERSION_1); + break; case 'D': delayed = true; break; diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 9d4b1bc..5f94f51 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -7,6 +7,7 @@ #include <linux/virtio.h> #include <linux/vringh.h> #include <linux/virtio_ring.h> +#include <linux/virtio_config.h> #include <linux/uaccess.h> #include <sys/types.h> #include <sys/stat.h> @@ -131,7 +132,7 @@ static inline int vringh_get_head(struct vringh *vrh, u16 *head) return 1; } -static int parallel_test(unsigned long features, +static int parallel_test(u64 features, bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r), bool fast_vringh) @@ -456,6 +457,8 @@ int main(int 
argc, char *argv[]) __virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC); else if (strcmp(argv[1], "--eventidx") == 0) __virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX); + else if (strcmp(argv[1], "--virtio-1") == 0) + __virtio_set_bit(&vdev, VIRTIO_F_VERSION_1); else if (strcmp(argv[1], "--slow-range") == 0) getrange = getrange_slow; else if (strcmp(argv[1], "--fast-vringh") == 0) |