From cf3a13d790f9760c4bfcae802aaba80efb3519af Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 1 Jul 2013 10:21:51 +0800 Subject: arch: tile: include: asm: add cmpxchg64() definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need add cmpxchg64(), or will cause compiling issue. Need define it as cmpxchg() only for 64-bit operation, since cmpxchg() can support 8 bytes. The related error (with allmodconfig): drivers/block/blockconsole.c: In function ‘bcon_advance_console_bytes’: drivers/block/blockconsole.c:164:2: error: implicit declaration of function ‘cmpxchg64’ [-Werror=implicit-function-declaration] Signed-off-by: Chen Gang Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cmpxchg.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h index 276f067..1da5bfb 100644 --- a/arch/tile/include/asm/cmpxchg.h +++ b/arch/tile/include/asm/cmpxchg.h @@ -68,6 +68,12 @@ extern unsigned long __cmpxchg_called_with_bad_pointer(void); #define tas(ptr) (xchg((ptr), 1)) +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_TILE_CMPXCHG_H */ -- cgit v1.1 From dd78bc11fb2050b6a3990d0421feca4c68ca4335 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 23 Jul 2013 17:32:04 -0400 Subject: tile: convert uses of "inv" to "finv" The "inv" (invalidate) instruction is generally less safe than "finv" (flush and invalidate), as it will drop dirty data from the cache. It turns out we have almost no need for "inv" (other than for the older 32-bit architecture in some limited cases), so convert to "finv" where possible and delete the extra "inv" infrastructure. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cacheflush.h | 44 +++++++++++++++++--------------------- arch/tile/include/asm/uaccess.h | 31 --------------------------- 2 files changed, 20 insertions(+), 55 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index 0fc63c4..92ee4c8 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -75,23 +75,6 @@ static inline void copy_to_user_page(struct vm_area_struct *vma, #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy((dst), (src), (len)) -/* - * Invalidate a VA range; pads to L2 cacheline boundaries. - * - * Note that on TILE64, __inv_buffer() actually flushes modified - * cache lines in addition to invalidating them, i.e., it's the - * same as __finv_buffer(). - */ -static inline void __inv_buffer(void *buffer, size_t size) -{ - char *next = (char *)((long)buffer & -L2_CACHE_BYTES); - char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); - while (next < finish) { - __insn_inv(next); - next += CHIP_INV_STRIDE(); - } -} - /* Flush a VA range; pads to L2 cacheline boundaries. */ static inline void __flush_buffer(void *buffer, size_t size) { @@ -115,13 +98,6 @@ static inline void __finv_buffer(void *buffer, size_t size) } -/* Invalidate a VA range and wait for it to be complete. */ -static inline void inv_buffer(void *buffer, size_t size) -{ - __inv_buffer(buffer, size); - mb(); -} - /* * Flush a locally-homecached VA range and wait for the evicted * cachelines to hit memory. @@ -142,6 +118,26 @@ static inline void finv_buffer_local(void *buffer, size_t size) mb_incoherent(); } +#ifdef __tilepro__ +/* Invalidate a VA range; pads to L2 cacheline boundaries. */ +static inline void __inv_buffer(void *buffer, size_t size) +{ + char *next = (char *)((long)buffer & -L2_CACHE_BYTES); + char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); + while (next < finish) { + __insn_inv(next); + next += CHIP_INV_STRIDE(); + } +} + +/* Invalidate a VA range and wait for it to be complete. */ +static inline void inv_buffer(void *buffer, size_t size) +{ + __inv_buffer(buffer, size); + mb(); +} +#endif + /* * Flush and invalidate a VA range that is homed remotely, waiting * until the memory controller holds the flushed values. If "hfh" is diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index e4d44bd..f68503f 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -567,37 +567,6 @@ static inline unsigned long __must_check flush_user( } /** - * inv_user: - Invalidate a block of memory in user space from cache. - * @mem: Destination address, in user space. - * @len: Number of bytes to invalidate. - * - * Returns number of bytes that could not be invalidated. - * On success, this will be zero. - * - * Note that on Tile64, the "inv" operation is in fact a - * "flush and invalidate", so cache write-backs will occur prior - * to the cache being marked invalid. - */ -extern unsigned long inv_user_asm(void __user *mem, unsigned long len); -static inline unsigned long __must_check __inv_user( - void __user *mem, unsigned long len) -{ - int retval; - - might_fault(); - retval = inv_user_asm(mem, len); - mb_incoherent(); - return retval; -} -static inline unsigned long __must_check inv_user( - void __user *mem, unsigned long len) -{ - if (access_ok(VERIFY_WRITE, mem, len)) - return __inv_user(mem, len); - return len; -} - -/** * finv_user: - Flush-inval a block of memory in user space from cache. * @mem: Destination address, in user space. * @len: Number of bytes to invalidate. -- cgit v1.1 From 5916700c768803546b6fe7d093dcba40d22fcf57 Mon Sep 17 00:00:00 2001 From: Ken Steele Date: Thu, 1 Aug 2013 15:55:07 -0400 Subject: tile: optimize strnlen using SIMD instructions Using strlen as a model, add length checking to create strnlen. Signed-off-by: Ken Steele Signed-off-by: Chris Metcalf --- arch/tile/include/asm/string.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h index 7535cf1..92b271b 100644 --- a/arch/tile/include/asm/string.h +++ b/arch/tile/include/asm/string.h @@ -21,8 +21,10 @@ #define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_STRCHR #define __HAVE_ARCH_STRLEN +#define __HAVE_ARCH_STRNLEN extern __kernel_size_t strlen(const char *); +extern __kernel_size_t strnlen(const char *, __kernel_size_t); extern char *strchr(const char *s, int c); extern void *memchr(const void *s, int c, size_t n); extern void *memset(void *, int, __kernel_size_t); -- cgit v1.1 From 9bbb08faa91db7711614c9cb0983644086b97aca Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 11:59:06 -0400 Subject: tile PCI RC: cleanups for tilepro PCI RC - remove unneeded include in pci.c - eliminate unused pci_controller.first_busno field - prefer msleep to mdelay - remove stale comment about pci_scan_bus_parented() Signed-off-by: Chris Metcalf --- arch/tile/include/asm/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index 54a9242..cd10e65 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -29,7 +29,6 @@ struct pci_controller { int index; /* PCI domain number */ struct pci_bus *root_bus; - int first_busno; int last_busno; int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ -- cgit v1.1 From 803c874abe1358998ab65a8cca728684ebb50a13 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 12:24:42 -0400 Subject: tile: support LSI MEGARAID SAS HBA hybrid dma_ops The LSI MEGARAID SAS HBA suffers from the problem where it can do 64-bit DMA to streaming buffers but not to consistent buffers. In other words, 64-bit DMA is used for disk data transfers and 32-bit DMA must be used for control message transfers. According to LSI, the firmware is not fully functional yet. This change implements a kind of hybrid dma_ops to support this. Note that on most other platforms, the 64-bit DMA addressing space is the same as the 32-bit DMA space and they overlap the physical memory space. No special arrangement is needed to support this kind of mixed DMA capability. On TILE-Gx, the 64-bit DMA space is completely separate from the 32-bit DMA space. Due to the use of the IOMMU, the 64-bit DMA space doesn't overlap the physical memory space. On the other hand, the 32-bit DMA space overlaps the physical memory space under 4GB. The separate address spaces make it necessary to have separate dma_ops. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/dma-mapping.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index f2ff191..4a60059 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -23,6 +23,7 @@ extern struct dma_map_ops *tile_dma_map_ops; extern struct dma_map_ops *gx_pci_dma_map_ops; extern struct dma_map_ops *gx_legacy_pci_dma_map_ops; +extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { @@ -44,12 +45,12 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return paddr + get_dma_offset(dev); + return paddr; } static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { - return daddr - get_dma_offset(dev); + return daddr; } static inline void dma_mark_clean(void *addr, size_t size) {} @@ -88,7 +89,10 @@ dma_set_mask(struct device *dev, u64 mask) struct dma_map_ops *dma_ops = get_dma_ops(dev); /* Handle legacy PCI devices with limited memory addressability. */ - if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) { + if ((dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) && + (mask <= DMA_BIT_MASK(32))) { set_dma_ops(dev, gx_legacy_pci_dma_map_ops); set_dma_offset(dev, 0); if (mask > dev->archdata.max_direct_dma_addr) -- cgit v1.1 From cf89c4262bd5fa70e67953126001c08ecea4f346 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 16:45:22 -0400 Subject: tile PCI RC: support I/O space access To enable this functionality, configure CONFIG_TILE_PCI_IO. Without this flag, the kernel still assigns I/O address ranges to the devices, but no TRIO resource and mapping support is provided. We assign disjoint I/O address ranges to separate PCIe domains. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/io.h | 126 ++++++++++++++++++++++++++++++++++++++++---- arch/tile/include/asm/pci.h | 11 +++- 2 files changed, 127 insertions(+), 10 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index 3167291..73b319e 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -19,7 +19,8 @@ #include #include -#define IO_SPACE_LIMIT 0xfffffffful +/* Maximum PCI I/O space address supported. */ +#define IO_SPACE_LIMIT 0xffffffff /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -281,8 +282,108 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, #endif +#if CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) + +static inline u8 inb(unsigned long addr) +{ + return readb((volatile void __iomem *) addr); +} + +static inline u16 inw(unsigned long addr) +{ + return readw((volatile void __iomem *) addr); +} + +static inline u32 inl(unsigned long addr) +{ + return readl((volatile void __iomem *) addr); +} + +static inline void outb(u8 b, unsigned long addr) +{ + writeb(b, (volatile void __iomem *) addr); +} + +static inline void outw(u16 b, unsigned long addr) +{ + writew(b, (volatile void __iomem *) addr); +} + +static inline void outl(u32 b, unsigned long addr) +{ + writel(b, (volatile void __iomem *) addr); +} + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + if (count) { + u8 *buf = buffer; + do { + u8 x = inb(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insw(unsigned long addr, void *buffer, int count) +{ + if (count) { + u16 *buf = buffer; + do { + u16 x = inw(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + if (count) { + u32 *buf = buffer; + do { + u32 x = inl(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u8 *buf = buffer; + do { + outb(*buf++, addr); + } while (--count); + } +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u16 *buf = buffer; + do { + outw(*buf++, addr); + } while (--count); + } +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u32 *buf = buffer; + do { + outl(*buf++, addr); + } while (--count); + } +} + +extern void __iomem *ioport_map(unsigned long port, unsigned int len); +extern void ioport_unmap(void __iomem *addr); + +#else + /* - * The Tile architecture does not support IOPORT, even with PCI. + * The TilePro architecture does not support IOPORT, even with PCI. * Unfortunately we can't yet simply not declare these methods, * since some generic code that compiles into the kernel, but * we never run, uses them unconditionally. @@ -290,7 +391,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, static inline long ioport_panic(void) { +#ifdef __tilegx__ + panic("PCI IO space support is disabled. Configure the kernel with" + " CONFIG_TILE_PCI_IO to enable it"); +#else panic("inb/outb and friends do not exist on tile"); +#endif return 0; } @@ -335,13 +441,6 @@ static inline void outl(u32 b, unsigned long addr) ioport_panic(); } -#define inb_p(addr) inb(addr) -#define inw_p(addr) inw(addr) -#define inl_p(addr) inl(addr) -#define outb_p(x, addr) outb((x), (addr)) -#define outw_p(x, addr) outw((x), (addr)) -#define outl_p(x, addr) outl((x), (addr)) - static inline void insb(unsigned long addr, void *buffer, int count) { ioport_panic(); @@ -372,6 +471,15 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) ioport_panic(); } +#endif /* CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) */ + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + #define ioread16be(addr) be16_to_cpu(ioread16(addr)) #define ioread32be(addr) be32_to_cpu(ioread32(addr)) #define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr)) diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index cd10e65..9cf5308 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -144,6 +144,10 @@ struct pci_controller { int pio_mem_index; /* PIO region index for memory access */ +#ifdef CONFIG_TILE_PCI_IO + int pio_io_index; /* PIO region index for I/O space access */ +#endif + /* * Mem-Map regions for all the memory controllers so that Linux can * map all of its physical memory space to the PCI bus. @@ -153,6 +157,10 @@ struct pci_controller { int index; /* PCI domain number */ struct pci_bus *root_bus; + /* PCI I/O space resource for this controller. */ + struct resource io_space; + char io_space_name[32]; + /* PCI memory space resource for this controller. */ struct resource mem_space; char mem_space_name[32]; @@ -210,7 +218,8 @@ static inline int pcibios_assign_all_busses(void) } #define PCIBIOS_MIN_MEM 0 -#define PCIBIOS_MIN_IO 0 +/* Minimum PCI I/O address, starting at the page boundary. */ +#define PCIBIOS_MIN_IO PAGE_SIZE /* Use any cpu for PCI. */ #define cpumask_of_pcibus(bus) cpu_online_mask -- cgit v1.1 From 1c43649a9929ca4394ae389b4510c61f3876a12b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:18:02 -0400 Subject: tile PCI RC: restructure TRIO initialization The TRIO shim initialization is shared with other kernel drivers such as the endpoint and StreamIO drivers, so reorganize the initialization flow to ensure that the root complex driver properly initializes TRIO state regardless of what kind of TRIO driver will end up using the shim. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index 9cf5308..1f1b654 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -180,6 +180,7 @@ struct pci_controller { extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; +extern int num_trio_shims; extern void pci_iounmap(struct pci_dev *dev, void __iomem *); -- cgit v1.1 From 1198168733c8d6fbc6898fd8d7fcfb42befabb41 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:18:34 -0400 Subject: tile PCI RC: eliminate pci_controller.mem_resources field The .mem_resources[] field in the pci_controller struct is now obsoleted by the .mem_space and .io_space fields. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index 1f1b654..2c001b2 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -173,9 +173,6 @@ struct pci_controller { /* Table that maps the INTx numbers to Linux irq numbers. */ int irq_intx_table[4]; - - /* Address ranges that are routed to this controller/bridge. */ - struct resource mem_resources[3]; }; extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; -- cgit v1.1 From dc7d5cf2cab6d1fbb43c5c0569f43b7e4c822760 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:40:47 -0400 Subject: tile PCI RC: add dma_get_required_mask() The standard kernel function dma_get_required_mask() uses the highest DRAM address to determine if 32-bit or 64-bit DMA addressing is needed. This only works on architectures that have direct mapping between the PA and the PCI address space, i.e. those that don't have I/O TLBs or have I/O TLB but choose to use direct mapping. Neither of these are true for tilegx. Whether to use 64-bit DMA should depend on the PCI device's capability only, not on the amount of DRAM installeds, so we now advertise a 64-bit DMA mask unconditionally. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/dma-mapping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 4a60059..6f522d5 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -20,6 +20,10 @@ #include #include +#ifdef __tilegx__ +#define ARCH_HAS_DMA_GET_REQUIRED_MASK +#endif + extern struct dma_map_ops *tile_dma_map_ops; extern struct dma_map_ops *gx_pci_dma_map_ops; extern struct dma_map_ops *gx_legacy_pci_dma_map_ops; -- cgit v1.1 From 7c29b78a0e0c7df6e0ba0092fee334ddc3086f16 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 14:40:55 -0400 Subject: tile PCI RC: remove stale include of linux/numa.h Signed-off-by: Chris Metcalf --- arch/tile/include/asm/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index 2c001b2..c99ad44 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -17,7 +17,6 @@ #include #include -#include #include #ifndef __tilegx__ -- cgit v1.1 From bda0f5bad812df076a28fa5e58d86dfe68415251 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 14:11:21 -0400 Subject: tile: various console improvements This change improves and cleans up the tile console. - We enable HVC_IRQ support on tilegx, with the addition of a new Tilera hypervisor API for tilegx to allow a console IPI. If IPI support is not available we fall back to the previous polling mode. - We simplify the earlyprintk code to use CON_BOOT and eliminate some of the other supporting earlyprintk code. - A new tile_console_write() primitive is used to send output to the console and is factored out of the hvc_tile driver. This lets us support a "sim_console" boot argument to allow using simulator hooks to send output to the "console" as a slightly faster alternative to emulating the hardware more directly. Signed-off-by: Chris Metcalf Acked-by: Greg Kroah-Hartman --- arch/tile/include/asm/setup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h index d048888..e989090 100644 --- a/arch/tile/include/asm/setup.h +++ b/arch/tile/include/asm/setup.h @@ -24,9 +24,8 @@ */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) +int tile_console_write(const char *buf, int count); void early_panic(const char *fmt, ...); -void warn_early_printk(void); -void __init disable_early_printk(void); /* Init-time routine to do tile-specific per-cpu setup. */ void setup_cpu(int boot); -- cgit v1.1 From 2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 16:04:13 -0400 Subject: tile: fast-path unaligned memory access for tilegx This change enables unaligned userspace memory access via a kernel fast path on tilegx. The kernel tracks user PC/instruction pairs per-thread using a direct-mapped cache in userspace. The cache maps those PC/instruction pairs to JIT'ed instruction sequences that load or store using byte-wide load store intructions and then synthesize 2-, 4- or 8-byte load or store results. Once an instruction has been seen to generate an unaligned access once, subsequent hits on that instruction typically require overhead of only around 50 cycles if cache and TLB is hot. We support the prctl() PR_GET_UNALIGN / PR_SET_UNALIGN sys call to enable or disable unaligned fixups on a per-process basis. To do this we pull some of the tilepro unaligned support out of the single_step.c file; tilepro uses instruction disassembly for both single-step and unaligned access support. Since tilegx actually has hardware singlestep support, though, it's cleaner to keep the tilegx unaligned access code in a separate file. While we're at it, properly rename the tilepro-specific types, etc., to have tilepro suffixes instead of generic tile suffixes. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 7 +++++++ arch/tile/include/asm/ptrace.h | 3 +-- arch/tile/include/asm/sections.h | 4 +++- arch/tile/include/asm/thread_info.h | 6 ++++++ arch/tile/include/asm/traps.h | 11 +++++++++++ 5 files changed, 28 insertions(+), 3 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index b3f1049..cda2724 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -247,6 +247,13 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(task) task_pc(task) #define KSTK_ESP(task) task_sp(task) +/* Fine-grained unaligned JIT support */ +#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + /* Standard format for printing registers and other word-size data. */ #ifdef __tilegx__ # define REGFMT "0x%016lx" diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index fd41226..73b681b 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -79,8 +79,7 @@ extern void single_step_execve(void); struct task_struct; -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code); +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs); #ifdef __tilegx__ /* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */ diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index 7d8a935..cc95276 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -28,7 +28,9 @@ extern char __w1data_begin[], __w1data_end[]; /* Not exactly sections, but PC comparison points in the code. */ extern char __rt_sigreturn[], __rt_sigreturn_end[]; -#ifndef __tilegx__ +#ifdef __tilegx__ +extern char __start_unalign_asm_code[], __end_unalign_asm_code[]; +#else extern char sys_cmpxchg[], __sys_cmpxchg_end[]; extern char __sys_cmpxchg_grab_lock[]; extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index d1733de..b8aa6df 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -39,6 +39,11 @@ struct thread_info { struct restart_block restart_block; struct single_step_state *step_state; /* single step state (if non-zero) */ + int align_ctl; /* controls unaligned access */ +#ifdef __tilegx__ + unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */ + void __user *unalign_jit_base; /* unalign fixup JIT base */ +#endif }; /* @@ -56,6 +61,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ .step_state = NULL, \ + .align_ctl = 0, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index e28c3df4..5f172b2 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -15,6 +15,7 @@ #ifndef _ASM_TILE_TRAPS_H #define _ASM_TILE_TRAPS_H +#ifndef __ASSEMBLY__ #include /* mm/fault.c */ @@ -69,6 +70,16 @@ void gx_singlestep_handle(struct pt_regs *, int fault_num); /* kernel/intvec_64.S */ void fill_ra_stack(void); + +/* Handle unalign data fixup. */ +extern void do_unaligned(struct pt_regs *regs, int vecnum); +#endif + +#endif /* __ASSEMBLY__ */ + +#ifdef __tilegx__ +/* 128 byte JIT per unalign fixup. */ +#define UNALIGN_JIT_SHIFT 7 #endif #endif /* _ASM_TILE_TRAPS_H */ -- cgit v1.1 From 3ef23111546df9e9dab2e2befb412a9563db0628 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 16:10:23 -0400 Subject: tile: avoid recursive backtrace faults This change adds support for avoiding recursive backtracer crashes; we haven't seen this in practice other than when things are seriously corrupt, but it may help avoid losing the root cause of a crash. Also, don't abort kernel backtracers for invalid userspace PC's. If we do, we lose the ability to backtrace through a userspace call to a bad address above PAGE_OFFSET, even though that it can be perfectly reasonable to continue the backtrace in such a case. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index cda2724..fed1c04 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -110,6 +110,8 @@ struct thread_struct { unsigned long long interrupt_mask; /* User interrupt-control 0 state */ unsigned long intctrl_0; + /* Is this task currently doing a backtrace? */ + bool in_backtrace; #if CHIP_HAS_PROC_STATUS_SPR() /* Any other miscellaneous processor state bits */ unsigned long proc_status; -- cgit v1.1 From bc1a298f4e04833db4c430df59b90039f0170515 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 11:36:54 -0400 Subject: tile: support CONFIG_PREEMPT This change adds support for CONFIG_PREEMPT (full kernel preemption). In addition to the core support, this change includes a number of places where we fix up uses of smp_processor_id() and per-cpu variables. I also eliminate the PAGE_HOME_HERE and PAGE_HOME_UNKNOWN values for page homing, as it turns out they weren't being used. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/homecache.h | 8 -------- arch/tile/include/asm/irqflags.h | 21 ++++++++++++++++++--- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h index 7b77713..49d19df 100644 --- a/arch/tile/include/asm/homecache.h +++ b/arch/tile/include/asm/homecache.h @@ -44,16 +44,8 @@ struct zone; */ #define PAGE_HOME_INCOHERENT -3 -#if CHIP_HAS_CBOX_HOME_MAP() /* Home for the page is distributed via hash-for-home. */ #define PAGE_HOME_HASH -4 -#endif - -/* Homing is unknown or unspecified. Not valid for page_home(). */ -#define PAGE_HOME_UNKNOWN -5 - -/* Home on the current cpu. Not valid for page_home(). */ -#define PAGE_HOME_HERE -6 /* Support wrapper to use instead of explicit hv_flush_remote(). */ extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length, diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index c96f9bb..71af574 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -124,6 +124,12 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); #define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR) +#ifdef CONFIG_DEBUG_PREEMPT +/* Due to inclusion issues, we can't rely on here. */ +extern unsigned int debug_smp_processor_id(void); +# define smp_processor_id() debug_smp_processor_id() +#endif + /* Disable interrupts. */ #define arch_local_irq_disable() \ interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS) @@ -132,9 +138,18 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); #define arch_local_irq_disable_all() \ interrupt_mask_set_mask(-1ULL) +/* + * Read the set of maskable interrupts. + * We avoid the preemption warning here via __this_cpu_ptr since even + * if irqs are already enabled, it's harmless to read the wrong cpu's + * enabled mask. + */ +#define arch_local_irqs_enabled() \ + (*__this_cpu_ptr(&interrupts_enabled_mask)) + /* Re-enable all maskable interrupts. */ #define arch_local_irq_enable() \ - interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask)) + interrupt_mask_reset_mask(arch_local_irqs_enabled()) /* Disable or enable interrupts based on flag argument. */ #define arch_local_irq_restore(disabled) do { \ @@ -161,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Prevent the given interrupt from being enabled next time we enable irqs. */ #define arch_local_irq_mask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) &= ~(1ULL << (interrupt))) + this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt))) /* Prevent the given interrupt from being enabled immediately. */ #define arch_local_irq_mask_now(interrupt) do { \ @@ -171,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Allow the given interrupt to be enabled next time we enable irqs. */ #define arch_local_irq_unmask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) |= (1ULL << (interrupt))) + this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt))) /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */ #define arch_local_irq_unmask_now(interrupt) do { \ -- cgit v1.1 From ba02f0eb826da6dbdc5a2958ac52304ee441234f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 11:55:35 -0400 Subject: tile: improve big-endian support First, fix a bug in asm/unaligned.h; we need to just use the asm-generic unaligned.h so we properly choose endian-correct flavors. Second, keep the hv/hypervisor.h ABI fully "native" in the sense that we don't have __BIG_ENDIAN__ ifdefs there. Instead, we use macros in the head_NN.S assembly code to properly extract two 32-bit structure members from a 64-bit register holding the structure. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/unaligned.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h index 37dfbe5..5a58a0d 100644 --- a/arch/tile/include/asm/unaligned.h +++ b/arch/tile/include/asm/unaligned.h @@ -15,11 +15,15 @@ #ifndef _ASM_TILE_UNALIGNED_H #define _ASM_TILE_UNALIGNED_H -#include -#include -#include -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le +/* + * We could implement faster get_unaligned_[be/le]64 using the ldna + * instruction on tilegx; however, we need to either copy all of the + * other generic functions to here (which is pretty ugly) or else + * modify both the generic code and other arch code to allow arch + * specific unaligned data access functions. Given these functions + * are not often called, we'll stick with the generic version. + */ +#include /* * Is the kernel doing fixups of unaligned accesses? If <0, no kernel -- cgit v1.1 From 4a556f4f56da3110b27e265b79f0e7582115445c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 15:33:32 -0400 Subject: tile: implement gettimeofday() via vDSO This change creates the framework for vDSO calls, makes the existing rt_sigreturn() mechanism use it, and adds a fast gettimeofday(). Now that we need to expose the vDSO address to userspace, we add AT_SYSINFO_EHDR to the set of aux entries provided to userspace. (You can disable any extra vDSO support by booting with vdso=0, but the rt_sigreturn vDSO page will still be provided.) Note that glibc has supported the tile vDSO since release 2.17. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/elf.h | 5 ++++ arch/tile/include/asm/mmu.h | 1 + arch/tile/include/asm/page.h | 8 ++++++- arch/tile/include/asm/processor.h | 6 ++--- arch/tile/include/asm/sections.h | 4 ++++ arch/tile/include/asm/vdso.h | 49 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 arch/tile/include/asm/vdso.h (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index ff8a934..31d854f 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -132,6 +132,11 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *); struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ +} while (0) + #ifdef CONFIG_COMPAT #define COMPAT_ELF_PLATFORM "tilegx-m32" diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h index e2c7890..0cab118 100644 --- a/arch/tile/include/asm/mmu.h +++ b/arch/tile/include/asm/mmu.h @@ -22,6 +22,7 @@ struct mm_context { * semaphore but atomically, but it is conservatively set. */ unsigned long priority_cached; + unsigned long vdso_base; }; typedef struct mm_context mm_context_t; diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index dd033a4..b4f96c0 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -39,6 +39,12 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) /* + * We do define AT_SYSINFO_EHDR to support vDSO, + * but don't use the gate mechanism. + */ +#define __HAVE_ARCH_GATE_AREA 1 + +/* * If the Kconfig doesn't specify, set a maximum zone order that * is enough so that we can create huge pages from small pages given * the respective sizes of the two page types. See . @@ -246,7 +252,7 @@ static inline __attribute_const__ int get_order(unsigned long size) #endif /* __tilegx__ */ -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD) #ifdef CONFIG_HIGHMEM diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index fed1c04..461322b 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -180,10 +180,10 @@ struct thread_struct { #define TASK_SIZE TASK_SIZE_MAX #endif -/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */ -#define VDSO_BASE (TASK_SIZE - PAGE_SIZE) +#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base) +#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x)) -#define STACK_TOP VDSO_BASE +#define STACK_TOP TASK_SIZE /* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */ #define STACK_TOP_MAX TASK_SIZE_MAX diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index cc95276..5d5d3b7 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -25,6 +25,10 @@ extern char _sinitdata[], _einitdata[]; /* Write-once data is writable only till the end of initialization. */ extern char __w1data_begin[], __w1data_end[]; +extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT +extern char vdso32_start[], vdso32_end[]; +#endif /* Not exactly sections, but PC comparison points in the code. */ extern char __rt_sigreturn[], __rt_sigreturn_end[]; diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h new file mode 100644 index 0000000..9f6a78d --- /dev/null +++ b/arch/tile/include/asm/vdso.h @@ -0,0 +1,49 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __TILE_VDSO_H__ +#define __TILE_VDSO_H__ + +#include + +/* + * Note about the vdso_data structure: + * + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. + */ + +struct vdso_data { + __u64 tz_update_count; /* Timezone atomicity ctr */ + __u64 tb_update_count; /* Timebase atomicity ctr */ + __u64 xtime_tod_stamp; /* TOD clock for xtime */ + __u64 xtime_clock_sec; /* Kernel time second */ + __u64 xtime_clock_nsec; /* Kernel time nanosecond */ + __u64 wtom_clock_sec; /* Wall to monotonic clock second */ + __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */ + __u32 mult; /* Cycle to nanosecond multiplier */ + __u32 shift; /* Cycle to nanosecond divisor (power of two) */ + __u32 tz_minuteswest; /* Minutes west of Greenwich */ + __u32 tz_dsttime; /* Type of dst correction */ +}; + +extern struct vdso_data *vdso_data; + +/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. */ +extern void __vdso_rt_sigreturn(void); + +extern int setup_vdso_pages(void); + +#endif /* __TILE_VDSO_H__ */ -- cgit v1.1 From a61fd5e3662d576998d72f80376f23b6ef083d6e Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Fri, 9 Aug 2013 13:26:09 -0400 Subject: tile: support ftrace on tilegx This commit adds support for static ftrace, graph function support, and dynamic tracer support. Signed-off-by: Tony Lu Signed-off-by: Chris Metcalf --- arch/tile/include/asm/ftrace.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h index 461459b..13a9bb8 100644 --- a/arch/tile/include/asm/ftrace.h +++ b/arch/tile/include/asm/ftrace.h @@ -15,6 +15,26 @@ #ifndef _ASM_TILE_FTRACE_H #define _ASM_TILE_FTRACE_H -/* empty */ +#ifdef CONFIG_FUNCTION_TRACER + +#define MCOUNT_ADDR ((unsigned long)(__mcount)) +#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void __mcount(void); + +#ifdef CONFIG_DYNAMIC_FTRACE +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* _ASM_TILE_FTRACE_H */ -- cgit v1.1 From 3fa17c395bb0c358745fbe0c8aa039d6cdac1735 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Fri, 9 Aug 2013 15:08:57 -0400 Subject: tile: support kprobes on tilegx This change includes support for Kprobes, Jprobes and Return Probes. Reviewed-by: Masami Hiramatsu Signed-off-by: Tony Lu Signed-off-by: Chris Metcalf --- arch/tile/include/asm/Kbuild | 1 - arch/tile/include/asm/kdebug.h | 28 +++++++++++++++ arch/tile/include/asm/kprobes.h | 79 +++++++++++++++++++++++++++++++++++++++++ arch/tile/include/asm/ptrace.h | 1 + 4 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 arch/tile/include/asm/kdebug.h create mode 100644 arch/tile/include/asm/kprobes.h (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index b17b9b8..4c0b3c2 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h -generic-y += kdebug.h generic-y += local.h generic-y += msgbuf.h generic-y += mutex.h diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h new file mode 100644 index 0000000..5bbbfa9 --- /dev/null +++ b/arch/tile/include/asm/kdebug.h @@ -0,0 +1,28 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KDEBUG_H +#define _ASM_TILE_KDEBUG_H + +#include + +enum die_val { + DIE_OOPS = 1, + DIE_BREAK, + DIE_SSTEPBP, + DIE_PAGE_FAULT, + DIE_COMPILED_BPT +}; + +#endif /* _ASM_TILE_KDEBUG_H */ diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h new file mode 100644 index 0000000..d8f9a83 --- /dev/null +++ b/arch/tile/include/asm/kprobes.h @@ -0,0 +1,79 @@ +/* + * arch/tile/include/asm/kprobes.h + * + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KPROBES_H +#define _ASM_TILE_KPROBES_H + +#include +#include +#include + +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 2 + +#define kretprobe_blacklist_size 0 + +typedef tile_bundle_bits kprobe_opcode_t; + +#define flush_insn_slot(p) \ + flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) + +struct kprobe; + +/* Architecture specific copy of original instruction. */ +struct arch_specific_insn { + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long saved_pc; +}; + +#define MAX_JPROBES_STACK_SIZE 128 +#define MAX_JPROBES_STACK_ADDR \ + (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \ + - sizeof(struct pt_regs)) + +#define MIN_JPROBES_STACK_SIZE(ADDR) \ + ((((ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR) \ + ? MAX_JPROBES_STACK_ADDR - (ADDR) \ + : MAX_JPROBES_STACK_SIZE) + +/* per-cpu kprobe control block. */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_saved_pc; + unsigned long jprobe_saved_sp; + struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_JPROBES_STACK_SIZE]; +}; + +extern tile_bundle_bits breakpoint2_insn; +extern tile_bundle_bits breakpoint_insn; + +void arch_remove_kprobe(struct kprobe *); + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#endif /* _ASM_TILE_KPROBES_H */ diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index 73b681b..0d25c21 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -33,6 +33,7 @@ typedef unsigned long pt_reg_t; #ifndef __ASSEMBLY__ +#define regs_return_value(regs) ((regs)->regs[0]) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) #define user_stack_pointer(regs) ((regs)->sp) -- cgit v1.1 From d4d9eab4ade468b6a97b6853fdd72e8f21474324 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 15:38:43 -0400 Subject: tile: use proper .align directives on __ex_table sections This may fix a reported bug where an R_TILEGX_64 in a module was not pointing to an aligned address. Reported-by: Simon Marchi Signed-off-by: Chris Metcalf --- arch/tile/include/asm/futex.h | 1 + arch/tile/include/asm/uaccess.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 5909ac3..1a6ef1b 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -43,6 +43,7 @@ ".pushsection .fixup,\"ax\"\n" \ "0: { movei %0, %5; j 9f }\n" \ ".section __ex_table,\"a\"\n" \ + ".align 8\n" \ ".quad 1b, 0b\n" \ ".popsection\n" \ "9:" \ diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index f68503f..b6cde32 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -127,8 +127,10 @@ extern int fixup_exception(struct pt_regs *regs); #ifdef __LP64__ #define _ASM_PTR ".quad" +#define _ASM_ALIGN ".align 8" #else #define _ASM_PTR ".long" +#define _ASM_ALIGN ".align 4" #endif #define __get_user_asm(OP, x, ptr, ret) \ @@ -137,6 +139,7 @@ extern int fixup_exception(struct pt_regs *regs); "0: { movei %1, 0; movei %0, %3 }\n" \ "j 9f\n" \ ".section __ex_table,\"a\"\n" \ + _ASM_ALIGN "\n" \ _ASM_PTR " 1b, 0b\n" \ ".popsection\n" \ "9:" \ @@ -168,6 +171,7 @@ extern int fixup_exception(struct pt_regs *regs); "0: { movei %1, 0; movei %2, 0 }\n" \ "{ movei %0, %4; j 9f }\n" \ ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ ".word 1b, 0b\n" \ ".word 2b, 0b\n" \ ".popsection\n" \ @@ -224,6 +228,7 @@ extern int __get_user_bad(void) ".pushsection .fixup,\"ax\"\n" \ "0: { movei %0, %3; j 9f }\n" \ ".section __ex_table,\"a\"\n" \ + _ASM_ALIGN "\n" \ _ASM_PTR " 1b, 0b\n" \ ".popsection\n" \ "9:" \ @@ -248,6 +253,7 @@ extern int __get_user_bad(void) ".pushsection .fixup,\"ax\"\n" \ "0: { movei %0, %4; j 9f }\n" \ ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ ".word 1b, 0b\n" \ ".word 2b, 0b\n" \ ".popsection\n" \ -- cgit v1.1 From b2eca4274c1813c76291eab4859ca3e86e6fd35b Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Fri, 9 Aug 2013 15:45:24 -0400 Subject: tile: support ASLR fully With this change, tile Linux now supports address-space layout randomization for shared objects, stack, heap and vdso. Acked-by: Jiri Kosina Signed-off-by: Tony Lu Signed-off-by: Chris Metcalf --- arch/tile/include/asm/elf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index 31d854f..e1da88e 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -137,6 +137,10 @@ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ } while (0) +struct mm_struct; +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + #ifdef CONFIG_COMPAT #define COMPAT_ELF_PLATFORM "tilegx-m32" -- cgit v1.1 From 084fe6a0f53f61fd5f9b33391af1077addec0ce0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 16:17:03 -0400 Subject: tile: remove set/clear_fixmap APIs Nothing in the codebase was using them, and as written they took "unsigned long" as the physical address rather than "phys_addr_t", which is wrong on tilepro anyway. Rather than fixing stale APIs, just remove them; if there's ever demand for them on this platform, we can put them back. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/fixmap.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index e16dbf9..c6b9c1b 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -78,14 +78,6 @@ enum fixed_addresses { #endif }; -extern void __set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE) -- cgit v1.1 From 6fbeee29a2b87574527897c3ded1f92e236518c7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 16:53:50 -0400 Subject: tile: fix some -Wsign-compare warnings Normally the build doesn't include these warnings, but at one point I built with -Wsign-compare, and noticed a few things that are technically bugs. This change fixes those things. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/io.h | 6 +++--- arch/tile/include/asm/mmzone.h | 2 +- arch/tile/include/asm/spinlock_64.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index 73b319e..9fe4349 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -255,7 +255,7 @@ static inline void writeq(u64 val, unsigned long addr) static inline void memset_io(volatile void *dst, int val, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)dst & 0x3); val = (val & 0xff) * 0x01010101; for (x = 0; x < len; x += 4) @@ -265,7 +265,7 @@ static inline void memset_io(volatile void *dst, int val, size_t len) static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)src & 0x3); for (x = 0; x < len; x += 4) *(u32 *)(dst + x) = readl(src + x); @@ -274,7 +274,7 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, static inline void memcpy_toio(volatile void __iomem *dst, const void *src, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)dst & 0x3); for (x = 0; x < len; x += 4) writel(*(u32 *)(src + x), dst + x); diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h index 9d3dbce..804f109 100644 --- a/arch/tile/include/asm/mmzone.h +++ b/arch/tile/include/asm/mmzone.h @@ -42,7 +42,7 @@ static inline int pfn_to_nid(unsigned long pfn) #define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr) -static inline int pfn_valid(int pfn) +static inline int pfn_valid(unsigned long pfn) { int nid = pfn_to_nid(pfn); diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 5f8b6a0..9a12b9c 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -27,7 +27,7 @@ * Return the "current" portion of a ticket lock value, * i.e. the number that currently owns the lock. */ -static inline int arch_spin_current(u32 val) +static inline u32 arch_spin_current(u32 val) { return val >> __ARCH_SPIN_CURRENT_SHIFT; } @@ -36,7 +36,7 @@ static inline int arch_spin_current(u32 val) * Return the "next" portion of a ticket lock value, * i.e. the number that the next task to try to acquire the lock will get. */ -static inline int arch_spin_next(u32 val) +static inline u32 arch_spin_next(u32 val) { return val & __ARCH_SPIN_NEXT_MASK; } -- cgit v1.1 From 35f059761c5ac313d13372fe3cdaa41bce3d0dbf Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 10 Aug 2013 12:35:02 -0400 Subject: tilegx: change how we find the kernel stack Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0) to hold the CPU number and the top of the current kernel stack by using the low bits to hold the CPU number, and using the high bits to hold the address of the page just above where we'd want the kernel stack to be. That way we could initialize a new SP when first entering the kernel by just masking the SPR value and subtracting a couple of words. However, it's actually more useful to be able to place an arbitrary kernel-top value in the SPR. This allows us to create a new stack context (e.g. for virtualization) with an arbitrary top-of-stack VA. To make this work, we now store the CPU number in the high bits, above the highest legal VA bit (42 bits in the current tilegx microarchitecture). The full 42 bits are thus available to store the top of stack value. Getting the current cpu (a relatively common operation) is still fast; it's now a shift rather than a mask. We make this change only for tilegx, since tilepro has too few SPR bits to do this, and we don't need this support on tilepro anyway. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 47 +++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 461322b..230b830 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -148,9 +148,10 @@ struct thread_struct { /* * Start with "sp" this many bytes below the top of the kernel stack. - * This preserves the invariant that a called function may write to *sp. + * This allows us to be cache-aware when handling the initial save + * of the pt_regs value to the stack. */ -#define STACK_TOP_DELTA 8 +#define STACK_TOP_DELTA 64 /* * When entering the kernel via a fault, start with the top of the @@ -234,15 +235,15 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags); unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ -#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) +#define task_ksp0(task) \ + ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA) /* Return some info about the user process TASK. */ -#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) #define task_pt_regs(task) \ - ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) + ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) #define current_pt_regs() \ - ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ - (KSTK_PTREGS_GAP - 1)) - 1) + ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ + STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1) #define task_sp(task) (task_pt_regs(task)->sp) #define task_pc(task) (task_pt_regs(task)->pc) /* Aliases for pc and sp (used in fs/proc/array.c) */ @@ -355,20 +356,38 @@ extern int kdata_huge; #define KERNEL_PL CONFIG_KERNEL_PL /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ -#define CPU_LOG_MASK_VALUE 12 -#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) -#if CONFIG_NR_CPUS > CPU_MASK_VALUE -# error Too many cpus! +#ifdef __tilegx__ +#define CPU_SHIFT 48 +#if CHIP_VA_WIDTH() > CPU_SHIFT +# error Too many VA bits! #endif +#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) #define raw_smp_processor_id() \ - ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) + ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT)) #define get_current_ksp0() \ - (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) + ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \ + (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) +#define next_current_ksp0(task) ({ \ + unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \ + unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \ + __ksp0 | __cpu; \ +}) +#else +#define LOG2_NR_CPU_IDS 6 +#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1) +#define raw_smp_processor_id() \ + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID) +#define get_current_ksp0() \ + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID) #define next_current_ksp0(task) ({ \ unsigned long __ksp0 = task_ksp0(task); \ int __cpu = raw_smp_processor_id(); \ - BUG_ON(__ksp0 & CPU_MASK_VALUE); \ + BUG_ON(__ksp0 & MAX_CPU_ID); \ __ksp0 | __cpu; \ }) +#endif +#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) +# error Too many cpus! +#endif #endif /* _ASM_TILE_PROCESSOR_H */ -- cgit v1.1 From a718e10cbaa28035b7ab510d68fef68524454e1a Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 10 Aug 2013 13:15:46 -0400 Subject: tile: handle super huge pages in virt_to_pte This tile-specific API had a minor bug, in that if a super huge (>4GB) page mapped a particular address range, we wouldn't handle it correctly. As part of fixing that bug, I also cleaned up some of the pud and pmd accessors to make them more consistent. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/pgtable_32.h | 2 ++ arch/tile/include/asm/pgtable_64.h | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 4ce4a7a..e5bdc0e 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -84,6 +84,8 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; /* We have no pmd or pud since we are strictly a two-level page table */ #include +static inline int pud_huge_page(pud_t pud) { return 0; } + /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 2492fa5..7cb8d35 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -63,6 +63,15 @@ /* We have no pud since we are a three-level page table. */ #include +/* + * pmds are the same as pgds and ptes, so converting is a no-op. + */ +#define pmd_pte(pmd) (pmd) +#define pmdp_ptep(pmdp) (pmdp) +#define pte_pmd(pte) (pte) + +#define pud_pte(pud) ((pud).pgd) + static inline int pud_none(pud_t pud) { return pud_val(pud) == 0; @@ -73,6 +82,11 @@ static inline int pud_present(pud_t pud) return pud_val(pud) & _PAGE_PRESENT; } +static inline int pud_huge_page(pud_t pud) +{ + return pud_val(pud) & _PAGE_HUGE_PAGE; +} + #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e)) @@ -89,6 +103,9 @@ static inline int pud_bad(pud_t pud) /* Return the page-table frame number (ptfn) that a pud_t points at. */ #define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd) +/* Return the page frame number (pfn) that a pud_t points at. */ +#define pud_pfn(pud) pte_pfn(pud_pte(pud)) + /* * A given kernel pud_t maps to a kernel pmd_t table at a specific * virtual address. Since kernel pmd_t tables can be aligned at @@ -152,13 +169,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return hv_pte(__insn_exch(&ptep->val, 0UL)); } -/* - * pmds are the same as pgds and ptes, so converting is a no-op. - */ -#define pmd_pte(pmd) (pmd) -#define pmdp_ptep(pmdp) (pmdp) -#define pte_pmd(pte) (pte) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_TILE_PGTABLE_64_H */ -- cgit v1.1 From 051168df528fe4456d63f5f65b041c147c26fe97 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 3 Sep 2013 14:45:52 -0400 Subject: tile: don't assume user privilege is zero Technically, user privilege is anything less than kernel privilege. We modify the existing user_mode() macro to have this semantic (and use it in a couple of places it wasn't being used before), and add an IS_KERNEL_EX1() macro to the assembly code as well. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 4 ++-- arch/tile/include/asm/ptrace.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 230b830..c72fcba 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -15,6 +15,8 @@ #ifndef _ASM_TILE_PROCESSOR_H #define _ASM_TILE_PROCESSOR_H +#include + #ifndef __ASSEMBLY__ /* @@ -25,7 +27,6 @@ #include #include -#include #include struct task_struct; @@ -347,7 +348,6 @@ extern int kdata_huge; /* * Provide symbolic constants for PLs. - * Note that assembly code assumes that USER_PL is zero. */ #define USER_PL 0 #if CONFIG_KERNEL_PL == 2 diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index 0d25c21..b9620c0 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -39,7 +39,7 @@ typedef unsigned long pt_reg_t; #define user_stack_pointer(regs) ((regs)->sp) /* Does the process account for user or for system time? */ -#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL) +#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL) /* Fill in a struct pt_regs with the current kernel registers. */ struct pt_regs *get_pt_regs(struct pt_regs *); -- cgit v1.1 From acbde1db294932623aad15dd8cc6e37b28340f26 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 3 Sep 2013 14:41:36 -0400 Subject: tile: parameterize VA and PA space more cleanly The existing code relied on the hardware definition () to specify how much VA and PA space was available. It's convenient to allow customizing this for some configurations, so provide symbols MAX_PA_WIDTH and MAX_VA_WIDTH in that can be modified if desired. Additionally, move away from the MEM_XX_INTRPT nomenclature to define the start of various regions within the VA space. In fact the cleaner symbol is, for example, MEM_SV_START, to indicate the start of the area used for supervisor code; the actual address of the interrupt vectors is not as important, and can be changed if desired. As part of this change, convert from "intrpt1" nomenclature (which built in the old privilege-level 1 model) to a simple "intrpt". Also strip out some tilepro-specific code supporting modifying the PL the kernel could run at, since we don't actually support using different PLs in tilepro, only tilegx. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/page.h | 52 +++++++++++++++----------------------- arch/tile/include/asm/pgtable_32.h | 2 +- arch/tile/include/asm/pgtable_64.h | 3 +-- arch/tile/include/asm/processor.h | 2 +- 4 files changed, 24 insertions(+), 35 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index b4f96c0..980843d 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -148,8 +148,12 @@ static inline __attribute_const__ int get_order(unsigned long size) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif +/* Allow overriding how much VA or PA the kernel will use. */ +#define MAX_PA_WIDTH CHIP_PA_WIDTH() +#define MAX_VA_WIDTH CHIP_VA_WIDTH() + /* Each memory controller has PAs distinct in their high bits. */ -#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS()) +#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS()) #define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS()) #define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT) #define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)) @@ -160,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size) * We reserve the lower half of memory for user-space programs, and the * upper half for system code. We re-map all of physical memory in the * upper half, which takes a quarter of our VA space. Then we have - * the vmalloc regions. The supervisor code lives at 0xfffffff700000000, + * the vmalloc regions. The supervisor code lives at the highest address, * with the hypervisor above that. * * Loadable kernel modules are placed immediately after the static @@ -172,26 +176,19 @@ static inline __attribute_const__ int get_order(unsigned long size) * Similarly, for now we don't play any struct page mapping games. */ -#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH() +#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH # error Too much PA to map with the VA available! #endif -#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1)) -#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */ -#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */ -#define PAGE_OFFSET MEM_HIGH_START -#define FIXADDR_BASE _AC(0xfffffff400000000, UL) /* 4 GB */ -#define FIXADDR_TOP _AC(0xfffffff500000000, UL) /* 4 GB */ +#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) +#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ #define _VMALLOC_START FIXADDR_TOP -#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */ -#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */ -#define MEM_SV_INTRPT MEM_SV_START -#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */ +#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ +#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ +#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ #define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) -#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */ - -/* Highest DTLB address we will use */ -#define KERNEL_HIGH_VADDR MEM_SV_START #else /* !__tilegx__ */ @@ -213,25 +210,18 @@ static inline __attribute_const__ int get_order(unsigned long size) * values, and after that, we show "typical" values, since the actual * addresses depend on kernel #defines. * - * MEM_HV_INTRPT 0xfe000000 - * MEM_SV_INTRPT (kernel code) 0xfd000000 + * MEM_HV_START 0xfe000000 + * MEM_SV_START (kernel code) 0xfd000000 * MEM_USER_INTRPT (user vector) 0xfc000000 - * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR) - * PKMAP_BASE 0xf7000000 (via LAST_PKMAP) - * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS) - * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE) + * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR) + * PKMAP_BASE 0xf9000000 (via LAST_PKMAP) + * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE) * mapped LOWMEM 0xc0000000 */ #define MEM_USER_INTRPT _AC(0xfc000000, UL) -#if CONFIG_KERNEL_PL == 1 -#define MEM_SV_INTRPT _AC(0xfd000000, UL) -#define MEM_HV_INTRPT _AC(0xfe000000, UL) -#else -#define MEM_GUEST_INTRPT _AC(0xfd000000, UL) -#define MEM_SV_INTRPT _AC(0xfe000000, UL) -#define MEM_HV_INTRPT _AC(0xff000000, UL) -#endif +#define MEM_SV_START _AC(0xfd000000, UL) +#define MEM_HV_START _AC(0xfe000000, UL) #define INTRPT_SIZE 0x4000 diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index e5bdc0e..63142ab 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -89,7 +89,7 @@ static inline int pud_huge_page(pud_t pud) { return 0; } /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_INTRPT; + return addr >= MEM_HV_START; } /* diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 7cb8d35..3421177 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -140,8 +140,7 @@ static inline unsigned long pgd_addr_normalize(unsigned long addr) /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_START || - (addr > MEM_LOW_END && addr < MEM_HIGH_START); + return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr); } /* diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index c72fcba..5aa5431 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -168,7 +168,7 @@ struct thread_struct { #ifndef __ASSEMBLY__ #ifdef __tilegx__ -#define TASK_SIZE_MAX (MEM_LOW_END + 1) +#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1)) #else #define TASK_SIZE_MAX PAGE_OFFSET #endif -- cgit v1.1 From 8157107b13099d6eb2e8ccd00b9aba009c698c38 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 28 Aug 2013 19:53:17 -0400 Subject: tilegx: support KGDB Enter kernel debugger at boot with: --hvd UART_1=1 --hvx kgdbwait --hvx kgdboc=ttyS1,115200 or at runtime with: echo ttyS1,115200 > /sys/module/kgdboc/parameters/kgdboc echo g > /proc/sysrq-trigger Signed-off-by: Chris Metcalf --- arch/tile/include/asm/kgdb.h | 71 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 arch/tile/include/asm/kgdb.h (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/kgdb.h b/arch/tile/include/asm/kgdb.h new file mode 100644 index 0000000..280c181 --- /dev/null +++ b/arch/tile/include/asm/kgdb.h @@ -0,0 +1,71 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx KGDB support. + */ + +#ifndef __TILE_KGDB_H__ +#define __TILE_KGDB_H__ + +#include +#include + +#define GDB_SIZEOF_REG sizeof(unsigned long) + +/* + * TILE-Gx gdb is expecting the following register layout: + * 56 GPRs(R0 - R52, TP, SP, LR), 8 special GPRs(networks and ZERO), + * plus the PC and the faultnum. + * + * Even though kernel not use the 8 special GPRs, they need to be present + * in the registers sent for correct processing in the host-side gdb. + * + */ +#define DBG_MAX_REG_NUM (56+8+2) +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets, + * Longer buffer is needed to list all threads. + */ +#define BUFMAX 2048 + +#define BREAK_INSTR_SIZE TILEGX_BUNDLE_SIZE_IN_BYTES + +/* + * Require cache flush for set/clear a software breakpoint or write memory. + */ +#define CACHE_FLUSH_IS_SAFE 1 + +/* + * The compiled-in breakpoint instruction can be used to "break" into + * the debugger via magic system request key (sysrq-G). + */ +static tile_bundle_bits compiled_bpt = TILEGX_BPT_BUNDLE | DIE_COMPILED_BPT; + +enum tilegx_regnum { + TILEGX_PC_REGNUM = TREG_LAST_GPR + 9, + TILEGX_FAULTNUM_REGNUM, +}; + +/* + * Generate a breakpoint exception to "break" into the debugger. + */ +static inline void arch_kgdb_breakpoint(void) +{ + asm volatile (".quad %0\n\t" + ::""(compiled_bpt)); +} + +#endif /* __TILE_KGDB_H__ */ -- cgit v1.1 From 640710a33b54de8d90ae140ef633ed0feba76a75 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 12 Aug 2013 15:08:09 -0400 Subject: tile: add virt_to_kpte() API and clean up and document behavior We use virt_to_pte(NULL, va) a lot, which isn't very obvious. I added virt_to_kpte(va) as a more obvious wrapper function, that also validates the va as being a kernel adddress. And, I fixed the semantics of virt_to_pte() so that we handle the pud and pmd the same way, and we now document the fact that we handle the final pte level differently. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/mmu_context.h | 2 +- arch/tile/include/asm/page.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index 37f0b74..4734215 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -45,7 +45,7 @@ static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot) static inline void install_page_table(pgd_t *pgdir, int asid) { - pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir); + pte_t *ptep = virt_to_kpte((unsigned long)pgdir); __install_page_table(pgdir, asid, *ptep); } diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 980843d..6346888 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -328,6 +328,7 @@ static inline int pfn_valid(unsigned long pfn) struct mm_struct; extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); +extern pte_t *virt_to_kpte(unsigned long kaddr); #endif /* !__ASSEMBLY__ */ -- cgit v1.1 From 5e7705df28720c424c11bdedf0d568177351c55a Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 12 Aug 2013 15:25:22 -0400 Subject: tile PCI RC: add comment about "PCI hole" problem Explain the rationale of not overlapping the 64-bit DMA window with the PA range. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index c99ad44..dfedd7a 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -122,6 +122,11 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit * devices, we create a separate map region that handles the low * 4GB. + * + * This design lets us avoid the "PCI hole" problem where the host bridge + * won't pass DMA traffic with target addresses that happen to fall within the + * BAR space. This enables us to use all the physical memory for DMA, instead + * of wasting the same amount of physical memory as the BAR window size. */ #define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH()) -- cgit v1.1 From 4c63de8df0b8c43374959fb6fdfdb08f6ac41f34 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 14:00:04 -0400 Subject: tile: use asm-generic version of Signed-off-by: Chris Metcalf --- arch/tile/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 4c0b3c2..c5737f1 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += local.h +generic-y += local64.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h -- cgit v1.1 From 729b25a4642001296b8c12a360459d61adf7d75c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 14:01:58 -0400 Subject: tile: use standard 'generic-y' model for Signed-off-by: Chris Metcalf --- arch/tile/include/asm/Kbuild | 1 + arch/tile/include/asm/hw_irq.h | 18 ------------------ 2 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/tile/include/asm/hw_irq.h (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index c5737f1..664d6ad 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += errno.h generic-y += exec.h generic-y += fb.h generic-y += fcntl.h +generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/hw_irq.h deleted file mode 100644 index 4fac5fbf..0000000 --- a/arch/tile/include/asm/hw_irq.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_HW_IRQ_H -#define _ASM_TILE_HW_IRQ_H - -#endif /* _ASM_TILE_HW_IRQ_H */ -- cgit v1.1 From d6a0aa314c06743b702931cb468f400b7615c5c9 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 14 Aug 2013 22:07:30 +0900 Subject: tile: use asm-generic/bitops/builtin-*.h The definisions of __ffs(), __fls(), and ffs() for tile are almost same as asm-generic/bitops-*.h. The only difference is that it is defined as __always_inline or inline. So this switches to use those headers. Signed-off-by: Akinobu Mita Signed-off-by: Chris Metcalf [moved #includes to end] --- arch/tile/include/asm/bitops.h | 41 +++-------------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index bd186c4..d5a2068 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -29,17 +29,6 @@ #endif /** - * __ffs - find first set bit in word - * @word: The word to search - * - * Undefined if no set bit exists, so code should check against 0 first. - */ -static inline unsigned long __ffs(unsigned long word) -{ - return __builtin_ctzl(word); -} - -/** * ffz - find first zero bit in word * @word: The word to search * @@ -50,33 +39,6 @@ static inline unsigned long ffz(unsigned long word) return __builtin_ctzl(~word); } -/** - * __fls - find last set bit in word - * @word: The word to search - * - * Undefined if no set bit exists, so code should check against 0 first. - */ -static inline unsigned long __fls(unsigned long word) -{ - return (sizeof(word) * 8) - 1 - __builtin_clzl(word); -} - -/** - * ffs - find first set bit in word - * @x: the word to search - * - * This is defined the same way as the libc and compiler builtin ffs - * routines, therefore differs in spirit from the other bitops. - * - * ffs(value) returns 0 if value is 0 or the position of the first - * set bit if value is nonzero. The first (least significant) bit - * is at position 1. - */ -static inline int ffs(int x) -{ - return __builtin_ffs(x); -} - static inline int fls64(__u64 w) { return (sizeof(__u64) * 8) - __builtin_clzll(w); @@ -118,6 +80,9 @@ static inline unsigned long __arch_hweight64(__u64 w) return __builtin_popcountll(w); } +#include +#include +#include #include #include #include -- cgit v1.1 From d7c9661115fd23b4dabb710b3080dd9919dfa891 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 15 Aug 2013 16:23:24 -0400 Subject: tile: remove support for TILE64 This chip is no longer being actively developed for (it was superceded by the TILEPro64 in 2008), and in any case the existing compiler and toolchain in the community do not support it. It's unlikely that the kernel works with TILE64 at this point as the configuration has not been tested in years. The support is also awkward as it requires maintaining a significant number of ifdefs. So, just remove it altogether. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/atomic_32.h | 17 ----------------- arch/tile/include/asm/barrier.h | 4 ---- arch/tile/include/asm/elf.h | 1 - arch/tile/include/asm/homecache.h | 3 +-- arch/tile/include/asm/processor.h | 16 ---------------- arch/tile/include/asm/smp.h | 2 -- arch/tile/include/asm/traps.h | 2 +- 7 files changed, 2 insertions(+), 43 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index e7fb5cf..96156f5 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -252,21 +252,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) * Internal definitions only beyond this point. */ -#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \ - (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP)) - -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - -/* Number of entries in atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L1_SHIFT 6 -#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT) - -/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2) -#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT) - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * Number of atomic locks in atomic_locks[]. Must be a power of two. * There is no reason for more than PAGE_SIZE / 8 entries, since that @@ -281,8 +266,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) extern int atomic_locks[]; #endif -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * All the code that may fault while holding an atomic lock must * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index 990a217..a9a73da 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -77,7 +77,6 @@ #define __sync() __insn_mf() -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() #include /* * Issue an uncacheable load to each memory controller, then @@ -96,7 +95,6 @@ static inline void __mb_incoherent(void) "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29"); } -#endif /* Fence to guarantee visibility of stores to incoherent memory. */ static inline void @@ -104,7 +102,6 @@ mb_incoherent(void) { __insn_mf(); -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() { #if CHIP_HAS_TILE_WRITE_PENDING() const unsigned long WRITE_TIMEOUT_CYCLES = 400; @@ -116,7 +113,6 @@ mb_incoherent(void) #endif /* CHIP_HAS_TILE_WRITE_PENDING() */ (void) __mb_incoherent(); } -#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */ } #define fast_wmb() __sync() diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index e1da88e..41d9878 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -#define EM_TILE64 187 #define EM_TILEPRO 188 #define EM_TILEGX 191 diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h index 49d19df..7ddd1b8 100644 --- a/arch/tile/include/asm/homecache.h +++ b/arch/tile/include/asm/homecache.h @@ -33,8 +33,7 @@ struct zone; /* * Is this page immutable (unwritable) and thus able to be cached more - * widely than would otherwise be possible? On tile64 this means we - * mark the PTE to cache locally; on tilepro it means we have "nc" set. + * widely than would otherwise be possible? This means we have "nc" set. */ #define PAGE_HOME_IMMUTABLE -2 diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 5aa5431..4232363 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -113,18 +113,14 @@ struct thread_struct { unsigned long intctrl_0; /* Is this task currently doing a backtrace? */ bool in_backtrace; -#if CHIP_HAS_PROC_STATUS_SPR() /* Any other miscellaneous processor state bits */ unsigned long proc_status; -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() /* Interrupt base for PL0 interrupts */ unsigned long interrupt_vector_base; #endif -#if CHIP_HAS_TILE_RTF_HWM() /* Tile cache retry fifo high-water mark */ unsigned long tile_rtf_hwm; -#endif #if CHIP_HAS_DSTREAM_PF() /* Data stream prefetch control */ unsigned long dstream_pf; @@ -137,12 +133,6 @@ struct thread_struct { /* Async DMA TLB fault information */ struct async_tlb dma_async_tlb; #endif -#if CHIP_HAS_SN_PROC() - /* Was static network processor when we were switched out? */ - int sn_proc_running; - /* Async SNI TLB fault information */ - struct async_tlb sn_async_tlb; -#endif }; #endif /* !__ASSEMBLY__ */ @@ -286,7 +276,6 @@ extern char chip_model[64]; /* Data on which physical memory controller corresponds to which NUMA node. */ extern int node_controller[]; -#if CHIP_HAS_CBOX_HOME_MAP() /* Does the heap allocator return hash-for-home pages by default? */ extern int hash_default; @@ -296,11 +285,6 @@ extern int kstack_hash; /* Does MAP_ANONYMOUS return hash-for-home pages by default? */ #define uheap_hash hash_default -#else -#define hash_default 0 -#define kstack_hash 0 -#define uheap_hash 0 -#endif /* Are we using huge pages in the TLB for kernel data? */ extern int kdata_huge; diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h index 1aa759a..9a326b6 100644 --- a/arch/tile/include/asm/smp.h +++ b/arch/tile/include/asm/smp.h @@ -101,10 +101,8 @@ void print_disabled_cpus(void); extern struct cpumask cpu_lotar_map; #define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map) -#if CHIP_HAS_CBOX_HOME_MAP() /* Which processors are used for hash-for-home mapping */ extern struct cpumask hash_for_home_map; -#endif /* Which cpus can have their cache flushed by hv_flush_remote(). */ extern struct cpumask cpu_cacheable_map; diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 5f172b2..4b99a1c 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -21,7 +21,7 @@ /* mm/fault.c */ void do_page_fault(struct pt_regs *, int fault_num, unsigned long address, unsigned long write); -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() void do_async_page_fault(struct pt_regs *); #endif -- cgit v1.1 From ce61cdc270a5e0dd18057bbf29bd3471abccbda8 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 15 Aug 2013 16:29:02 -0400 Subject: tile: make __write_once a synonym for __read_mostly This was really only useful for TILE64 when we mapped the kernel data with small pages. Now we use a huge page and we really don't want to map different parts of the kernel data in different ways. We retain the __write_once name in case we want to bring it back to life at some point in the future. Note that this change uncovered a latent bug where the "smp_topology" variable happened to always be aligned mod 8 so we could store two "int" values at once, but when we eliminated __write_once it ended up only aligned mod 4. Fix with an explicit annotation. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cache.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index a9a5299..6160761 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -49,9 +49,16 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) /* - * Attribute for data that is kept read/write coherent until the end of - * initialization, then bumped to read/only incoherent for performance. + * Originally we used small TLB pages for kernel data and grouped some + * things together as "write once", enforcing the property at the end + * of initialization by making those pages read-only and non-coherent. + * This allowed better cache utilization since cache inclusion did not + * need to be maintained. However, to do this requires an extra TLB + * entry, which on balance is more of a performance hit than the + * non-coherence is a performance gain, so we now just make "read + * mostly" and "write once" be synonyms. We keep the attribute + * separate in case we change our minds at a future date. */ -#define __write_once __attribute__((__section__(".w1data"))) +#define __write_once __read_mostly #endif /* _ASM_TILE_CACHE_H */ -- cgit v1.1 From b40f451d56de69477a2244a0b4082f644699673f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Aug 2013 10:12:36 -0400 Subject: tile PCI RC: make default consistent DMA mask 32-bit This change sets the PCI devices' initial DMA capabilities conservatively and promotes them at the request of the driver, as opposed to assuming advanced DMA capabilities. The old design runs the risk of breaking drivers that assume default capabilities. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/device.h | 5 ++++- arch/tile/include/asm/dma-mapping.h | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h index 5182705..6ab8bf1 100644 --- a/arch/tile/include/asm/device.h +++ b/arch/tile/include/asm/device.h @@ -23,7 +23,10 @@ struct dev_archdata { /* Offset of the DMA address from the PA. */ dma_addr_t dma_offset; - /* Highest DMA address that can be generated by this device. */ + /* + * Highest DMA address that can be generated by devices that + * have limited DMA capability, i.e. non 64-bit capable. + */ dma_addr_t max_direct_dma_addr; }; diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 6f522d5..1eae359 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -92,14 +92,19 @@ dma_set_mask(struct device *dev, u64 mask) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - /* Handle legacy PCI devices with limited memory addressability. */ - if ((dma_ops == gx_pci_dma_map_ops || - dma_ops == gx_hybrid_pci_dma_map_ops || - dma_ops == gx_legacy_pci_dma_map_ops) && - (mask <= DMA_BIT_MASK(32))) { - set_dma_ops(dev, gx_legacy_pci_dma_map_ops); - set_dma_offset(dev, 0); - if (mask > dev->archdata.max_direct_dma_addr) + /* + * For PCI devices with 64-bit DMA addressing capability, promote + * the dma_ops to hybrid, with the consistent memory DMA space limited + * to 32-bit. For 32-bit capable devices, limit the streaming DMA + * address range to max_direct_dma_addr. + */ + if (dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) { + if (mask == DMA_BIT_MASK(64) && + dma_ops == gx_legacy_pci_dma_map_ops) + set_dma_ops(dev, gx_hybrid_pci_dma_map_ops); + else if (mask > dev->archdata.max_direct_dma_addr) mask = dev->archdata.max_direct_dma_addr; } -- cgit v1.1 From 6dc9658fa1af9f58d358692b68135f464c167e10 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 6 Sep 2013 08:56:45 -0400 Subject: tile: rework The macrology in cmpxchg.h was designed to allow arbitrary pointer and integer values to be passed through the routines. To support cmpxchg() on 64-bit values on the 32-bit tilepro architecture, we used the idiom "(typeof(val))(typeof(val-val))". This way, in the "size 8" branch of the switch, when the underlying cmpxchg routine returns a 64-bit quantity, we cast it first to a typeof(val-val) quantity (i.e. size_t if "val" is a pointer) with no warnings about casting between pointers and integers of different sizes, then cast onwards to typeof(val), again with no warnings. If val is not a pointer type, the additional cast is a no-op. We can't replace the typeof(val-val) cast with (for example) unsigned long, since then if "val" is really a 64-bit type, we cast away the high bits. HOWEVER, this fails with current gcc (through 4.7 at least) if "val" is a pointer to an incomplete type. Unfortunately gcc isn't smart enough to realize that "val - val" will always be a size_t type even if it's an incomplete type pointer. Accordingly, I've reworked the way we handle the casting. We have given up the ability to use cmpxchg() on 64-bit values on tilepro, which is OK in the kernel since we should use cmpxchg64() explicitly on such values anyway. As a result, I can just use simple "unsigned long" casts internally. As I reworked it, I realized it would be cleaner to move the architecture-specific conditionals for cmpxchg and xchg out of the atomic.h headers and into cmpxchg, and then use the cmpxchg() and xchg() primitives directly in atomic.h and elsewhere. This allowed the cmpxchg.h header to stand on its own without relying on the implicit include of it that is performed by . It also allowed collapsing the atomic_xchg/atomic_cmpxchg routines from atomic_{32,64}.h into atomic.h. I improved the tests that guard the allowed size of the arguments to the routines to use a __compiletime_error() test. (By avoiding the use of BUILD_BUG, I could include cmpxchg.h into bitops.h as well and use the macros there, which is otherwise impossible due to include order dependency issues.) The tilepro _atomic_xxx internal methods were previously set up to take atomic_t and atomic64_t arguments, which isn't as convenient with the new model, so I modified them to take int or u64 arguments, which is consistent with how they used the arguments internally anyway, so provided some nice simplification there too. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/atomic.h | 52 +++++++++++++++++++++ arch/tile/include/asm/atomic_32.h | 85 ++++------------------------------ arch/tile/include/asm/atomic_64.h | 42 +---------------- arch/tile/include/asm/bitops_32.h | 2 +- arch/tile/include/asm/bitops_64.h | 8 ++-- arch/tile/include/asm/cmpxchg.h | 97 +++++++++++++++++++++++++++++---------- 6 files changed, 140 insertions(+), 146 deletions(-) (limited to 'arch/tile/include/asm') diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index e71387a..d385eaa 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -114,6 +114,32 @@ static inline int atomic_read(const atomic_t *v) #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) /** + * atomic_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline int atomic_xchg(atomic_t *v, int n) +{ + return xchg(&v->counter, n); +} + +/** + * atomic_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline int atomic_cmpxchg(atomic_t *v, int o, int n) +{ + return cmpxchg(&v->counter, o, n); +} + +/** * atomic_add_negative - add and test if negative * @v: pointer of type atomic_t * @i: integer value to add @@ -133,6 +159,32 @@ static inline int atomic_read(const atomic_t *v) #ifndef __ASSEMBLY__ +/** + * atomic64_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic64_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline u64 atomic64_xchg(atomic64_t *v, u64 n) +{ + return xchg64(&v->counter, n); +} + +/** + * atomic64_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic64_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) +{ + return cmpxchg64(&v->counter, o, n); +} + static inline long long atomic64_dec_if_positive(atomic64_t *v) { long long c, old, dec; diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 96156f5..0d0395b 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -22,40 +22,6 @@ #ifndef __ASSEMBLY__ -/* Tile-specific routines to support . */ -int _atomic_xchg(atomic_t *v, int n); -int _atomic_xchg_add(atomic_t *v, int i); -int _atomic_xchg_add_unless(atomic_t *v, int a, int u); -int _atomic_cmpxchg(atomic_t *v, int o, int n); - -/** - * atomic_xchg - atomically exchange contents of memory with a new value - * @v: pointer of type atomic_t - * @i: integer value to store in memory - * - * Atomically sets @v to @i and returns old @v - */ -static inline int atomic_xchg(atomic_t *v, int n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg(v, n); -} - -/** - * atomic_cmpxchg - atomically exchange contents of memory if it matches - * @v: pointer of type atomic_t - * @o: old value that memory should have - * @n: new value to write to memory if it matches - * - * Atomically checks if @v holds @o and replaces it with @n if so. - * Returns the old value at @v. - */ -static inline int atomic_cmpxchg(atomic_t *v, int o, int n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic_cmpxchg(v, o, n); -} - /** * atomic_add - add integer to atomic variable * @i: integer value to add @@ -65,7 +31,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n) */ static inline void atomic_add(int i, atomic_t *v) { - _atomic_xchg_add(v, i); + _atomic_xchg_add(&v->counter, i); } /** @@ -78,7 +44,7 @@ static inline void atomic_add(int i, atomic_t *v) static inline int atomic_add_return(int i, atomic_t *v) { smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg_add(v, i) + i; + return _atomic_xchg_add(&v->counter, i) + i; } /** @@ -93,7 +59,7 @@ static inline int atomic_add_return(int i, atomic_t *v) static inline int __atomic_add_unless(atomic_t *v, int a, int u) { smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg_add_unless(v, a, u); + return _atomic_xchg_add_unless(&v->counter, a, u); } /** @@ -108,7 +74,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) */ static inline void atomic_set(atomic_t *v, int n) { - _atomic_xchg(v, n); + _atomic_xchg(&v->counter, n); } /* A 64bit atomic type */ @@ -119,11 +85,6 @@ typedef struct { #define ATOMIC64_INIT(val) { (val) } -u64 _atomic64_xchg(atomic64_t *v, u64 n); -u64 _atomic64_xchg_add(atomic64_t *v, u64 i); -u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u); -u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n); - /** * atomic64_read - read atomic variable * @v: pointer of type atomic64_t @@ -137,35 +98,7 @@ static inline u64 atomic64_read(const atomic64_t *v) * Casting away const is safe since the atomic support routines * do not write to memory if the value has not been modified. */ - return _atomic64_xchg_add((atomic64_t *)v, 0); -} - -/** - * atomic64_xchg - atomically exchange contents of memory with a new value - * @v: pointer of type atomic64_t - * @i: integer value to store in memory - * - * Atomically sets @v to @i and returns old @v - */ -static inline u64 atomic64_xchg(atomic64_t *v, u64 n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg(v, n); -} - -/** - * atomic64_cmpxchg - atomically exchange contents of memory if it matches - * @v: pointer of type atomic64_t - * @o: old value that memory should have - * @n: new value to write to memory if it matches - * - * Atomically checks if @v holds @o and replaces it with @n if so. - * Returns the old value at @v. - */ -static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic64_cmpxchg(v, o, n); + return _atomic64_xchg_add((u64 *)&v->counter, 0); } /** @@ -177,7 +110,7 @@ static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) */ static inline void atomic64_add(u64 i, atomic64_t *v) { - _atomic64_xchg_add(v, i); + _atomic64_xchg_add(&v->counter, i); } /** @@ -190,7 +123,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) static inline u64 atomic64_add_return(u64 i, atomic64_t *v) { smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg_add(v, i) + i; + return _atomic64_xchg_add(&v->counter, i) + i; } /** @@ -205,7 +138,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) { smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg_add_unless(v, a, u) != u; + return _atomic64_xchg_add_unless(&v->counter, a, u) != u; } /** @@ -220,7 +153,7 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) */ static inline void atomic64_set(atomic64_t *v, u64 n) { - _atomic64_xchg(v, n); + _atomic64_xchg(&v->counter, n); } #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index f4500c6..ad220ee 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,25 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline int atomic_cmpxchg(atomic_t *v, int o, int n) -{ - int val; - __insn_mtspr(SPR_CMPEXCH_VALUE, o); - smp_mb(); /* barrier for proper semantics */ - val = __insn_cmpexch4((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - -static inline int atomic_xchg(atomic_t *v, int n) -{ - int val; - smp_mb(); /* barrier for proper semantics */ - val = __insn_exch4((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - static inline void atomic_add(int i, atomic_t *v) { __insn_fetchadd4((void *)&v->counter, i); @@ -72,7 +53,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) if (oldval == u) break; guess = oldval; - oldval = atomic_cmpxchg(v, guess, guess + a); + oldval = cmpxchg(&v->counter, guess, guess + a); } while (guess != oldval); return oldval; } @@ -84,25 +65,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define atomic64_read(v) ((v)->counter) #define atomic64_set(v, i) ((v)->counter = (i)) -static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n) -{ - long val; - smp_mb(); /* barrier for proper semantics */ - __insn_mtspr(SPR_CMPEXCH_VALUE, o); - val = __insn_cmpexch((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - -static inline long atomic64_xchg(atomic64_t *v, long n) -{ - long val; - smp_mb(); /* barrier for proper semantics */ - val = __insn_exch((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - static inline void atomic64_add(long i, atomic64_t *v) { __insn_fetchadd((void *)&v->counter, i); @@ -124,7 +86,7 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) if (oldval == u) break; guess = oldval; - oldval = atomic64_cmpxchg(v, guess, guess + a); + oldval = cmpxchg(&v->counter, guess, guess + a); } while (guess != oldval); return oldval != u; } diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index ddc4c1e..386865ad 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -16,7 +16,7 @@ #define _ASM_TILE_BITOPS_32_H #include -#include +#include /* Tile-specific routines to support . */ unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h index 60b87ee..ad34cd0 100644 --- a/arch/tile/include/asm/bitops_64.h +++ b/arch/tile/include/asm/bitops_64.h @@ -16,7 +16,7 @@ #define _ASM_TILE_BITOPS_64_H #include -#include +#include /* See for API comments. */ @@ -44,8 +44,7 @@ static inline void change_bit(unsigned nr, volatile unsigned long *addr) oldval = *addr; do { guess = oldval; - oldval = atomic64_cmpxchg((atomic64_t *)addr, - guess, guess ^ mask); + oldval = cmpxchg(addr, guess, guess ^ mask); } while (guess != oldval); } @@ -90,8 +89,7 @@ static inline int test_and_change_bit(unsigned nr, oldval = *addr; do { guess = oldval; - oldval = atomic64_cmpxchg((atomic64_t *)addr, - guess, guess ^ mask); + oldval = cmpxchg(addr, guess, guess ^ mask); } while (guess != oldval); return (oldval & mask) != 0; } diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h index 1da5bfb..4001d5e 100644 --- a/arch/tile/include/asm/cmpxchg.h +++ b/arch/tile/include/asm/cmpxchg.h @@ -20,59 +20,108 @@ #ifndef __ASSEMBLY__ -/* Nonexistent functions intended to cause link errors. */ -extern unsigned long __xchg_called_with_bad_pointer(void); -extern unsigned long __cmpxchg_called_with_bad_pointer(void); +#include -#define xchg(ptr, x) \ +/* Nonexistent functions intended to cause compile errors. */ +extern void __xchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for xchg"); +extern void __cmpxchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for cmpxchg"); + +#ifndef __tilegx__ + +/* Note the _atomic_xxx() routines include a final mb(). */ +int _atomic_xchg(int *ptr, int n); +int _atomic_xchg_add(int *v, int i); +int _atomic_xchg_add_unless(int *v, int a, int u); +int _atomic_cmpxchg(int *ptr, int o, int n); +u64 _atomic64_xchg(u64 *v, u64 n); +u64 _atomic64_xchg_add(u64 *v, u64 i); +u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u); +u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); + +#define xchg(ptr, n) \ + ({ \ + if (sizeof(*(ptr)) != 4) \ + __xchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic_xchg((int *)(ptr), (int)(n)); \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + if (sizeof(*(ptr)) != 4) \ + __cmpxchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, (int)n); \ + }) + +#define xchg64(ptr, n) \ + ({ \ + if (sizeof(*(ptr)) != 8) \ + __xchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic64_xchg((u64 *)(ptr), (u64)(n)); \ + }) + +#define cmpxchg64(ptr, o, n) \ + ({ \ + if (sizeof(*(ptr)) != 8) \ + __cmpxchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic64_cmpxchg((u64 *)ptr, (u64)o, (u64)n); \ + }) + +#else + +#define xchg(ptr, n) \ ({ \ typeof(*(ptr)) __x; \ + smp_mb(); \ switch (sizeof(*(ptr))) { \ case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((x)-(x)))(x)); \ + __x = (typeof(__x))(unsigned long) \ + __insn_exch4((ptr), (u32)(unsigned long)(n)); \ break; \ case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((x)-(x)))(x)); \ + __x = (typeof(__x)) \ + __insn_exch((ptr), (unsigned long)(n)); \ break; \ default: \ __xchg_called_with_bad_pointer(); \ + break; \ } \ + smp_mb(); \ __x; \ }) #define cmpxchg(ptr, o, n) \ ({ \ typeof(*(ptr)) __x; \ + __insn_mtspr(SPR_CMPEXCH_VALUE, (unsigned long)(o)); \ + smp_mb(); \ switch (sizeof(*(ptr))) { \ case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((o)-(o)))(o), \ - (u32)(typeof((n)-(n)))(n)); \ + __x = (typeof(__x))(unsigned long) \ + __insn_cmpexch4((ptr), (u32)(unsigned long)(n)); \ break; \ case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((o)-(o)))(o), \ - (u64)(typeof((n)-(n)))(n)); \ + __x = (typeof(__x))__insn_cmpexch((ptr), (u64)(n)); \ break; \ default: \ __cmpxchg_called_with_bad_pointer(); \ + break; \ } \ + smp_mb(); \ __x; \ }) -#define tas(ptr) (xchg((ptr), 1)) +#define xchg64 xchg +#define cmpxchg64 cmpxchg -#define cmpxchg64(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg((ptr), (o), (n)); \ -}) +#endif + +#define tas(ptr) xchg((ptr), 1) #endif /* __ASSEMBLY__ */ -- cgit v1.1