From 7d937719e3c5c6c9ad00584f6b62230d2ef7f9f1 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 23 Jul 2013 17:18:05 -0400 Subject: tile: various minor cleanups to hardwall subsystem First, clean up active hardwalls in exit_thread(). This is a better place than in arch_release_thread_info(). Second, mask out any non-online cpus from the cpumask after validating any required semantics of the cpu set. Signed-off-by: Chris Metcalf --- arch/tile/kernel/hardwall.c | 10 +++++++++- arch/tile/kernel/process.c | 23 +++++++++-------------- 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 38ac189..7db8893d 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -540,6 +540,14 @@ static struct hardwall_info *hardwall_create(struct hardwall_type *hwt, } } + /* + * Eliminate cpus that are not part of this Linux client. + * Note that this allows for configurations that we might not want to + * support, such as one client on every even cpu, another client on + * every odd cpu. + */ + cpumask_and(&info->cpumask, &info->cpumask, cpu_online_mask); + /* Confirm it doesn't overlap and add it to the list. */ spin_lock_irqsave(&hwt->lock, flags); list_for_each_entry(iter, &hwt->list, list) { @@ -612,7 +620,7 @@ static int hardwall_activate(struct hardwall_info *info) /* * Deactivate a task's hardwall. Must hold lock for hardwall_type. - * This method may be called from free_task(), so we don't want to + * This method may be called from exit_thread(), so we don't want to * rely on too many fields of struct task_struct still being valid. * We assume the cpus_allowed, pid, and comm fields are still valid. */ diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 8ac3044..8d6c51d 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -74,19 +74,6 @@ void arch_release_thread_info(struct thread_info *info) { struct single_step_state *step_state = info->step_state; -#ifdef CONFIG_HARDWALL - /* - * We free a thread_info from the context of the task that has - * been scheduled next, so the original task is already dead. - * Calling deactivate here just frees up the data structures. - * If the task we're freeing held the last reference to a - * hardwall fd, it would have been released prior to this point - * anyway via exit_files(), and the hardwall_task.info pointers - * would be NULL by now. - */ - hardwall_deactivate_all(info->task); -#endif - if (step_state) { /* @@ -564,7 +551,15 @@ void flush_thread(void) */ void exit_thread(void) { - /* Nothing */ +#ifdef CONFIG_HARDWALL + /* + * Remove the task from the list of tasks that are associated + * with any live hardwalls. (If the task that is exiting held + * the last reference to a hardwall fd, it would already have + * been released and deactivated at this point.) + */ + hardwall_deactivate_all(current); +#endif } void show_regs(struct pt_regs *regs) -- cgit v1.1 From dd78bc11fb2050b6a3990d0421feca4c68ca4335 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 23 Jul 2013 17:32:04 -0400 Subject: tile: convert uses of "inv" to "finv" The "inv" (invalidate) instruction is generally less safe than "finv" (flush and invalidate), as it will drop dirty data from the cache. It turns out we have almost no need for "inv" (other than for the older 32-bit architecture in some limited cases), so convert to "finv" where possible and delete the extra "inv" infrastructure. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_32.S | 2 +- arch/tile/kernel/head_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index ac11530..80cd407 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -64,7 +64,7 @@ ENTRY(_start) auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET) } { - inv r6 + finv r6 move r1, zero /* high 32 bits of CPA is zero */ } { diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 6093964..e23e5f7 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -77,7 +77,7 @@ ENTRY(_start) { /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */ bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL - inv r4 + finv r4 } bnez r7, .Lno_write { -- cgit v1.1 From 9bbb08faa91db7711614c9cb0983644086b97aca Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 11:59:06 -0400 Subject: tile PCI RC: cleanups for tilepro PCI RC - remove unneeded include in pci.c - eliminate unused pci_controller.first_busno field - prefer msleep to mdelay - remove stale comment about pci_scan_bus_parented() Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index 67237d3..1dae3b2 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -192,7 +191,6 @@ int __init tile_pci_init(void) controller->hv_cfg_fd[0] = hv_cfg_fd0; controller->hv_cfg_fd[1] = hv_cfg_fd1; controller->hv_mem_fd = hv_mem_fd; - controller->first_busno = 0; controller->last_busno = 0xff; controller->ops = &tile_cfg_ops; @@ -283,7 +281,7 @@ int __init pcibios_init(void) * known to require at least 20ms here, but we use a more * conservative value. */ - mdelay(250); + msleep(250); /* Scan all of the recorded PCI controllers. */ for (i = 0; i < TILE_NUM_PCIE; i++) { @@ -304,18 +302,10 @@ int __init pcibios_init(void) pr_info("PCI: initializing controller #%d\n", i); - /* - * This comes from the generic Linux PCI driver. - * - * It reads the PCI tree for this bus into the Linux - * data structures. - * - * This is inlined in linux/pci.h and calls into - * pci_scan_bus_parented() in probe.c. - */ pci_add_resource(&resources, &ioport_resource); pci_add_resource(&resources, &iomem_resource); - bus = pci_scan_root_bus(NULL, 0, controller->ops, controller, &resources); + bus = pci_scan_root_bus(NULL, 0, controller->ops, + controller, &resources); controller->root_bus = bus; controller->last_busno = bus->busn_res.end; } -- cgit v1.1 From 523c178edf45fe04d61aa99ac496bc7494b99810 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 16:18:58 -0400 Subject: tile PCI RC: tilepro conflict with PCI and RAM addresses Fix a bug in the tilepro PCI resource allocation code that could make the bootmem allocator unhappy if 4GB is installed on mshim 0. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 68b5426..676e155 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -614,11 +614,12 @@ static void __init setup_bootmem_allocator_node(int i) /* * Throw away any memory aliased by the PCI region. */ - if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) - reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn), - PFN_PHYS(pci_reserve_end_pfn - - pci_reserve_start_pfn), + if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) { + start = max(pci_reserve_start_pfn, start); + end = min(pci_reserve_end_pfn, end); + reserve_bootmem(PFN_PHYS(start), PFN_PHYS(end - start), BOOTMEM_EXCLUSIVE); + } #endif } -- cgit v1.1 From 2be705523fb3dd716d76ed371eaadaced55fe4a3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 15:02:03 -0400 Subject: tile PCI RC: support pci=off boot arg for tilepro Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index 1dae3b2..af75835 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -51,6 +51,8 @@ * */ +static int pci_probe = 1; + /* * This flag tells if the platform is TILEmpower that needs * special configuration for the PLX switch chip. @@ -143,6 +145,11 @@ int __init tile_pci_init(void) { int i; + if (!pci_probe) { + pr_info("PCI: disabled by boot argument\n"); + return 0; + } + pr_info("PCI: Searching for controllers...\n"); /* Re-init number of PCIe controllers to support hot-plug feature. */ @@ -378,6 +385,16 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling. */ } +/* Process any "pci=" kernel boot arguments. */ +char * __init pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } + return str; +} + /* * Enable memory and/or address decoding, as appropriate, for the * device described by the 'dev' struct. -- cgit v1.1 From b3ad73a33ebfd585b3ab551a9fc65d684b82adc3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 11:47:29 -0400 Subject: tile PCI RC: tweak the the pcie_rc_delay support Allow longer delays if requested, and print the info messages as we are performing the delay, not when parsing the arguments. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 1142563..e99809e 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -69,17 +69,14 @@ static int pcie_rc[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; * a HW PCIe link-training bug. The exact delay is specified with * a kernel boot argument in the form of "pcie_rc_delay=T,P,S", * where T is the TRIO instance number, P is the port number and S is - * the delay in seconds. If the delay is not provided, the value - * will be DEFAULT_RC_DELAY. + * the delay in seconds. If the argument is specified, but the delay is + * not provided, the value will be DEFAULT_RC_DELAY. */ static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; /* Default number of seconds that the PCIe RC port probe can be delayed. */ #define DEFAULT_RC_DELAY 10 -/* Max number of seconds that the PCIe RC port probe can be delayed. */ -#define MAX_RC_DELAY 20 - /* Array of the PCIe ports configuration info obtained from the BIB. */ struct pcie_port_property pcie_ports[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; @@ -570,14 +567,9 @@ static int setup_pcie_rc_delay(char *str) if (!isdigit(*str)) return -EINVAL; delay = simple_strtoul(str, (char **)&str, 10); - if (delay > MAX_RC_DELAY) - return -EINVAL; } rc_delay[trio_index][mac] = delay ? : DEFAULT_RC_DELAY; - pr_info("Delaying PCIe RC link training for %u sec" - " on MAC %lu on TRIO %lu\n", rc_delay[trio_index][mac], - mac, trio_index); return 0; } early_param("pcie_rc_delay", setup_pcie_rc_delay); @@ -682,12 +674,6 @@ int __init pcibios_init(void) continue; } - /* - * Delay the RC link training if needed. - */ - if (rc_delay[trio_index][mac]) - msleep(rc_delay[trio_index][mac] * 1000); - ret = gxio_trio_force_rc_link_up(trio_context, mac); if (ret < 0) pr_err("PCI: PCIE_FORCE_LINK_UP failure, " @@ -697,10 +683,21 @@ int __init pcibios_init(void) trio_index, controller->mac); /* - * Wait a bit here because some EP devices take longer - * to come up. + * Delay the bus probe if needed. */ - msleep(1000); + if (rc_delay[trio_index][mac]) { + pr_info("Delaying PCIe RC bus enumerating %d sec" + " on MAC %d on TRIO %d\n", + rc_delay[trio_index][mac], mac, + trio_index); + msleep(rc_delay[trio_index][mac] * 1000); + } else { + /* + * Wait a bit here because some EP devices + * take longer to come up. + */ + msleep(1000); + } /* * Check for PCIe link-up status. -- cgit v1.1 From 26cde05a2cb7d4c0f4cd1d4aeeadc2939c972786 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 11:56:44 -0400 Subject: tile PCI RC: handle case that PCI link is already up If we are rebooting (e.g. via kexec) then the PCI RC link may already be up. In that case, we don't want to do the software fixup to force the link up, since that can degrade it to Gen1. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index e99809e..2cc3e64 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -674,10 +674,33 @@ int __init pcibios_init(void) continue; } - ret = gxio_trio_force_rc_link_up(trio_context, mac); - if (ret < 0) - pr_err("PCI: PCIE_FORCE_LINK_UP failure, " - "MAC %d on TRIO %d\n", mac, trio_index); + /* + * Check for PCIe link-up status to decide if we need + * to force the link to come up. + */ + reg_offset = + (TRIO_PCIE_INTFC_PORT_STATUS << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + port_status.word = + __gxio_mmio_read(trio_context->mmio_base_mac + + reg_offset); + if (!port_status.dl_up) { + if (rc_delay[trio_index][mac]) { + pr_info("Delaying PCIe RC TRIO init %d sec" + " on MAC %d on TRIO %d\n", + rc_delay[trio_index][mac], mac, + trio_index); + msleep(rc_delay[trio_index][mac] * 1000); + } + ret = gxio_trio_force_rc_link_up(trio_context, mac); + if (ret < 0) + pr_err("PCI: PCIE_FORCE_LINK_UP failure, " + "MAC %d on TRIO %d\n", mac, trio_index); + } pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i, trio_index, controller->mac); @@ -700,16 +723,8 @@ int __init pcibios_init(void) } /* - * Check for PCIe link-up status. + * Check for PCIe link-up status again. */ - - reg_offset = - (TRIO_PCIE_INTFC_PORT_STATUS << - TRIO_CFG_REGION_ADDR__REG_SHIFT) | - (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << - TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | - (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); - port_status.word = __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset); -- cgit v1.1 From 803c874abe1358998ab65a8cca728684ebb50a13 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 12:24:42 -0400 Subject: tile: support LSI MEGARAID SAS HBA hybrid dma_ops The LSI MEGARAID SAS HBA suffers from the problem where it can do 64-bit DMA to streaming buffers but not to consistent buffers. In other words, 64-bit DMA is used for disk data transfers and 32-bit DMA must be used for control message transfers. According to LSI, the firmware is not fully functional yet. This change implements a kind of hybrid dma_ops to support this. Note that on most other platforms, the 64-bit DMA addressing space is the same as the 32-bit DMA space and they overlap the physical memory space. No special arrangement is needed to support this kind of mixed DMA capability. On TILE-Gx, the 64-bit DMA space is completely separate from the 32-bit DMA space. Due to the use of the IOMMU, the 64-bit DMA space doesn't overlap the physical memory space. On the other hand, the 32-bit DMA space overlaps the physical memory space under 4GB. The separate address spaces make it necessary to have separate dma_ops. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci-dma.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index b9fe80e..7e22e73 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -357,7 +357,7 @@ static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size, addr = page_to_phys(pg); - *dma_handle = phys_to_dma(dev, addr); + *dma_handle = addr + get_dma_offset(dev); return page_address(pg); } @@ -387,7 +387,7 @@ static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist, sg->dma_address = sg_phys(sg); __dma_prep_pa_range(sg->dma_address, sg->length, direction); - sg->dma_address = phys_to_dma(dev, sg->dma_address); + sg->dma_address = sg->dma_address + get_dma_offset(dev); #ifdef CONFIG_NEED_SG_DMA_LENGTH sg->dma_length = sg->length; #endif @@ -422,7 +422,7 @@ static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page, BUG_ON(offset + size > PAGE_SIZE); __dma_prep_page(page, offset, size, direction); - return phys_to_dma(dev, page_to_pa(page) + offset); + return page_to_pa(page) + offset + get_dma_offset(dev); } static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address, @@ -432,7 +432,7 @@ static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address, { BUG_ON(!valid_dma_direction(direction)); - dma_address = dma_to_phys(dev, dma_address); + dma_address -= get_dma_offset(dev); __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)), dma_address & PAGE_OFFSET, size, direction); @@ -445,7 +445,7 @@ static void tile_pci_dma_sync_single_for_cpu(struct device *dev, { BUG_ON(!valid_dma_direction(direction)); - dma_handle = dma_to_phys(dev, dma_handle); + dma_handle -= get_dma_offset(dev); __dma_complete_pa_range(dma_handle, size, direction); } @@ -456,7 +456,7 @@ static void tile_pci_dma_sync_single_for_device(struct device *dev, enum dma_data_direction direction) { - dma_handle = dma_to_phys(dev, dma_handle); + dma_handle -= get_dma_offset(dev); __dma_prep_pa_range(dma_handle, size, direction); } @@ -558,21 +558,43 @@ static struct dma_map_ops pci_swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, }; +static struct dma_map_ops pci_hybrid_dma_ops = { + .alloc = tile_swiotlb_alloc_coherent, + .free = tile_swiotlb_free_coherent, + .map_page = tile_pci_dma_map_page, + .unmap_page = tile_pci_dma_unmap_page, + .map_sg = tile_pci_dma_map_sg, + .unmap_sg = tile_pci_dma_unmap_sg, + .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu, + .sync_single_for_device = tile_pci_dma_sync_single_for_device, + .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu, + .sync_sg_for_device = tile_pci_dma_sync_sg_for_device, + .mapping_error = tile_pci_dma_mapping_error, + .dma_supported = tile_pci_dma_supported +}; + struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops; +struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops; #else struct dma_map_ops *gx_legacy_pci_dma_map_ops; +struct dma_map_ops *gx_hybrid_pci_dma_map_ops; #endif EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops); +EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops); #ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK int dma_set_coherent_mask(struct device *dev, u64 mask) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - /* Handle legacy PCI devices with limited memory addressability. */ - if (((dma_ops == gx_pci_dma_map_ops) || - (dma_ops == gx_legacy_pci_dma_map_ops)) && + /* Handle hybrid PCI devices with limited memory addressability. */ + if ((dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) { + if (dma_ops == gx_pci_dma_map_ops) + set_dma_ops(dev, gx_hybrid_pci_dma_map_ops); + if (mask > dev->archdata.max_direct_dma_addr) mask = dev->archdata.max_direct_dma_addr; } -- cgit v1.1 From 90d9dd66957a744831146dbb1a9e4f96a9106100 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 12:55:15 -0400 Subject: tile PCI RC: support more MSI-X interrupt vectors To support PCIe devices with higher number of MSI-X interrupt vectors, e.g. 16 for the LSI RAID card, enhance the Gx RC stack to provide more MSI-X vectors by using the TRIO Scatter Queues, which provide 8 more vectors in addition to ~10 from the Map Mem regions. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 59 +++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 20 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 2cc3e64..e0d6664 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -1474,32 +1474,55 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) trio_context = controller->trio; /* - * Allocate the Mem-Map that will accept the MSI write and - * trigger the TILE-side interrupts. + * Allocate a scatter-queue that will accept the MSI write and + * trigger the TILE-side interrupts. We use the scatter-queue regions + * before the mem map regions, because the latter are needed by more + * applications. */ - mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0); - if (mem_map < 0) { - dev_printk(KERN_INFO, &pdev->dev, - "%s Mem-Map alloc failure. " - "Failed to initialize MSI interrupts. " - "Falling back to legacy interrupts.\n", - desc->msi_attrib.is_msix ? "MSI-X" : "MSI"); + mem_map = gxio_trio_alloc_scatter_queues(trio_context, 1, 0, 0); + if (mem_map >= 0) { + TRIO_MAP_SQ_DOORBELL_FMT_t doorbell_template = {{ + .pop = 0, + .doorbell = 1, + }}; + + mem_map += TRIO_NUM_MAP_MEM_REGIONS; + mem_map_base = MEM_MAP_INTR_REGIONS_BASE + + mem_map * MEM_MAP_INTR_REGION_SIZE; + mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1; + + msi_addr = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 8; + msg.data = (unsigned int)doorbell_template.word; + } else { + /* SQ regions are out, allocate from map mem regions. */ + mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0); + if (mem_map < 0) { + dev_printk(KERN_INFO, &pdev->dev, + "%s Mem-Map alloc failure. " + "Failed to initialize MSI interrupts. " + "Falling back to legacy interrupts.\n", + desc->msi_attrib.is_msix ? "MSI-X" : "MSI"); + ret = -ENOMEM; + goto msi_mem_map_alloc_failure; + } - ret = -ENOMEM; - goto msi_mem_map_alloc_failure; + mem_map_base = MEM_MAP_INTR_REGIONS_BASE + + mem_map * MEM_MAP_INTR_REGION_SIZE; + mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1; + + msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 - + TRIO_MAP_MEM_REG_INT0; + + msg.data = mem_map; } /* We try to distribute different IRQs to different tiles. */ cpu = tile_irq_cpu(irq); /* - * Now call up to the HV to configure the Mem-Map interrupt and + * Now call up to the HV to configure the MSI interrupt and * set up the IPI binding. */ - mem_map_base = MEM_MAP_INTR_REGIONS_BASE + - mem_map * MEM_MAP_INTR_REGION_SIZE; - mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1; - ret = gxio_trio_config_msi_intr(trio_context, cpu_x(cpu), cpu_y(cpu), KERNEL_PL, irq, controller->mac, mem_map, mem_map_base, mem_map_limit, @@ -1512,13 +1535,9 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) irq_set_msi_desc(irq, desc); - msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 - TRIO_MAP_MEM_REG_INT0; - msg.address_hi = msi_addr >> 32; msg.address_lo = msi_addr & 0xffffffff; - msg.data = mem_map; - write_msi_msg(irq, &msg); irq_set_chip_and_handler(irq, &tilegx_msi_chip, handle_level_irq); irq_set_handler_data(irq, controller); -- cgit v1.1 From a3c4f2fb26974b5134861af8f7593040ae61a1f4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 16:12:49 -0400 Subject: tile PCI RC: gentler warning for missing plug-in PCI Besides using pr_info() to print the linkdown status for a plug-in slot, add extra indication that this is expected if the slot is empty. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index e0d6664..bf8c69d 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -729,8 +729,14 @@ int __init pcibios_init(void) __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset); if (!port_status.dl_up) { - pr_err("PCI: link is down, MAC %d on TRIO %d\n", - mac, trio_index); + if (pcie_ports[trio_index][mac].removable) { + pr_info("PCI: link is down, MAC %d on TRIO %d\n", + mac, trio_index); + pr_info("This is expected if no PCIe card" + " is connected to this link\n"); + } else + pr_err("PCI: link is down, MAC %d on TRIO %d\n", + mac, trio_index); continue; } -- cgit v1.1 From cf89c4262bd5fa70e67953126001c08ecea4f346 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 2 Aug 2013 16:45:22 -0400 Subject: tile PCI RC: support I/O space access To enable this functionality, configure CONFIG_TILE_PCI_IO. Without this flag, the kernel still assigns I/O address ranges to the devices, but no TRIO resource and mapping support is provided. We assign disjoint I/O address ranges to separate PCIe domains. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 128 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 120 insertions(+), 8 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index bf8c69d..fde3589 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -77,6 +77,9 @@ static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; /* Default number of seconds that the PCIe RC port probe can be delayed. */ #define DEFAULT_RC_DELAY 10 +/* The PCI I/O space size in each PCI domain. */ +#define IO_SPACE_SIZE 0x10000 + /* Array of the PCIe ports configuration info obtained from the BIB. */ struct pcie_port_property pcie_ports[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; @@ -421,6 +424,17 @@ out: controller->index = i; controller->ops = &tile_cfg_ops; + controller->io_space.start = PCIBIOS_MIN_IO + + (i * IO_SPACE_SIZE); + controller->io_space.end = controller->io_space.start + + IO_SPACE_SIZE - 1; + BUG_ON(controller->io_space.end > IO_SPACE_LIMIT); + controller->io_space.flags = IORESOURCE_IO; + snprintf(controller->io_space_name, + sizeof(controller->io_space_name), + "PCI I/O domain %d", i); + controller->io_space.name = controller->io_space_name; + /* * The PCI memory resource is located above the PA space. * For every host bridge, the BAR window or the MMIO aperture @@ -861,17 +875,16 @@ int __init pcibios_init(void) /* * The PCI memory resource is located above the PA space. * The memory range for the PCI root bus should not overlap - * with the physical RAM + * with the physical RAM. */ pci_add_resource_offset(&resources, &controller->mem_space, controller->mem_offset); - + pci_add_resource(&resources, &controller->io_space); controller->first_busno = next_busno; bus = pci_scan_root_bus(NULL, next_busno, controller->ops, controller, &resources); controller->root_bus = bus; next_busno = bus->busn_res.end + 1; - } /* Do machine dependent PCI interrupt routing */ @@ -915,9 +928,9 @@ int __init pcibios_init(void) pci_controllers[i].mem_resources[0] = *next_bus->resource[0]; pci_controllers[i].mem_resources[1] = - *next_bus->resource[1]; + *next_bus->resource[1]; pci_controllers[i].mem_resources[2] = - *next_bus->resource[2]; + *next_bus->resource[2]; break; } @@ -967,6 +980,39 @@ int __init pcibios_init(void) continue; } +#ifdef CONFIG_TILE_PCI_IO + /* + * Alloc a PIO region for PCI I/O space access for each RC port. + */ + ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); + if (ret < 0) { + pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, " + "give up\n", controller->trio_index, + controller->mac); + + continue; + } + + controller->pio_io_index = ret; + + /* + * For PIO IO, the bus_address_hi parameter is hard-coded 0 + * because PCI I/O address space is 32-bit. + */ + ret = gxio_trio_init_pio_region_aux(trio_context, + controller->pio_io_index, + controller->mac, + 0, + HV_TRIO_PIO_FLAG_IO_SPACE); + if (ret < 0) { + pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, " + "give up\n", controller->trio_index, + controller->mac); + + continue; + } +#endif + /* * Configure a Mem-Map region for each memory controller so * that Linux can map all of its PA space to the PCI bus. @@ -1052,8 +1098,7 @@ char *pcibios_setup(char *str) /* * Enable memory address decoding, as appropriate, for the - * device described by the 'dev' struct. The I/O decoding - * is disabled, though the TILE-Gx supports I/O addressing. + * device described by the 'dev' struct. * * This is called from the generic PCI layer, and can be called * for bridges or endpoints. @@ -1134,10 +1179,77 @@ got_it: * We need to keep the PCI bus address's in-page offset in the VA. */ return iorpc_ioremap(trio_fd, offset, size) + - (phys_addr & (PAGE_SIZE - 1)); + (start & (PAGE_SIZE - 1)); } EXPORT_SYMBOL(ioremap); +#ifdef CONFIG_TILE_PCI_IO +/* Map a PCI I/O address into VA space. */ +void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + struct pci_controller *controller = NULL; + resource_size_t bar_start; + resource_size_t bar_end; + resource_size_t offset; + resource_size_t start; + resource_size_t end; + int trio_fd; + int i; + + start = port; + end = port + size - 1; + + /* + * In the following, each PCI controller's mem_resources[0] + * represents its PCI I/O resource. By searching port in each + * controller's mem_resources[0], we can determine the controller + * that should accept the PCI I/O access. + */ + + for (i = 0; i < num_rc_controllers; i++) { + /* + * Skip controllers that are not properly initialized or + * have down links. + */ + if (pci_controllers[i].root_bus == NULL) + continue; + + bar_start = pci_controllers[i].mem_resources[0].start; + bar_end = pci_controllers[i].mem_resources[0].end; + + if ((start >= bar_start) && (end <= bar_end)) { + + controller = &pci_controllers[i]; + + goto got_it; + } + } + + if (controller == NULL) + return NULL; + +got_it: + trio_fd = controller->trio->fd; + + /* Convert the resource start to the bus address offset. */ + port -= controller->io_space.start; + + offset = HV_TRIO_PIO_OFFSET(controller->pio_io_index) + port; + + /* + * We need to keep the PCI bus address's in-page offset in the VA. + */ + return iorpc_ioremap(trio_fd, offset, size) + (port & (PAGE_SIZE - 1)); +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(ioport_unmap); +#endif + void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { iounmap(addr); -- cgit v1.1 From 9b6846cede40cf12429d0fa06b3af0a0f40ae596 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 12:37:24 -0400 Subject: tile PCI DMA: handle a NULL dev argument properly Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci-dma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 7e22e73..adc369d 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -36,8 +36,9 @@ static void *tile_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) { - u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32); - int node = dev_to_node(dev); + u64 dma_mask = (dev && dev->coherent_dma_mask) ? + dev->coherent_dma_mask : DMA_BIT_MASK(32); + int node = dev ? dev_to_node(dev) : 0; int order = get_order(size); struct page *pg; dma_addr_t addr; -- cgit v1.1 From 1c43649a9929ca4394ae389b4510c61f3876a12b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:18:02 -0400 Subject: tile PCI RC: restructure TRIO initialization The TRIO shim initialization is shared with other kernel drivers such as the endpoint and StreamIO drivers, so reorganize the initialization flow to ensure that the root complex driver properly initializes TRIO state regardless of what kind of TRIO driver will end up using the shim. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 209 ++++++++++++++++++++++++++-------------------- 1 file changed, 117 insertions(+), 92 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index fde3589..8051638 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -80,16 +80,28 @@ static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; /* The PCI I/O space size in each PCI domain. */ #define IO_SPACE_SIZE 0x10000 +/* Provide shorter versions of some very long constant names. */ +#define AUTO_CONFIG_RC \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC +#define AUTO_CONFIG_RC_G1 \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1 +#define AUTO_CONFIG_EP \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT +#define AUTO_CONFIG_EP_G1 \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1 + /* Array of the PCIe ports configuration info obtained from the BIB. */ struct pcie_port_property pcie_ports[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; +/* Number of configured TRIO instances. */ +int num_trio_shims; + /* All drivers share the TRIO contexts defined here. */ gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; /* Pointer to an array of PCIe RC controllers. */ struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; int num_rc_controllers; -static int num_ep_controllers; static struct pci_ops tile_cfg_ops; @@ -141,13 +153,14 @@ static int tile_pcie_open(int trio_index) { gxio_trio_context_t *context = &trio_contexts[trio_index]; int ret; + int mac; /* * This opens a file descriptor to the TRIO shim. */ ret = gxio_trio_init(context, trio_index); if (ret < 0) - return ret; + goto gxio_trio_init_failure; /* * Allocate an ASID for the kernel. @@ -189,17 +202,89 @@ static int tile_pcie_open(int trio_index) } #endif + /* Get the properties of the PCIe ports on this TRIO instance. */ + ret = hv_dev_pread(context->fd, 0, + (HV_VirtAddr)&pcie_ports[trio_index][0], + sizeof(struct pcie_port_property) * TILEGX_TRIO_PCIES, + GXIO_TRIO_OP_GET_PORT_PROPERTY); + if (ret < 0) { + pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d," + " on TRIO %d\n", ret, trio_index); + goto get_port_property_failure; + } + + context->mmio_base_mac = + iorpc_ioremap(context->fd, 0, HV_TRIO_CONFIG_IOREMAP_SIZE); + if (context->mmio_base_mac == NULL) { + pr_err("PCI: TRIO config space mapping failure, error %d," + " on TRIO %d\n", ret, trio_index); + ret = -ENOMEM; + + goto trio_mmio_mapping_failure; + } + + /* Check the port strap state which will override the BIB setting. */ + for (mac = 0; mac < TILEGX_TRIO_PCIES; mac++) { + TRIO_PCIE_INTFC_PORT_CONFIG_t port_config; + unsigned int reg_offset; + + /* Ignore ports that are not specified in the BIB. */ + if (!pcie_ports[trio_index][mac].allow_rc && + !pcie_ports[trio_index][mac].allow_ep) + continue; + + reg_offset = + (TRIO_PCIE_INTFC_PORT_CONFIG << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + port_config.word = + __gxio_mmio_read(context->mmio_base_mac + reg_offset); + + if (port_config.strap_state != AUTO_CONFIG_RC && + port_config.strap_state != AUTO_CONFIG_RC_G1) { + /* + * If this is really intended to be an EP port, record + * it so that the endpoint driver will know about it. + */ + if (port_config.strap_state == AUTO_CONFIG_EP || + port_config.strap_state == AUTO_CONFIG_EP_G1) + pcie_ports[trio_index][mac].allow_ep = 1; + } + } + return ret; +trio_mmio_mapping_failure: +get_port_property_failure: asid_alloc_failure: #ifdef USE_SHARED_PCIE_CONFIG_REGION pio_alloc_failure: #endif hv_dev_close(context->fd); +gxio_trio_init_failure: + context->fd = -1; return ret; } +static int __init tile_trio_init(void) +{ + int i; + + /* We loop over all the TRIO shims. */ + for (i = 0; i < TILEGX_NUM_TRIO; i++) { + if (tile_pcie_open(i) < 0) + continue; + num_trio_shims++; + } + + return 0; +} +postcore_initcall(tile_trio_init); + static void tilegx_legacy_irq_ack(struct irq_data *d) { @@ -320,14 +405,39 @@ free_irqs: } /* + * Return 1 if the port is strapped to operate in RC mode. + */ +static int +strapped_for_rc(gxio_trio_context_t *trio_context, int mac) +{ + TRIO_PCIE_INTFC_PORT_CONFIG_t port_config; + unsigned int reg_offset; + + /* Check the port configuration. */ + reg_offset = + (TRIO_PCIE_INTFC_PORT_CONFIG << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + port_config.word = + __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset); + + if (port_config.strap_state == AUTO_CONFIG_RC || + port_config.strap_state == AUTO_CONFIG_RC_G1) + return 1; + else + return 0; +} + +/* * Find valid controllers and fill in pci_controller structs for each * of them. * - * Returns the number of controllers discovered. + * Return the number of controllers discovered. */ int __init tile_pci_init(void) { - int num_trio_shims = 0; int ctl_index = 0; int i, j; @@ -338,19 +448,6 @@ int __init tile_pci_init(void) pr_info("PCI: Searching for controllers...\n"); - /* - * We loop over all the TRIO shims. - */ - for (i = 0; i < TILEGX_NUM_TRIO; i++) { - int ret; - - ret = tile_pcie_open(i); - if (ret < 0) - continue; - - num_trio_shims++; - } - if (num_trio_shims == 0 || sim_is_simulator()) return 0; @@ -361,29 +458,16 @@ int __init tile_pci_init(void) */ for (i = 0; i < TILEGX_NUM_TRIO; i++) { gxio_trio_context_t *context = &trio_contexts[i]; - int ret; if (context->fd < 0) continue; - ret = hv_dev_pread(context->fd, 0, - (HV_VirtAddr)&pcie_ports[i][0], - sizeof(struct pcie_port_property) * TILEGX_TRIO_PCIES, - GXIO_TRIO_OP_GET_PORT_PROPERTY); - if (ret < 0) { - pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d," - " on TRIO %d\n", ret, i); - continue; - } - for (j = 0; j < TILEGX_TRIO_PCIES; j++) { - if (pcie_ports[i][j].allow_rc) { + if (pcie_ports[i][j].allow_rc && + strapped_for_rc(context, j)) { pcie_rc[i][j] = 1; num_rc_controllers++; } - else if (pcie_ports[i][j].allow_ep) { - num_ep_controllers++; - } } } @@ -600,35 +684,10 @@ int __init pcibios_init(void) tile_pci_init(); - if (num_rc_controllers == 0 && num_ep_controllers == 0) + if (num_rc_controllers == 0) return 0; /* - * We loop over all the TRIO shims and set up the MMIO mappings. - */ - for (i = 0; i < TILEGX_NUM_TRIO; i++) { - gxio_trio_context_t *context = &trio_contexts[i]; - - if (context->fd < 0) - continue; - - /* - * Map in the MMIO space for the MAC. - */ - offset = 0; - context->mmio_base_mac = - iorpc_ioremap(context->fd, offset, - HV_TRIO_CONFIG_IOREMAP_SIZE); - if (context->mmio_base_mac == NULL) { - pr_err("PCI: MAC map failure on TRIO %d\n", i); - - hv_dev_close(context->fd); - context->fd = -1; - continue; - } - } - - /* * Delay a bit in case devices aren't ready. Some devices are * known to require at least 20ms here, but we use a more * conservative value. @@ -639,7 +698,6 @@ int __init pcibios_init(void) for (next_busno = 0, i = 0; i < num_rc_controllers; i++) { struct pci_controller *controller = &pci_controllers[i]; gxio_trio_context_t *trio_context = controller->trio; - TRIO_PCIE_INTFC_PORT_CONFIG_t port_config; TRIO_PCIE_INTFC_PORT_STATUS_t port_status; TRIO_PCIE_INTFC_TX_FIFO_CTL_t tx_fifo_ctl; struct pci_bus *bus; @@ -656,39 +714,6 @@ int __init pcibios_init(void) mac = controller->mac; /* - * Check the port strap state which will override the BIB - * setting. - */ - - reg_offset = - (TRIO_PCIE_INTFC_PORT_CONFIG << - TRIO_CFG_REGION_ADDR__REG_SHIFT) | - (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << - TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | - (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); - - port_config.word = - __gxio_mmio_read(trio_context->mmio_base_mac + - reg_offset); - - if ((port_config.strap_state != - TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC) && - (port_config.strap_state != - TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1)) { - /* - * If this is really intended to be an EP port, - * record it so that the endpoint driver will know about it. - */ - if (port_config.strap_state == - TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT || - port_config.strap_state == - TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1) - pcie_ports[trio_index][mac].allow_ep = 1; - - continue; - } - - /* * Check for PCIe link-up status to decide if we need * to force the link to come up. */ -- cgit v1.1 From 1198168733c8d6fbc6898fd8d7fcfb42befabb41 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:18:34 -0400 Subject: tile PCI RC: eliminate pci_controller.mem_resources field The .mem_resources[] field in the pci_controller struct is now obsoleted by the .mem_space and .io_space fields. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 71 ++++++++--------------------------------------- 1 file changed, 12 insertions(+), 59 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 8051638..6837be2 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -929,9 +929,6 @@ int __init pcibios_init(void) struct pci_controller *controller = &pci_controllers[i]; gxio_trio_context_t *trio_context = controller->trio; struct pci_bus *root_bus = pci_controllers[i].root_bus; - struct pci_bus *next_bus; - uint32_t bus_address_hi; - struct pci_dev *dev; int ret; int j; @@ -945,35 +942,6 @@ int __init pcibios_init(void) /* Configure the max_payload_size values for this domain. */ fixup_read_and_payload_sizes(controller); - list_for_each_entry(dev, &root_bus->devices, bus_list) { - /* Find the PCI host controller, ie. the 1st bridge. */ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && - (PCI_SLOT(dev->devfn) == 0)) { - next_bus = dev->subordinate; - pci_controllers[i].mem_resources[0] = - *next_bus->resource[0]; - pci_controllers[i].mem_resources[1] = - *next_bus->resource[1]; - pci_controllers[i].mem_resources[2] = - *next_bus->resource[2]; - - break; - } - } - - if (pci_controllers[i].mem_resources[1].flags & IORESOURCE_MEM) - bus_address_hi = - pci_controllers[i].mem_resources[1].start >> 32; - else if (pci_controllers[i].mem_resources[2].flags & IORESOURCE_PREFETCH) - bus_address_hi = - pci_controllers[i].mem_resources[2].start >> 32; - else { - /* This is unlikely. */ - pr_err("PCI: no memory resources on TRIO %d mac %d\n", - controller->trio_index, controller->mac); - continue; - } - /* * Alloc a PIO region for PCI memory access for each RC port. */ @@ -1153,16 +1121,13 @@ void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) resource_size_t start; resource_size_t end; int trio_fd; - int i, j; + int i; start = phys_addr; end = phys_addr + size - 1; /* - * In the following, each PCI controller's mem_resources[1] - * represents its (non-prefetchable) PCI memory resource and - * mem_resources[2] refers to its prefetchable PCI memory resource. - * By searching phys_addr in each controller's mem_resources[], we can + * By searching phys_addr in each controller's mem_space, we can * determine the controller that should accept the PCI memory access. */ @@ -1174,25 +1139,18 @@ void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) if (pci_controllers[i].root_bus == NULL) continue; - for (j = 1; j < 3; j++) { - bar_start = - pci_controllers[i].mem_resources[j].start; - bar_end = - pci_controllers[i].mem_resources[j].end; - - if ((start >= bar_start) && (end <= bar_end)) { + bar_start = pci_controllers[i].mem_space.start; + bar_end = pci_controllers[i].mem_space.end; - controller = &pci_controllers[i]; - - goto got_it; - } + if ((start >= bar_start) && (end <= bar_end)) { + controller = &pci_controllers[i]; + break; } } if (controller == NULL) return NULL; -got_it: trio_fd = controller->trio->fd; /* Convert the resource start to the bus address offset. */ @@ -1225,10 +1183,8 @@ void __iomem *ioport_map(unsigned long port, unsigned int size) end = port + size - 1; /* - * In the following, each PCI controller's mem_resources[0] - * represents its PCI I/O resource. By searching port in each - * controller's mem_resources[0], we can determine the controller - * that should accept the PCI I/O access. + * By searching the port in each controller's io_space, we can + * determine the controller that should accept the PCI I/O access. */ for (i = 0; i < num_rc_controllers; i++) { @@ -1239,21 +1195,18 @@ void __iomem *ioport_map(unsigned long port, unsigned int size) if (pci_controllers[i].root_bus == NULL) continue; - bar_start = pci_controllers[i].mem_resources[0].start; - bar_end = pci_controllers[i].mem_resources[0].end; + bar_start = pci_controllers[i].io_space.start; + bar_end = pci_controllers[i].io_space.end; if ((start >= bar_start) && (end <= bar_end)) { - controller = &pci_controllers[i]; - - goto got_it; + break; } } if (controller == NULL) return NULL; -got_it: trio_fd = controller->trio->fd; /* Convert the resource start to the bus address offset. */ -- cgit v1.1 From eafa5c8a10f52e5d05870a40e2ffb49537d4471f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:29:34 -0400 Subject: tile PCI RC: bomb comments and whitespace format This change is purely stylistic but improves the readability of the tile PCI RC driver. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 180 +++++++++++++++------------------------------- 1 file changed, 56 insertions(+), 124 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 6837be2..8352d85 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -108,17 +108,15 @@ static struct pci_ops tile_cfg_ops; /* Mask of CPUs that should receive PCIe interrupts. */ static struct cpumask intr_cpus_map; -/* - * We don't need to worry about the alignment of resources. - */ +/* We don't need to worry about the alignment of resources. */ resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) + resource_size_t size, + resource_size_t align) { return res->start; } EXPORT_SYMBOL(pcibios_align_resource); - /* * Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #. * For now, we simply send interrupts to non-dataplane CPUs. @@ -146,25 +144,19 @@ static int tile_irq_cpu(int irq) return cpu; } -/* - * Open a file descriptor to the TRIO shim. - */ +/* Open a file descriptor to the TRIO shim. */ static int tile_pcie_open(int trio_index) { gxio_trio_context_t *context = &trio_contexts[trio_index]; int ret; int mac; - /* - * This opens a file descriptor to the TRIO shim. - */ + /* This opens a file descriptor to the TRIO shim. */ ret = gxio_trio_init(context, trio_index); if (ret < 0) goto gxio_trio_init_failure; - /* - * Allocate an ASID for the kernel. - */ + /* Allocate an ASID for the kernel. */ ret = gxio_trio_alloc_asids(context, 1, 0, 0); if (ret < 0) { pr_err("PCI: ASID alloc failure on TRIO %d, give up\n", @@ -285,20 +277,17 @@ static int __init tile_trio_init(void) } postcore_initcall(tile_trio_init); -static void -tilegx_legacy_irq_ack(struct irq_data *d) +static void tilegx_legacy_irq_ack(struct irq_data *d) { __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq); } -static void -tilegx_legacy_irq_mask(struct irq_data *d) +static void tilegx_legacy_irq_mask(struct irq_data *d) { __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq); } -static void -tilegx_legacy_irq_unmask(struct irq_data *d) +static void tilegx_legacy_irq_unmask(struct irq_data *d) { __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq); } @@ -319,8 +308,7 @@ static struct irq_chip tilegx_legacy_irq_chip = { * to Linux which just calls handle_level_irq() after clearing the * MAC INTx Assert status bit associated with this interrupt. */ -static void -trio_handle_level_irq(unsigned int irq, struct irq_desc *desc) +static void trio_handle_level_irq(unsigned int irq, struct irq_desc *desc) { struct pci_controller *controller = irq_desc_get_handler_data(desc); gxio_trio_context_t *trio_context = controller->trio; @@ -386,9 +374,7 @@ static int tile_init_irqs(struct pci_controller *controller) goto free_irqs; } - /* - * Register the IRQ handler with the kernel. - */ + /* Register the IRQ handler with the kernel. */ irq_set_chip_and_handler(irq, &tilegx_legacy_irq_chip, trio_handle_level_irq); irq_set_chip_data(irq, (void *)(uint64_t)i); @@ -471,15 +457,11 @@ int __init tile_pci_init(void) } } - /* - * Return if no PCIe ports are configured to operate in RC mode. - */ + /* Return if no PCIe ports are configured to operate in RC mode. */ if (num_rc_controllers == 0) return 0; - /* - * Set the TRIO pointer and MAC index for each PCIe RC port. - */ + /* Set the TRIO pointer and MAC index for each PCIe RC port. */ for (i = 0; i < TILEGX_NUM_TRIO; i++) { for (j = 0; j < TILEGX_TRIO_PCIES; j++) { if (pcie_rc[i][j]) { @@ -495,14 +477,10 @@ int __init tile_pci_init(void) } out: - /* - * Configure each PCIe RC port. - */ + /* Configure each PCIe RC port. */ for (i = 0; i < num_rc_controllers; i++) { - /* - * Configure the PCIe MAC to run in RC mode. - */ + /* Configure the PCIe MAC to run in RC mode. */ struct pci_controller *controller = &pci_controllers[i]; controller->index = i; @@ -525,7 +503,6 @@ out: * is in range [3GB, 4GB - 1] of a 4GB space beyond the * PA space. */ - controller->mem_offset = TILE_PCI_MEM_START + (i * TILE_PCI_BAR_WINDOW_TOP); controller->mem_space.start = controller->mem_offset + @@ -553,7 +530,6 @@ static int tile_map_irq(const struct pci_dev *dev, u8 device, u8 pin) return controller->irq_intx_table[pin - 1]; } - static void fixup_read_and_payload_sizes(struct pci_controller *controller) { gxio_trio_context_t *trio_context = controller->trio; @@ -567,9 +543,7 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) mac = controller->mac; - /* - * Set our max read request size to be 4KB. - */ + /* Set our max read request size to be 4KB. */ reg_offset = (TRIO_PCIE_RC_DEVICE_CONTROL << TRIO_CFG_REGION_ADDR__REG_SHIFT) | @@ -578,10 +552,10 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac + - reg_offset); + reg_offset); dev_control.max_read_req_sz = 5; __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset, - dev_control.word); + dev_control.word); /* * Set the max payload size supported by this Gx PCIe MAC. @@ -597,10 +571,10 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); rc_dev_cap.word = __gxio_mmio_read32(trio_context->mmio_base_mac + - reg_offset); + reg_offset); rc_dev_cap.mps_sup = 1; __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset, - rc_dev_cap.word); + rc_dev_cap.word); /* Configure PCI Express MPS setting. */ list_for_each_entry(child, &root_bus->children, node) { @@ -628,7 +602,7 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) dev_control.max_payload_size, dev_control.max_read_req_sz, mac); - if (err < 0) { + if (err < 0) { pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, " "MAC %d on TRIO %d\n", mac, controller->trio_index); @@ -672,9 +646,7 @@ static int setup_pcie_rc_delay(char *str) } early_param("pcie_rc_delay", setup_pcie_rc_delay); -/* - * PCI initialization entry point, called by subsys_initcall. - */ +/* PCI initialization entry point, called by subsys_initcall. */ int __init pcibios_init(void) { resource_size_t offset; @@ -744,9 +716,7 @@ int __init pcibios_init(void) pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i, trio_index, controller->mac); - /* - * Delay the bus probe if needed. - */ + /* Delay the bus probe if needed. */ if (rc_delay[trio_index][mac]) { pr_info("Delaying PCIe RC bus enumerating %d sec" " on MAC %d on TRIO %d\n", @@ -761,9 +731,7 @@ int __init pcibios_init(void) msleep(1000); } - /* - * Check for PCIe link-up status again. - */ + /* Check for PCIe link-up status again. */ port_status.word = __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset); @@ -801,7 +769,6 @@ int __init pcibios_init(void) * Change the device ID so that Linux bus crawl doesn't confuse * the internal bridge with any Tilera endpoints. */ - reg_offset = (TRIO_PCIE_RC_DEVICE_ID_VEN_ID << TRIO_CFG_REGION_ADDR__REG_SHIFT) | @@ -814,10 +781,7 @@ int __init pcibios_init(void) TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT) | TILERA_VENDOR_ID); - /* - * Set the internal P2P bridge class code. - */ - + /* Set the internal P2P bridge class code. */ reg_offset = (TRIO_PCIE_RC_REVISION_ID << TRIO_CFG_REGION_ADDR__REG_SHIFT) | @@ -828,26 +792,22 @@ int __init pcibios_init(void) class_code_revision = __gxio_mmio_read32(trio_context->mmio_base_mac + reg_offset); - class_code_revision = (class_code_revision & 0xff ) | - (PCI_CLASS_BRIDGE_PCI << 16); + class_code_revision = (class_code_revision & 0xff) | + (PCI_CLASS_BRIDGE_PCI << 16); __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset, class_code_revision); #ifdef USE_SHARED_PCIE_CONFIG_REGION - /* - * Map in the MMIO space for the PIO region. - */ + /* Map in the MMIO space for the PIO region. */ offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index) | (((unsigned long long)mac) << TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT); #else - /* - * Alloc a PIO region for PCI config access per MAC. - */ + /* Alloc a PIO region for PCI config access per MAC. */ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); if (ret < 0) { pr_err("PCI: PCI CFG PIO alloc failure for mac %d " @@ -858,9 +818,7 @@ int __init pcibios_init(void) trio_context->pio_cfg_index[mac] = ret; - /* - * For PIO CFG, the bus_address_hi parameter is 0. - */ + /* For PIO CFG, the bus_address_hi parameter is 0. */ ret = gxio_trio_init_pio_region_aux(trio_context, trio_context->pio_cfg_index[mac], mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE); @@ -887,9 +845,7 @@ int __init pcibios_init(void) continue; } - /* - * Initialize the PCIe interrupts. - */ + /* Initialize the PCIe interrupts. */ if (tile_init_irqs(controller)) { pr_err("PCI: IRQs init failure for mac %d on TRIO %d\n", mac, trio_index); @@ -921,7 +877,6 @@ int __init pcibios_init(void) * It allocates all of the resources (I/O memory, etc) * associated with the devices read in above. */ - pci_assign_unassigned_resources(); /* Record the I/O resources in the PCI controller structure. */ @@ -942,14 +897,12 @@ int __init pcibios_init(void) /* Configure the max_payload_size values for this domain. */ fixup_read_and_payload_sizes(controller); - /* - * Alloc a PIO region for PCI memory access for each RC port. - */ + /* Alloc a PIO region for PCI memory access for each RC port. */ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); if (ret < 0) { pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + "give up\n", controller->trio_index, + controller->mac); continue; } @@ -967,8 +920,8 @@ int __init pcibios_init(void) 0); if (ret < 0) { pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + "give up\n", controller->trio_index, + controller->mac); continue; } @@ -980,8 +933,8 @@ int __init pcibios_init(void) ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); if (ret < 0) { pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + "give up\n", controller->trio_index, + controller->mac); continue; } @@ -999,8 +952,8 @@ int __init pcibios_init(void) HV_TRIO_PIO_FLAG_IO_SPACE); if (ret < 0) { pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, " - "give up\n", controller->trio_index, - controller->mac); + "give up\n", controller->trio_index, + controller->mac); continue; } @@ -1020,9 +973,9 @@ int __init pcibios_init(void) 0); if (ret < 0) { pr_err("PCI: Mem-Map alloc failure on TRIO %d " - "mac %d for MC %d, give up\n", - controller->trio_index, - controller->mac, j); + "mac %d for MC %d, give up\n", + controller->trio_index, + controller->mac, j); goto alloc_mem_map_failed; } @@ -1053,9 +1006,9 @@ int __init pcibios_init(void) GXIO_TRIO_ORDER_MODE_UNORDERED); if (ret < 0) { pr_err("PCI: Mem-Map init failure on TRIO %d " - "mac %d for MC %d, give up\n", - controller->trio_index, - controller->mac, j); + "mac %d for MC %d, give up\n", + controller->trio_index, + controller->mac, j); goto alloc_mem_map_failed; } @@ -1064,22 +1017,18 @@ int __init pcibios_init(void) alloc_mem_map_failed: break; } - } return 0; } subsys_initcall(pcibios_init); -/* Note: to be deleted after Linux 3.6 merge. */ +/* No bus fixups needed. */ void pcibios_fixup_bus(struct pci_bus *bus) { } -/* - * This can be called from the generic PCI layer, but doesn't need to - * do anything. - */ +/* Process any "pci=" kernel boot arguments. */ char *pcibios_setup(char *str) { if (!strcmp(str, "off")) { @@ -1130,7 +1079,6 @@ void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) * By searching phys_addr in each controller's mem_space, we can * determine the controller that should accept the PCI memory access. */ - for (i = 0; i < num_rc_controllers; i++) { /* * Skip controllers that are not properly initialized or @@ -1158,9 +1106,7 @@ void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) offset = HV_TRIO_PIO_OFFSET(controller->pio_mem_index) + start; - /* - * We need to keep the PCI bus address's in-page offset in the VA. - */ + /* We need to keep the PCI bus address's in-page offset in the VA. */ return iorpc_ioremap(trio_fd, offset, size) + (start & (PAGE_SIZE - 1)); } @@ -1186,7 +1132,6 @@ void __iomem *ioport_map(unsigned long port, unsigned int size) * By searching the port in each controller's io_space, we can * determine the controller that should accept the PCI I/O access. */ - for (i = 0; i < num_rc_controllers; i++) { /* * Skip controllers that are not properly initialized or @@ -1214,9 +1159,7 @@ void __iomem *ioport_map(unsigned long port, unsigned int size) offset = HV_TRIO_PIO_OFFSET(controller->pio_io_index) + port; - /* - * We need to keep the PCI bus address's in-page offset in the VA. - */ + /* We need to keep the PCI bus address's in-page offset in the VA. */ return iorpc_ioremap(trio_fd, offset, size) + (port & (PAGE_SIZE - 1)); } EXPORT_SYMBOL(ioport_map); @@ -1249,7 +1192,6 @@ EXPORT_SYMBOL(pci_iounmap); * offset is in bytes, from the start of config space for the * specified bus & device. */ - static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset, int size, u32 *val) { @@ -1299,7 +1241,6 @@ static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset, * Accesses to the directly attached device have to be * sent as type-0 configs. */ - if (busnum == (controller->first_busno + 1)) { /* * There is only one device off of our built-in P2P bridge. @@ -1321,9 +1262,8 @@ static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset, * Note that we don't set the mac field in cfg_addr because the * mapping is per port. */ - mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] + - cfg_addr.word; + cfg_addr.word; valid_device: @@ -1427,7 +1367,6 @@ static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset, * Accesses to the directly attached device have to be * sent as type-0 configs. */ - if (busnum == (controller->first_busno + 1)) { /* * There is only one device off of our built-in P2P bridge. @@ -1449,7 +1388,6 @@ static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset, * Note that we don't set the mac field in cfg_addr because the * mapping is per port. */ - mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] + cfg_addr.word; @@ -1487,11 +1425,8 @@ static struct pci_ops tile_cfg_ops = { }; -/* - * MSI support starts here. - */ -static unsigned int -tilegx_msi_startup(struct irq_data *d) +/* MSI support starts here. */ +static unsigned int tilegx_msi_startup(struct irq_data *d) { if (d->msi_desc) unmask_msi_irq(d); @@ -1499,21 +1434,18 @@ tilegx_msi_startup(struct irq_data *d) return 0; } -static void -tilegx_msi_ack(struct irq_data *d) +static void tilegx_msi_ack(struct irq_data *d) { __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq); } -static void -tilegx_msi_mask(struct irq_data *d) +static void tilegx_msi_mask(struct irq_data *d) { mask_msi_irq(d); __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq); } -static void -tilegx_msi_unmask(struct irq_data *d) +static void tilegx_msi_unmask(struct irq_data *d) { __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq); unmask_msi_irq(d); -- cgit v1.1 From 8d9e53b93de7383d5bb4b3507f146bfcd83c6e5d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:32:52 -0400 Subject: tile PCI RC: use proper accessor function Using the low-level hv_dev_pread() API makes assumptions about the layout of datastructures in the Tilera hypervisor API; it's better to use the gxio_XXX accessor and the pcie_trio_ports_property struct. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 8352d85..de5008b 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -91,7 +91,7 @@ static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1 /* Array of the PCIe ports configuration info obtained from the BIB. */ -struct pcie_port_property pcie_ports[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; +struct pcie_trio_ports_property pcie_ports[TILEGX_NUM_TRIO]; /* Number of configured TRIO instances. */ int num_trio_shims; @@ -195,10 +195,7 @@ static int tile_pcie_open(int trio_index) #endif /* Get the properties of the PCIe ports on this TRIO instance. */ - ret = hv_dev_pread(context->fd, 0, - (HV_VirtAddr)&pcie_ports[trio_index][0], - sizeof(struct pcie_port_property) * TILEGX_TRIO_PCIES, - GXIO_TRIO_OP_GET_PORT_PROPERTY); + ret = gxio_trio_get_port_property(context, &pcie_ports[trio_index]); if (ret < 0) { pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d," " on TRIO %d\n", ret, trio_index); @@ -221,8 +218,8 @@ static int tile_pcie_open(int trio_index) unsigned int reg_offset; /* Ignore ports that are not specified in the BIB. */ - if (!pcie_ports[trio_index][mac].allow_rc && - !pcie_ports[trio_index][mac].allow_ep) + if (!pcie_ports[trio_index].ports[mac].allow_rc && + !pcie_ports[trio_index].ports[mac].allow_ep) continue; reg_offset = @@ -243,7 +240,7 @@ static int tile_pcie_open(int trio_index) */ if (port_config.strap_state == AUTO_CONFIG_EP || port_config.strap_state == AUTO_CONFIG_EP_G1) - pcie_ports[trio_index][mac].allow_ep = 1; + pcie_ports[trio_index].ports[mac].allow_ep = 1; } } @@ -438,9 +435,10 @@ int __init tile_pci_init(void) return 0; /* - * Now determine which PCIe ports are configured to operate in RC mode. - * We look at the Board Information Block first and then see if there - * are any overriding configuration by the HW strapping pin. + * Now determine which PCIe ports are configured to operate in RC + * mode. To use a port, it must be allowed to be in RC mode by the + * Board Information Block, and the hardware strapping pins must be + * set to RC mode. */ for (i = 0; i < TILEGX_NUM_TRIO; i++) { gxio_trio_context_t *context = &trio_contexts[i]; @@ -449,7 +447,7 @@ int __init tile_pci_init(void) continue; for (j = 0; j < TILEGX_TRIO_PCIES; j++) { - if (pcie_ports[i][j].allow_rc && + if (pcie_ports[i].ports[j].allow_rc && strapped_for_rc(context, j)) { pcie_rc[i][j] = 1; num_rc_controllers++; @@ -736,7 +734,7 @@ int __init pcibios_init(void) __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset); if (!port_status.dl_up) { - if (pcie_ports[trio_index][mac].removable) { + if (pcie_ports[trio_index].ports[mac].removable) { pr_info("PCI: link is down, MAC %d on TRIO %d\n", mac, trio_index); pr_info("This is expected if no PCIe card" -- cgit v1.1 From dc7d5cf2cab6d1fbb43c5c0569f43b7e4c822760 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:40:47 -0400 Subject: tile PCI RC: add dma_get_required_mask() The standard kernel function dma_get_required_mask() uses the highest DRAM address to determine if 32-bit or 64-bit DMA addressing is needed. This only works on architectures that have direct mapping between the PA and the PCI address space, i.e. those that don't have I/O TLBs or have I/O TLB but choose to use direct mapping. Neither of these are true for tilegx. Whether to use 64-bit DMA should depend on the PCI device's capability only, not on the amount of DRAM installeds, so we now advertise a 64-bit DMA mask unconditionally. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci-dma.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index adc369d..9fef64d 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -607,3 +607,21 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); #endif + +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK +/* + * The generic dma_get_required_mask() uses the highest physical address + * (max_pfn) to provide the hint to the PCI drivers regarding 32-bit or + * 64-bit DMA configuration. Since TILEGx has I/O TLB/MMU, allowing the + * DMAs to use the full 64-bit PCI address space and not limited by + * the physical memory space, we always let the PCI devices use + * 64-bit DMA if they have that capability, by returning the 64-bit + * DMA mask here. The device driver has the option to use 32-bit DMA if + * the device is not capable of 64-bit DMA. + */ +u64 dma_get_required_mask(struct device *dev) +{ + return DMA_BIT_MASK(64); +} +EXPORT_SYMBOL_GPL(dma_get_required_mask); +#endif -- cgit v1.1 From 02b67e09541b85d8f92e0a68a9deb1c33e6626bb Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 13:45:28 -0400 Subject: tile PCI DMA: fix bug in non-page-aligned accessors The code incorrectly masked with PAGE_OFFSET instead of PAGE_SIZE-1. This only matters when trying to do a non page-aligned DMA; it was noticed during code inspection. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 9fef64d..d94f487 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -257,7 +257,7 @@ static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address, BUG_ON(!valid_dma_direction(direction)); __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)), - dma_address & PAGE_OFFSET, size, direction); + dma_address & (PAGE_SIZE - 1), size, direction); } static void tile_dma_sync_single_for_cpu(struct device *dev, @@ -436,7 +436,7 @@ static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address, dma_address -= get_dma_offset(dev); __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)), - dma_address & PAGE_OFFSET, size, direction); + dma_address & (PAGE_SIZE - 1), size, direction); } static void tile_pci_dma_sync_single_for_cpu(struct device *dev, -- cgit v1.1 From 5026dafa177133f9b6bf0000dfc98596fa4ad2fd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 14:27:05 -0400 Subject: tile PCI RC: support PCIe TRIO 0 MAC 0 on Gx72 system On Tilera Gx72 systems, the logic for figuring out whether a given port is root complex is slightly different. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index de5008b..f2bf200 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -436,9 +436,26 @@ int __init tile_pci_init(void) /* * Now determine which PCIe ports are configured to operate in RC - * mode. To use a port, it must be allowed to be in RC mode by the + * mode. There is a differece in the port configuration capability + * between the Gx36 and Gx72 devices. + * + * The Gx36 has configuration capability for each of the 3 PCIe + * interfaces (disable, auto endpoint, auto RC, etc.). + * On the Gx72, you can only select one of the 3 PCIe interfaces per + * TRIO to train automatically. Further, the allowable training modes + * are reduced to four options (auto endpoint, auto RC, stream x1, + * stream x4). + * + * For Gx36 ports, it must be allowed to be in RC mode by the * Board Information Block, and the hardware strapping pins must be * set to RC mode. + * + * For Gx72 ports, the port will operate in RC mode if either of the + * following is true: + * 1. It is allowed to be in RC mode by the Board Information Block, + * and the BIB doesn't allow the EP mode. + * 2. It is allowed to be in either the RC or the EP mode by the BIB, + * and the hardware strapping pin is set to RC mode. */ for (i = 0; i < TILEGX_NUM_TRIO; i++) { gxio_trio_context_t *context = &trio_contexts[i]; @@ -447,8 +464,18 @@ int __init tile_pci_init(void) continue; for (j = 0; j < TILEGX_TRIO_PCIES; j++) { - if (pcie_ports[i].ports[j].allow_rc && - strapped_for_rc(context, j)) { + int is_rc = 0; + + if (pcie_ports[i].is_gx72 && + pcie_ports[i].ports[j].allow_rc) { + if (!pcie_ports[i].ports[j].allow_ep || + strapped_for_rc(context, j)) + is_rc = 1; + } else if (pcie_ports[i].ports[j].allow_rc && + strapped_for_rc(context, j)) { + is_rc = 1; + } + if (is_rc) { pcie_rc[i][j] = 1; num_rc_controllers++; } -- cgit v1.1 From ae2031fb29d12d02e9ce4220d5e03e75bb6b7bdf Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 5 Aug 2013 14:33:17 -0400 Subject: tile PCI RC: reduce driver's vmalloc space usage We can take advantage of the fact that bit 29 is hard-wired to zero in register TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR. This is handy since at the moment we only allocate one 4GB region for vmalloc, and with this change we can allocate four or more TRIO MACs without using up all the vmalloc space. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci_gx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index f2bf200..4843881 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -860,9 +860,15 @@ int __init pcibios_init(void) #endif + /* + * To save VMALLOC space, we take advantage of the fact that + * bit 29 in the PIO CFG address format is reserved 0. With + * TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT being 30, + * this cuts VMALLOC space usage from 1GB to 512MB per mac. + */ trio_context->mmio_base_pio_cfg[mac] = - iorpc_ioremap(trio_context->fd, offset, - (1 << TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT)); + iorpc_ioremap(trio_context->fd, offset, (1UL << + (TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT - 1))); if (trio_context->mmio_base_pio_cfg[mac] == NULL) { pr_err("PCI: PIO map failure for mac %d on TRIO %d\n", mac, trio_index); -- cgit v1.1 From bda0f5bad812df076a28fa5e58d86dfe68415251 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 14:11:21 -0400 Subject: tile: various console improvements This change improves and cleans up the tile console. - We enable HVC_IRQ support on tilegx, with the addition of a new Tilera hypervisor API for tilegx to allow a console IPI. If IPI support is not available we fall back to the previous polling mode. - We simplify the earlyprintk code to use CON_BOOT and eliminate some of the other supporting earlyprintk code. - A new tile_console_write() primitive is used to send output to the console and is factored out of the hvc_tile driver. This lets us support a "sim_console" boot argument to allow using simulator hooks to send output to the "console" as a slightly faster alternative to emulating the hardware more directly. Signed-off-by: Chris Metcalf Acked-by: Greg Kroah-Hartman --- arch/tile/kernel/early_printk.c | 47 ++++++++++------------------------------- arch/tile/kernel/hvglue.lds | 3 ++- arch/tile/kernel/reboot.c | 2 -- 3 files changed, 13 insertions(+), 39 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c index 34d72a1..b608e00 100644 --- a/arch/tile/kernel/early_printk.c +++ b/arch/tile/kernel/early_printk.c @@ -23,19 +23,24 @@ static void early_hv_write(struct console *con, const char *s, unsigned n) { - hv_console_write((HV_VirtAddr) s, n); + tile_console_write(s, n); + + /* + * Convert NL to NLCR (close enough to CRNL) during early boot. + * We assume newlines are at the ends of strings, which turns out + * to be good enough for early boot console output. + */ + if (n && s[n-1] == '\n') + tile_console_write("\r", 1); } static struct console early_hv_console = { .name = "earlyhv", .write = early_hv_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; -/* Direct interface for emergencies */ -static int early_console_complete; - void early_panic(const char *fmt, ...) { va_list ap; @@ -43,51 +48,21 @@ void early_panic(const char *fmt, ...) va_start(ap, fmt); early_printk("Kernel panic - not syncing: "); early_vprintk(fmt, ap); - early_console->write(early_console, "\n", 1); + early_printk("\n"); va_end(ap); dump_stack(); hv_halt(); } -static int __initdata keep_early; - static int __init setup_early_printk(char *str) { if (early_console) return 1; - if (str != NULL && strncmp(str, "keep", 4) == 0) - keep_early = 1; - early_console = &early_hv_console; register_console(early_console); return 0; } -void __init disable_early_printk(void) -{ - early_console_complete = 1; - if (!early_console) - return; - if (!keep_early) { - early_printk("disabling early console\n"); - unregister_console(early_console); - early_console = NULL; - } else { - early_printk("keeping early console\n"); - } -} - -void warn_early_printk(void) -{ - if (early_console_complete || early_console) - return; - early_printk("\ -Machine shutting down before console output is fully initialized.\n\ -You may wish to reboot and add the option 'earlyprintk' to your\n\ -boot command line to see any diagnostic early console output.\n\ -"); -} - early_param("earlyprintk", setup_early_printk); diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds index d44c5a6..ef52290 100644 --- a/arch/tile/kernel/hvglue.lds +++ b/arch/tile/kernel/hvglue.lds @@ -56,4 +56,5 @@ hv_inquire_realpa = TEXT_OFFSET + 0x106c0; hv_flush_all = TEXT_OFFSET + 0x106e0; hv_get_ipi_pte = TEXT_OFFSET + 0x10700; hv_set_pte_super_shift = TEXT_OFFSET + 0x10720; -hv_glue_internals = TEXT_OFFSET + 0x10740; +hv_console_set_ipi = TEXT_OFFSET + 0x107e0; +hv_glue_internals = TEXT_OFFSET + 0x10800; diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c index d1b5c91..6c5d2c0 100644 --- a/arch/tile/kernel/reboot.c +++ b/arch/tile/kernel/reboot.c @@ -27,7 +27,6 @@ void machine_halt(void) { - warn_early_printk(); arch_local_irq_disable_all(); smp_send_stop(); hv_halt(); @@ -35,7 +34,6 @@ void machine_halt(void) void machine_power_off(void) { - warn_early_printk(); arch_local_irq_disable_all(); smp_send_stop(); hv_power_off(); -- cgit v1.1 From 77f8c740d1a4947afb7493e7aafdd977e6d7939c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 14:29:59 -0400 Subject: tile: support "memmap" boot parameter This change adds support for the "memmap" boot parameter similar to what x86 provides. The tile version supports "memmap=1G$5G", for example, as a way to reserve a 1 GB range starting at PA 5GB. The memory is reserved via bootmem during startup, and we create a suitable "struct resource" marked as "Reserved" so you can see the range reported by /proc/iomem. Up to 64 such regions can currently be reserved on the boot command line. We do not support the x86 options "memmap=nn@ss" (force some memory to be available at the given address) since it's pointless to try to have Linux use memory the Tilera hypervisor hasn't given it. We do not support "memmap=nn#ss" to add an ACPI range for later processing, since we don't support ACPI. We do not support "memmap=exactmap" since we don't support reading the e820 information from the BIOS like x86 does. I did add support for "memmap=nn" (and the synonym "mem=nn") which cap the highest PA value at "nn"; these are both just a synonym for the existing tile boot option "maxmem". Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 80 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 4 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 676e155..b00e156 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -154,6 +154,65 @@ static int __init setup_maxnodemem(char *str) } early_param("maxnodemem", setup_maxnodemem); +struct memmap_entry { + u64 addr; /* start of memory segment */ + u64 size; /* size of memory segment */ +}; +static struct memmap_entry memmap_map[64]; +static int memmap_nr; + +static void add_memmap_region(u64 addr, u64 size) +{ + if (memmap_nr >= ARRAY_SIZE(memmap_map)) { + pr_err("Ooops! Too many entries in the memory map!\n"); + return; + } + memmap_map[memmap_nr].addr = addr; + memmap_map[memmap_nr].size = size; + memmap_nr++; +} + +static int __init setup_memmap(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!p) + return -EINVAL; + + if (!strncmp(p, "exactmap", 8)) { + pr_err("\"memmap=exactmap\" not valid on tile\n"); + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + if (*p == '@') { + pr_err("\"memmap=nn@ss\" (force RAM) invalid on tile\n"); + } else if (*p == '#') { + pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on tile\n"); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + add_memmap_region(start_at, mem_size); + } else { + if (mem_size == 0) + return -EINVAL; + maxmem_pfn = (mem_size >> HPAGE_SHIFT) << + (HPAGE_SHIFT - PAGE_SHIFT); + } + return *p == '\0' ? 0 : -EINVAL; +} +early_param("memmap", setup_memmap); + +static int __init setup_mem(char *str) +{ + return setup_maxmem(str); +} +early_param("mem", setup_mem); /* compatibility with x86 */ + static int __init setup_isolnodes(char *str) { char buf[MAX_NUMNODES * 5]; @@ -629,6 +688,12 @@ static void __init setup_bootmem_allocator(void) for (i = 0; i < MAX_NUMNODES; ++i) setup_bootmem_allocator_node(i); + /* Reserve any memory excluded by "memmap" arguments. */ + for (i = 0; i < memmap_nr; ++i) { + struct memmap_entry *m = &memmap_map[i]; + reserve_bootmem(m->addr, m->size, 0); + } + #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0); @@ -1562,11 +1627,11 @@ insert_non_bus_resource(void) #endif static struct resource* __init -insert_ram_resource(u64 start_pfn, u64 end_pfn) +insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) { struct resource *res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - res->name = "System RAM"; + res->name = reserved ? "Reserved" : "System RAM"; res->start = start_pfn << PAGE_SHIFT; res->end = (end_pfn << PAGE_SHIFT) - 1; res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; @@ -1601,11 +1666,11 @@ static int __init request_standard_resources(void) end_pfn > pci_reserve_start_pfn) { if (end_pfn > pci_reserve_end_pfn) insert_ram_resource(pci_reserve_end_pfn, - end_pfn); + end_pfn, 0); end_pfn = pci_reserve_start_pfn; } #endif - insert_ram_resource(start_pfn, end_pfn); + insert_ram_resource(start_pfn, end_pfn, 0); } code_resource.start = __pa(_text - CODE_DELTA); @@ -1616,6 +1681,13 @@ static int __init request_standard_resources(void) insert_resource(&iomem_resource, &code_resource); insert_resource(&iomem_resource, &data_resource); + /* Mark any "memmap" regions busy for the resource manager. */ + for (i = 0; i < memmap_nr; ++i) { + struct memmap_entry *m = &memmap_map[i]; + insert_ram_resource(PFN_DOWN(m->addr), + PFN_UP(m->addr + m->size - 1), 1); + } + #ifdef CONFIG_KEXEC insert_resource(&iomem_resource, &crashk_res); #endif -- cgit v1.1 From b63ea7121c60b38f2d0ca0f270697bc542f6b76c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 14:32:49 -0400 Subject: tile: fix comment bug in sys_cmpxchg description Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index cb52d66..25966af 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1609,7 +1609,7 @@ ENTRY(sys_cmpxchg) * Because of C pointer arithmetic, we want to compute this: * * ((char*)atomic_locks + - * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2)) + * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2)) * * Instead of two shifts we just ">> 1", and use 'mm' * to ignore the low and high bits we don't want. -- cgit v1.1 From f10da5472c6907a3fbd6886224b36d21925ce47b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 10:34:10 -0400 Subject: tile: remove unnecessary backslashes in asm-offsets.c Pointed out by checkpatch. A few of the DEFINE() lines were properly written without backslash continuation; fix the rest. Signed-off-by: Chris Metcalf --- arch/tile/kernel/asm-offsets.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c index 01ddf19..8fff475 100644 --- a/arch/tile/kernel/asm-offsets.c +++ b/arch/tile/kernel/asm-offsets.c @@ -37,28 +37,28 @@ void foo(void) { - DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \ + DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, offsetof(struct single_step_state, buffer)); - DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \ + DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, offsetof(struct single_step_state, flags)); - DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \ + DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, offsetof(struct single_step_state, orig_pc)); - DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \ + DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, offsetof(struct single_step_state, next_pc)); - DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \ + DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, offsetof(struct single_step_state, branch_next_pc)); - DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \ + DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, offsetof(struct single_step_state, update_value)); - DEFINE(THREAD_INFO_TASK_OFFSET, \ + DEFINE(THREAD_INFO_TASK_OFFSET, offsetof(struct thread_info, task)); - DEFINE(THREAD_INFO_FLAGS_OFFSET, \ + DEFINE(THREAD_INFO_FLAGS_OFFSET, offsetof(struct thread_info, flags)); - DEFINE(THREAD_INFO_STATUS_OFFSET, \ + DEFINE(THREAD_INFO_STATUS_OFFSET, offsetof(struct thread_info, status)); - DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \ + DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, offsetof(struct thread_info, homecache_cpu)); - DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \ + DEFINE(THREAD_INFO_STEP_STATE_OFFSET, offsetof(struct thread_info, step_state)); DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, @@ -66,11 +66,11 @@ void foo(void) DEFINE(TASK_STRUCT_THREAD_PC_OFFSET, offsetof(struct task_struct, thread.pc)); - DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \ + DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, offsetof(HV_Topology, width)); - DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \ + DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, offsetof(HV_Topology, height)); - DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \ + DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, offsetof(irq_cpustat_t, irq_syscall_count)); } -- cgit v1.1 From 2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 16:04:13 -0400 Subject: tile: fast-path unaligned memory access for tilegx This change enables unaligned userspace memory access via a kernel fast path on tilegx. The kernel tracks user PC/instruction pairs per-thread using a direct-mapped cache in userspace. The cache maps those PC/instruction pairs to JIT'ed instruction sequences that load or store using byte-wide load store intructions and then synthesize 2-, 4- or 8-byte load or store results. Once an instruction has been seen to generate an unaligned access once, subsequent hits on that instruction typically require overhead of only around 50 cycles if cache and TLB is hot. We support the prctl() PR_GET_UNALIGN / PR_SET_UNALIGN sys call to enable or disable unaligned fixups on a per-process basis. To do this we pull some of the tilepro unaligned support out of the single_step.c file; tilepro uses instruction disassembly for both single-step and unaligned access support. Since tilegx actually has hardware singlestep support, though, it's cleaner to keep the tilegx unaligned access code in a separate file. While we're at it, properly rename the tilepro-specific types, etc., to have tilepro suffixes instead of generic tile suffixes. Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 3 +- arch/tile/kernel/asm-offsets.c | 6 + arch/tile/kernel/intvec_32.S | 1 - arch/tile/kernel/intvec_64.S | 231 +++++- arch/tile/kernel/proc.c | 2 - arch/tile/kernel/process.c | 21 + arch/tile/kernel/ptrace.c | 4 +- arch/tile/kernel/single_step.c | 116 +-- arch/tile/kernel/unaligned.c | 1609 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 1927 insertions(+), 66 deletions(-) create mode 100644 arch/tile/kernel/unaligned.c (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 5334be8..6846c4e 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -5,7 +5,8 @@ extra-y := vmlinux.lds head_$(BITS).o obj-y := backtrace.o entry.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ - setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ + setup.o signal.o single_step.o stack.o sys.o \ + sysfs.o time.o traps.o unaligned.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c index 8fff475..8652b0b 100644 --- a/arch/tile/kernel/asm-offsets.c +++ b/arch/tile/kernel/asm-offsets.c @@ -60,6 +60,12 @@ void foo(void) offsetof(struct thread_info, homecache_cpu)); DEFINE(THREAD_INFO_STEP_STATE_OFFSET, offsetof(struct thread_info, step_state)); +#ifdef __tilegx__ + DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET, + offsetof(struct thread_info, unalign_jit_base)); + DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET, + offsetof(struct thread_info, unalign_jit_tmp)); +#endif DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, offsetof(struct task_struct, thread.ksp)); diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 25966af..3880613 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1420,7 +1420,6 @@ handle_ill: { lw r0, r0 /* indirect thru thread_info to get task_info*/ addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ - move r2, zero /* load error code into r2 */ } jal send_sigtrap /* issue a SIGTRAP */ diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 85d4839..884af9e 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -17,11 +17,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -98,6 +100,189 @@ } .endm + /* + * Unalign data exception fast handling: In order to handle + * unaligned data access, a fast JIT version is generated and stored + * in a specific area in user space. We first need to do a quick poke + * to see if the JIT is available. We use certain bits in the fault + * PC (3 to 9 is used for 16KB page size) as index to address the JIT + * code area. The first 64bit word is the fault PC, and the 2nd one is + * the fault bundle itself. If these 2 words both match, then we + * directly "iret" to JIT code. If not, a slow path is invoked to + * generate new JIT code. Note: the current JIT code WILL be + * overwritten if it existed. So, ideally we can handle 128 unalign + * fixups via JIT. For lookup efficiency and to effectively support + * tight loops with multiple unaligned reference, a simple + * direct-mapped cache is used. + * + * SPR_EX_CONTEXT_K_0 is modified to return to JIT code. + * SPR_EX_CONTEXT_K_1 has ICS set. + * SPR_EX_CONTEXT_0_0 is setup to user program's next PC. + * SPR_EX_CONTEXT_0_1 = 0. + */ + .macro int_hand_unalign_fast vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + /* Put r3 in SPR_SYSTEM_SAVE_K_1. */ + mtspr SPR_SYSTEM_SAVE_K_1, r3 + + mfspr r3, SPR_EX_CONTEXT_K_1 + /* + * Examine if exception comes from user without ICS set. + * If not, just go directly to the slow path. + */ + bnez r3, hand_unalign_slow_nonuser + + mfspr r3, SPR_SYSTEM_SAVE_K_0 + + /* Get &thread_info->unalign_jit_tmp[0] in r3. */ + mm r3, zero, LOG2_THREAD_SIZE, 63 +#if THREAD_SIZE < 65536 + addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) +#else + addli r3, r3, -(PAGE_SIZE/2) + addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) +#endif + + /* + * Save r0, r1, r2 into thread_info array r3 points to + * from low to high memory in order. + */ + st_add r3, r0, 8 + st_add r3, r1, 8 + { + st_add r3, r2, 8 + andi r2, sp, 7 + } + + /* Save stored r3 value so we can revert it on a page fault. */ + mfspr r1, SPR_SYSTEM_SAVE_K_1 + st r3, r1 + + { + /* Generate a SIGBUS if sp is not 8-byte aligned. */ + bnez r2, hand_unalign_slow_badsp + } + + /* + * Get the thread_info in r0; load r1 with pc. Set the low bit of sp + * as an indicator to the page fault code in case we fault. + */ + { + ori sp, sp, 1 + mfspr r1, SPR_EX_CONTEXT_K_0 + } + + /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */ + { + addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \ + (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8)) + bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT) + } + + /* Load the jit_info; multiply r2 by 128. */ + { + ld r0, r0 + shli r2, r2, UNALIGN_JIT_SHIFT + } + + /* + * If r0 is NULL, the JIT page is not mapped, so go to slow path; + * add offset r2 to r0 at the same time. + */ + { + beqz r0, hand_unalign_slow + add r2, r0, r2 + } + + /* + * We are loading from userspace (both the JIT info PC and + * instruction word, and the instruction word we executed) + * and since either could fault while holding the interrupt + * critical section, we must tag this region and check it in + * do_page_fault() to handle it properly. + */ +ENTRY(__start_unalign_asm_code) + + /* Load first word of JIT in r0 and increment r2 by 8. */ + ld_add r0, r2, 8 + + /* + * Compare the PC with the 1st word in JIT; load the fault bundle + * into r1. + */ + { + cmpeq r0, r0, r1 + ld r1, r1 + } + + /* Go to slow path if PC doesn't match. */ + beqz r0, hand_unalign_slow + + /* + * Load the 2nd word of JIT, which is supposed to be the fault + * bundle for a cache hit. Increment r2; after this bundle r2 will + * point to the potential start of the JIT code we want to run. + */ + ld_add r0, r2, 8 + + /* No further accesses to userspace are done after this point. */ +ENTRY(__end_unalign_asm_code) + + /* Compare the real bundle with what is saved in the JIT area. */ + { + cmpeq r0, r1, r0 + mtspr SPR_EX_CONTEXT_0_1, zero + } + + /* Go to slow path if the fault bundle does not match. */ + beqz r0, hand_unalign_slow + + /* + * A cache hit is found. + * r2 points to start of JIT code (3rd word). + * r0 is the fault pc. + * r1 is the fault bundle. + * Reset the low bit of sp. + */ + { + mfspr r0, SPR_EX_CONTEXT_K_0 + andi sp, sp, ~1 + } + + /* Write r2 into EX_CONTEXT_K_0 and increment PC. */ + { + mtspr SPR_EX_CONTEXT_K_0, r2 + addi r0, r0, 8 + } + + /* + * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to + * user with ICS set. This way, if the JIT fixup causes another + * unalign exception (which shouldn't be possible) the user + * process will be terminated with SIGBUS. Also, our fixup will + * run without interleaving with external interrupts. + * Each fixup is at most 14 bundles, so it won't hold ICS for long. + */ + { + movei r1, PL_ICS_EX1(USER_PL, 1) + mtspr SPR_EX_CONTEXT_0_0, r0 + } + + { + mtspr SPR_EX_CONTEXT_K_1, r1 + addi r3, r3, -(3 * 8) + } + + /* Restore r0..r3. */ + ld_add r0, r3, 8 + ld_add r1, r3, 8 + ld_add r2, r3, 8 + ld r3, r3 + + iret + ENDPROC(intvec_\vecname) + .endm #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" @@ -118,15 +303,21 @@ intvec_feedback: * The "processing" argument specifies the code for processing * the interrupt. Defaults to "handle_interrupt". */ - .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt - .org (\vecnum << 8) + .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt intvec_\vecname: /* Temporarily save a register so we have somewhere to work. */ mtspr SPR_SYSTEM_SAVE_K_1, r0 mfspr r0, SPR_EX_CONTEXT_K_1 - andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + /* + * The unalign data fastpath code sets the low bit in sp to + * force us to reset it here on fault. + */ + { + blbs sp, 2f + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } .ifc \vecnum, INT_DOUBLE_FAULT /* @@ -176,7 +367,7 @@ intvec_\vecname: } .endif - +2: /* * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and * the current stack top in the higher bits. So we recover @@ -1223,10 +1414,31 @@ STD_ENTRY(_sys_clone) j sys_clone STD_ENDPROC(_sys_clone) -/* The single-step support may need to read all the registers. */ + /* + * Recover r3, r2, r1 and r0 here saved by unalign fast vector. + * The vector area limit is 32 bundles, so we handle the reload here. + * r0, r1, r2 are in thread_info from low to high memory in order. + * r3 points to location the original r3 was saved. + * We put this code in the __HEAD section so it can be reached + * via a conditional branch from the fast path. + */ + __HEAD +hand_unalign_slow: + andi sp, sp, ~1 +hand_unalign_slow_badsp: + addi r3, r3, -(3 * 8) + ld_add r0, r3, 8 + ld_add r1, r3, 8 + ld r2, r3 +hand_unalign_slow_nonuser: + mfspr r3, SPR_SYSTEM_SAVE_K_1 + __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign + +/* The unaligned data support needs to read all the registers. */ int_unalign: push_extra_callee_saves r0 - j do_trap + j do_unaligned +ENDPROC(hand_unalign_slow) /* Fill the return address stack with nonzero entries. */ STD_ENTRY(fill_ra_stack) @@ -1240,6 +1452,11 @@ STD_ENTRY(fill_ra_stack) 4: jrp r0 STD_ENDPROC(fill_ra_stack) + .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt + .org (\vecnum << 8) + __int_hand \vecnum, \vecname, \c_routine, \processing + .endm + /* Include .intrpt1 array of interrupt vectors */ .section ".intrpt1", "ax" @@ -1272,7 +1489,7 @@ STD_ENTRY(fill_ra_stack) int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall int_hand INT_SWINT_0, SWINT_0, do_trap int_hand INT_ILL_TRANS, ILL_TRANS, do_trap - int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign + int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index dafc447..681100c 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -113,7 +113,6 @@ arch_initcall(proc_tile_init); * Support /proc/sys/tile directory */ -#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ static ctl_table unaligned_subtable[] = { { .procname = "enabled", @@ -160,4 +159,3 @@ static int __init proc_sys_tile_init(void) } arch_initcall(proc_sys_tile_init); -#endif diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 8d6c51d..25678b8 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_HARDWALL #include #endif @@ -147,6 +148,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, */ task_thread_info(p)->step_state = NULL; +#ifdef __tilegx__ + /* + * Do not clone unalign jit fixup from the parent; each thread + * must allocate its own on demand. + */ + task_thread_info(p)->unalign_jit_base = NULL; +#endif + /* * Copy the registers onto the kernel stack so the * return-from-interrupt code will reload it into registers. @@ -205,6 +214,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, return 0; } +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + task_thread_info(tsk)->align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + return put_user(task_thread_info(tsk)->align_ctl, + (unsigned int __user *)adr); +} + /* * Return "current" if it looks plausible, or else a pointer to a dummy. * This can be helpful if we are just trying to emit a clean panic. diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 0f83ed4..bac1874 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -272,7 +272,7 @@ void do_syscall_trace_exit(struct pt_regs *regs) trace_sys_exit(regs, regs->regs[0]); } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs) { struct siginfo info; @@ -288,5 +288,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) /* Handle synthetic interrupt delivered only by the simulator. */ void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num) { - send_sigtrap(current, regs, fault_num); + send_sigtrap(current, regs); } diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 27742e8..5ef2e9e 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -12,41 +12,30 @@ * more details. * * A code-rewriter that enables instruction single-stepping. - * Derived from iLib's single-stepping code. */ -#ifndef __tilegx__ /* Hardware support for single step unavailable. */ - -/* These functions are only used on the TILE platform */ +#include +#include #include #include #include #include #include #include +#include #include +#include +#include #include #include +#include #include -#define signExtend17(val) sign_extend((val), 17) -#define TILE_X1_MASK (0xffffffffULL << 31) - -int unaligned_printk; -static int __init setup_unaligned_printk(char *str) -{ - long val; - if (strict_strtol(str, 0, &val) != 0) - return 0; - unaligned_printk = val; - pr_info("Printk for each unaligned data accesses is %s\n", - unaligned_printk ? "enabled" : "disabled"); - return 1; -} -__setup("unaligned_printk=", setup_unaligned_printk); +#ifndef __tilegx__ /* Hardware support for single step unavailable. */ -unsigned int unaligned_fixup_count; +#define signExtend17(val) sign_extend((val), 17) +#define TILE_X1_MASK (0xffffffffULL << 31) enum mem_op { MEMOP_NONE, @@ -56,12 +45,13 @@ enum mem_op { MEMOP_STORE_POSTINCR }; -static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) +static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n, + s32 offset) { - tile_bundle_bits result; + tilepro_bundle_bits result; /* mask out the old offset */ - tile_bundle_bits mask = create_BrOff_X1(-1); + tilepro_bundle_bits mask = create_BrOff_X1(-1); result = n & (~mask); /* or in the new offset */ @@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) return result; } -static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) +static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest, + int src) { - tile_bundle_bits result; - tile_bundle_bits op; + tilepro_bundle_bits result; + tilepro_bundle_bits op; result = n & (~TILE_X1_MASK); @@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) return result; } -static inline tile_bundle_bits nop_X1(tile_bundle_bits n) +static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n) { return move_X1(n, TREG_ZERO, TREG_ZERO); } -static inline tile_bundle_bits addi_X1( - tile_bundle_bits n, int dest, int src, int imm) +static inline tilepro_bundle_bits addi_X1( + tilepro_bundle_bits n, int dest, int src, int imm) { n &= ~TILE_X1_MASK; @@ -107,15 +98,26 @@ static inline tile_bundle_bits addi_X1( return n; } -static tile_bundle_bits rewrite_load_store_unaligned( +static tilepro_bundle_bits rewrite_load_store_unaligned( struct single_step_state *state, - tile_bundle_bits bundle, + tilepro_bundle_bits bundle, struct pt_regs *regs, enum mem_op mem_op, int size, int sign_ext) { unsigned char __user *addr; int val_reg, addr_reg, err, val; + int align_ctl; + + align_ctl = unaligned_fixup; + switch (task_thread_info(current)->align_ctl) { + case PR_UNALIGN_NOPRINT: + align_ctl = 1; + break; + case PR_UNALIGN_SIGBUS: + align_ctl = 0; + break; + } /* Get address and value registers */ if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { @@ -160,7 +162,7 @@ static tile_bundle_bits rewrite_load_store_unaligned( * tilepro hardware would be doing, if it could provide us with the * actual bad address in an SPR, which it doesn't. */ - if (unaligned_fixup == 0) { + if (align_ctl == 0) { siginfo_t info = { .si_signo = SIGBUS, .si_code = BUS_ADRALN, @@ -209,14 +211,14 @@ static tile_bundle_bits rewrite_load_store_unaligned( if (err) { siginfo_t info = { - .si_signo = SIGSEGV, - .si_code = SEGV_MAPERR, + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, .si_addr = addr }; - trace_unhandled_signal("segfault", regs, - (unsigned long)addr, SIGSEGV); + trace_unhandled_signal("bad address for unaligned fixup", regs, + (unsigned long)addr, SIGBUS); force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; + return (tilepro_bundle_bits) 0; } if (unaligned_printk || unaligned_fixup_count == 0) { @@ -285,7 +287,7 @@ void single_step_execve(void) ti->step_state = NULL; } -/** +/* * single_step_once() - entry point when single stepping has been triggered. * @regs: The machine register state * @@ -304,20 +306,31 @@ void single_step_execve(void) */ void single_step_once(struct pt_regs *regs) { - extern tile_bundle_bits __single_step_ill_insn; - extern tile_bundle_bits __single_step_j_insn; - extern tile_bundle_bits __single_step_addli_insn; - extern tile_bundle_bits __single_step_auli_insn; + extern tilepro_bundle_bits __single_step_ill_insn; + extern tilepro_bundle_bits __single_step_j_insn; + extern tilepro_bundle_bits __single_step_addli_insn; + extern tilepro_bundle_bits __single_step_auli_insn; struct thread_info *info = (void *)current_thread_info(); struct single_step_state *state = info->step_state; int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); - tile_bundle_bits __user *buffer, *pc; - tile_bundle_bits bundle; + tilepro_bundle_bits __user *buffer, *pc; + tilepro_bundle_bits bundle; int temp_reg; int target_reg = TREG_LR; int err; enum mem_op mem_op = MEMOP_NONE; int size = 0, sign_ext = 0; /* happy compiler */ + int align_ctl; + + align_ctl = unaligned_fixup; + switch (task_thread_info(current)->align_ctl) { + case PR_UNALIGN_NOPRINT: + align_ctl = 1; + break; + case PR_UNALIGN_SIGBUS: + align_ctl = 0; + break; + } asm( " .pushsection .rodata.single_step\n" @@ -390,7 +403,7 @@ void single_step_once(struct pt_regs *regs) if (regs->faultnum == INT_SWINT_1) regs->pc -= 8; - pc = (tile_bundle_bits __user *)(regs->pc); + pc = (tilepro_bundle_bits __user *)(regs->pc); if (get_user(bundle, pc) != 0) { pr_err("Couldn't read instruction at %p trying to step\n", pc); return; @@ -627,9 +640,9 @@ void single_step_once(struct pt_regs *regs) /* * Check if we need to rewrite an unaligned load/store. - * Returning zero is a special value meaning we need to SIGSEGV. + * Returning zero is a special value meaning we generated a signal. */ - if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { + if (mem_op != MEMOP_NONE && align_ctl >= 0) { bundle = rewrite_load_store_unaligned(state, bundle, regs, mem_op, size, sign_ext); if (bundle == 0) @@ -668,9 +681,9 @@ void single_step_once(struct pt_regs *regs) } /* End with a jump back to the next instruction */ - delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - + delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) - (unsigned long)buffer) >> - TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; + TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; bundle = __single_step_j_insn; bundle |= create_JOffLong_X1(delta); err |= __put_user(bundle, buffer++); @@ -698,9 +711,6 @@ void single_step_once(struct pt_regs *regs) } #else -#include -#include -#include static DEFINE_PER_CPU(unsigned long, ss_saved_pc); @@ -743,10 +753,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num) } else if ((*ss_pc != regs->pc) || (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { - ptrace_notify(SIGTRAP); control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + send_sigtrap(current, regs); } } diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c new file mode 100644 index 0000000..b425fb6 --- /dev/null +++ b/arch/tile/kernel/unaligned.c @@ -0,0 +1,1609 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * A code-rewriter that handles unaligned exception. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * This file handles unaligned exception for tile-Gx. The tilepro's unaligned + * exception is supported out of single_step.c + */ + +int unaligned_printk; + +static int __init setup_unaligned_printk(char *str) +{ + long val; + if (kstrtol(str, 0, &val) != 0) + return 0; + unaligned_printk = val; + pr_info("Printk for each unaligned data accesses is %s\n", + unaligned_printk ? "enabled" : "disabled"); + return 1; +} +__setup("unaligned_printk=", setup_unaligned_printk); + +unsigned int unaligned_fixup_count; + +#ifdef __tilegx__ + +/* + * Unalign data jit fixup code fragement. Reserved space is 128 bytes. + * The 1st 64-bit word saves fault PC address, 2nd word is the fault + * instruction bundle followed by 14 JIT bundles. + */ + +struct unaligned_jit_fragment { + unsigned long pc; + tilegx_bundle_bits bundle; + tilegx_bundle_bits insn[14]; +}; + +/* + * Check if a nop or fnop at bundle's pipeline X0. + */ + +static bool is_bundle_x0_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_X0(bundle) == + NOP_UNARY_OPCODE_X0) && + (get_RRROpcodeExtension_X0(bundle) == + UNARY_RRR_0_OPCODE_X0) && + (get_Opcode_X0(bundle) == + RRR_0_OPCODE_X0)) || + ((get_UnaryOpcodeExtension_X0(bundle) == + FNOP_UNARY_OPCODE_X0) && + (get_RRROpcodeExtension_X0(bundle) == + UNARY_RRR_0_OPCODE_X0) && + (get_Opcode_X0(bundle) == + RRR_0_OPCODE_X0))); +} + +/* + * Check if nop or fnop at bundle's pipeline X1. + */ + +static bool is_bundle_x1_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_X1(bundle) == + NOP_UNARY_OPCODE_X1) && + (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) && + (get_Opcode_X1(bundle) == + RRR_0_OPCODE_X1)) || + ((get_UnaryOpcodeExtension_X1(bundle) == + FNOP_UNARY_OPCODE_X1) && + (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) && + (get_Opcode_X1(bundle) == + RRR_0_OPCODE_X1))); +} + +/* + * Check if nop or fnop at bundle's Y0 pipeline. + */ + +static bool is_bundle_y0_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_Y0(bundle) == + NOP_UNARY_OPCODE_Y0) && + (get_RRROpcodeExtension_Y0(bundle) == + UNARY_RRR_1_OPCODE_Y0) && + (get_Opcode_Y0(bundle) == + RRR_1_OPCODE_Y0)) || + ((get_UnaryOpcodeExtension_Y0(bundle) == + FNOP_UNARY_OPCODE_Y0) && + (get_RRROpcodeExtension_Y0(bundle) == + UNARY_RRR_1_OPCODE_Y0) && + (get_Opcode_Y0(bundle) == + RRR_1_OPCODE_Y0))); +} + +/* + * Check if nop or fnop at bundle's pipeline Y1. + */ + +static bool is_bundle_y1_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_Y1(bundle) == + NOP_UNARY_OPCODE_Y1) && + (get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) && + (get_Opcode_Y1(bundle) == + RRR_1_OPCODE_Y1)) || + ((get_UnaryOpcodeExtension_Y1(bundle) == + FNOP_UNARY_OPCODE_Y1) && + (get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) && + (get_Opcode_Y1(bundle) == + RRR_1_OPCODE_Y1))); +} + +/* + * Test if a bundle's y0 and y1 pipelines are both nop or fnop. + */ + +static bool is_y0_y1_nop(tilegx_bundle_bits bundle) +{ + return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle); +} + +/* + * Test if a bundle's x0 and x1 pipelines are both nop or fnop. + */ + +static bool is_x0_x1_nop(tilegx_bundle_bits bundle) +{ + return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle); +} + +/* + * Find the destination, source registers of fault unalign access instruction + * at X1 or Y2. Also, allocate up to 3 scratch registers clob1, clob2 and + * clob3, which are guaranteed different from any register used in the fault + * bundle. r_alias is used to return if the other instructions other than the + * unalign load/store shares same register with ra, rb and rd. + */ + +static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra, + uint64_t *rb, uint64_t *clob1, uint64_t *clob2, + uint64_t *clob3, bool *r_alias) +{ + int i; + uint64_t reg; + uint64_t reg_map = 0, alias_reg_map = 0, map; + bool alias; + + *ra = -1; + *rb = -1; + + if (rd) + *rd = -1; + + *clob1 = -1; + *clob2 = -1; + *clob3 = -1; + alias = false; + + /* + * Parse fault bundle, find potential used registers and mark + * corresponding bits in reg_map and alias_map. These 2 bit maps + * are used to find the scratch registers and determine if there + * is register alais. + */ + if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */ + + reg = get_SrcA_Y2(bundle); + reg_map |= 1ULL << reg; + *ra = reg; + reg = get_SrcBDest_Y2(bundle); + reg_map |= 1ULL << reg; + + if (rd) { + /* Load. */ + *rd = reg; + alias_reg_map = (1ULL << *rd) | (1ULL << *ra); + } else { + /* Store. */ + *rb = reg; + alias_reg_map = (1ULL << *ra) | (1ULL << *rb); + } + + if (!is_bundle_y1_nop(bundle)) { + reg = get_SrcA_Y1(bundle); + reg_map |= (1ULL << reg); + map = (1ULL << reg); + + reg = get_SrcB_Y1(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + reg = get_Dest_Y1(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + if (map & alias_reg_map) + alias = true; + } + + if (!is_bundle_y0_nop(bundle)) { + reg = get_SrcA_Y0(bundle); + reg_map |= (1ULL << reg); + map = (1ULL << reg); + + reg = get_SrcB_Y0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + reg = get_Dest_Y0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + if (map & alias_reg_map) + alias = true; + } + } else { /* X Mode Bundle. */ + + reg = get_SrcA_X1(bundle); + reg_map |= (1ULL << reg); + *ra = reg; + if (rd) { + /* Load. */ + reg = get_Dest_X1(bundle); + reg_map |= (1ULL << reg); + *rd = reg; + alias_reg_map = (1ULL << *rd) | (1ULL << *ra); + } else { + /* Store. */ + reg = get_SrcB_X1(bundle); + reg_map |= (1ULL << reg); + *rb = reg; + alias_reg_map = (1ULL << *ra) | (1ULL << *rb); + } + + if (!is_bundle_x0_nop(bundle)) { + reg = get_SrcA_X0(bundle); + reg_map |= (1ULL << reg); + map = (1ULL << reg); + + reg = get_SrcB_X0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + reg = get_Dest_X0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + if (map & alias_reg_map) + alias = true; + } + } + + /* + * "alias" indicates if the unalign access registers have collision + * with others in the same bundle. We jsut simply test all register + * operands case (RRR), ignored the case with immidate. If a bundle + * has no register alias, we may do fixup in a simple or fast manner. + * So if an immidata field happens to hit with a register, we may end + * up fall back to the generic handling. + */ + + *r_alias = alias; + + /* Flip bits on reg_map. */ + reg_map ^= -1ULL; + + /* Scan reg_map lower 54(TREG_SP) bits to find 3 set bits. */ + for (i = 0; i < TREG_SP; i++) { + if (reg_map & (0x1ULL << i)) { + if (*clob1 == -1) { + *clob1 = i; + } else if (*clob2 == -1) { + *clob2 = i; + } else if (*clob3 == -1) { + *clob3 = i; + return; + } + } + } +} + +/* + * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them + * is unexpected. + */ + +static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb, + uint64_t clob1, uint64_t clob2, uint64_t clob3) +{ + bool unexpected = false; + if ((ra >= 56) && (ra != TREG_ZERO)) + unexpected = true; + + if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56)) + unexpected = true; + + if (rd != -1) { + if ((rd >= 56) && (rd != TREG_ZERO)) + unexpected = true; + } else { + if ((rb >= 56) && (rb != TREG_ZERO)) + unexpected = true; + } + return unexpected; +} + + +#define GX_INSN_X0_MASK ((1ULL << 31) - 1) +#define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31) +#define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL)) +#define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31) +#define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20)) + +#ifdef __LITTLE_ENDIAN +#define GX_INSN_BSWAP(_bundle_) (_bundle_) +#else +#define GX_INSN_BSWAP(_bundle_) swab64(_bundle_) +#endif /* __LITTLE_ENDIAN */ + +/* + * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section. + * The corresponding static function jix_x#_###(.) generates partial or + * whole bundle based on the template and given arguments. + */ + +#define __JIT_CODE(_X_) \ + asm (".pushsection .rodata.unalign_data, \"a\"\n" \ + _X_"\n" \ + ".popsection\n") + +__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}"); +static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg) +{ + extern tilegx_bundle_bits __unalign_jit_x1_mtspr; + return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) | + create_MT_Imm14_X1(spr) | create_SrcA_X1(reg); +} + +__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}"); +static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr) +{ + extern tilegx_bundle_bits __unalign_jit_x1_mfspr; + return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) | + create_MF_Imm14_X1(spr) | create_Dest_X1(reg); +} + +__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}"); +static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x0_addi; + return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_Imm8_X0(imm8); +} + +__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}"); +static tilegx_bundle_bits jit_x1_ldna(int rd, int ra) +{ + extern tilegx_bundle_bits __unalign_jit_x1_ldna; + return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) | + create_Dest_X1(rd) | create_SrcA_X1(ra); +} + +__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}"); +static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb) +{ + extern tilegx_bundle_bits __unalign_jit_x0_dblalign; + return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_SrcB_X0(rb); +} + +__JIT_CODE("__unalign_jit_x1_iret: {iret}"); +static tilegx_bundle_bits jit_x1_iret(void) +{ + extern tilegx_bundle_bits __unalign_jit_x1_iret; + return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK; +} + +__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}"); +static tilegx_bundle_bits jit_x0_fnop(void) +{ + extern tilegx_bundle_bits __unalign_jit_x01_fnop; + return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK; +} + +static tilegx_bundle_bits jit_x1_fnop(void) +{ + extern tilegx_bundle_bits __unalign_jit_x01_fnop; + return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK; +} + +__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}"); +static tilegx_bundle_bits jit_y2_dummy(void) +{ + extern tilegx_bundle_bits __unalign_jit_y2_dummy; + return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK; +} + +static tilegx_bundle_bits jit_y1_fnop(void) +{ + extern tilegx_bundle_bits __unalign_jit_y2_dummy; + return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK; +} + +__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}"); +static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_st1_add; + return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) & + (~create_SrcA_X1(-1)) & + GX_INSN_X1_MASK) | create_SrcA_X1(ra) | + create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}"); +static tilegx_bundle_bits jit_x1_st(int ra, int rb) +{ + extern tilegx_bundle_bits __unalign_jit_x1_st; + return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) | + create_SrcA_X1(ra) | create_SrcB_X1(rb); +} + +__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}"); +static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_st_add; + return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) & + (~create_SrcA_X1(-1)) & + GX_INSN_X1_MASK) | create_SrcA_X1(ra) | + create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}"); +static tilegx_bundle_bits jit_x1_ld(int rd, int ra) +{ + extern tilegx_bundle_bits __unalign_jit_x1_ld; + return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) | + create_Dest_X1(rd) | create_SrcA_X1(ra); +} + +__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}"); +static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_ld_add; + return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) & + (~create_Dest_X1(-1)) & + GX_INSN_X1_MASK) | create_Dest_X1(rd) | + create_SrcA_X1(ra) | create_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}"); +static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe) +{ + extern tilegx_bundle_bits __unalign_jit_x0_bfexts; + return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_BFStart_X0(bfs) | create_BFEnd_X0(bfe); +} + +__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}"); +static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe) +{ + extern tilegx_bundle_bits __unalign_jit_x0_bfextu; + return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_BFStart_X0(bfs) | create_BFEnd_X0(bfe); +} + +__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}"); +static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_addi; + return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) | + create_Dest_X1(rd) | create_SrcA_X1(ra) | + create_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}"); +static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6) +{ + extern tilegx_bundle_bits __unalign_jit_x0_shrui; + return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_ShAmt_X0(imm6); +} + +__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}"); +static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6) +{ + extern tilegx_bundle_bits __unalign_jit_x0_rotli; + return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_ShAmt_X0(imm6); +} + +__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}"); +static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff) +{ + extern tilegx_bundle_bits __unalign_jit_x1_bnezt; + return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) & + GX_INSN_X1_MASK) | + create_SrcA_X1(ra) | create_BrOff_X1(broff); +} + +#undef __JIT_CODE + +/* + * This function generates unalign fixup JIT. + * + * We fist find unalign load/store instruction's destination, source + * reguisters: ra, rb and rd. and 3 scratch registers by calling + * find_regs(...). 3 scratch clobbers should not alias with any register + * used in the fault bundle. Then analyze the fault bundle to determine + * if it's a load or store, operand width, branch or address increment etc. + * At last generated JIT is copied into JIT code area in user space. + */ + +static +void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, + int align_ctl) +{ + struct thread_info *info = current_thread_info(); + struct unaligned_jit_fragment frag; + struct unaligned_jit_fragment *jit_code_area; + tilegx_bundle_bits bundle_2 = 0; + /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */ + bool bundle_2_enable = true; + uint64_t ra, rb, rd = -1, clob1, clob2, clob3; + /* + * Indicate if the unalign access + * instruction's registers hit with + * others in the same bundle. + */ + bool alias = false; + bool load_n_store = true; + bool load_store_signed = false; + unsigned int load_store_size = 8; + bool y1_br = false; /* True, for a branch in same bundle at Y1.*/ + int y1_br_reg = 0; + /* True for link operation. i.e. jalr or lnk at Y1 */ + bool y1_lr = false; + int y1_lr_reg = 0; + bool x1_add = false;/* True, for load/store ADD instruction at X1*/ + int x1_add_imm8 = 0; + bool unexpected = false; + int n = 0, k; + + jit_code_area = + (struct unaligned_jit_fragment *)(info->unalign_jit_base); + + memset((void *)&frag, 0, sizeof(frag)); + + /* 0: X mode, Otherwise: Y mode. */ + if (bundle & TILEGX_BUNDLE_MODE_MASK) { + unsigned int mod, opcode; + + if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 && + get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) { + + opcode = get_UnaryOpcodeExtension_Y1(bundle); + + /* + * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1 + * pipeline. + */ + switch (opcode) { + case JALR_UNARY_OPCODE_Y1: + case JALRP_UNARY_OPCODE_Y1: + y1_lr = true; + y1_lr_reg = 55; /* Link register. */ + /* FALLTHROUGH */ + case JR_UNARY_OPCODE_Y1: + case JRP_UNARY_OPCODE_Y1: + y1_br = true; + y1_br_reg = get_SrcA_Y1(bundle); + break; + case LNK_UNARY_OPCODE_Y1: + /* "lnk" at Y1 pipeline. */ + y1_lr = true; + y1_lr_reg = get_Dest_Y1(bundle); + break; + } + } + + opcode = get_Opcode_Y2(bundle); + mod = get_Mode(bundle); + + /* + * bundle_2 is bundle after making Y2 as a dummy operation + * - ld zero, sp + */ + bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy(); + + /* Make Y1 as fnop if Y1 is a branch or lnk operation. */ + if (y1_br || y1_lr) { + bundle_2 &= ~(GX_INSN_Y1_MASK); + bundle_2 |= jit_y1_fnop(); + } + + if (is_y0_y1_nop(bundle_2)) + bundle_2_enable = false; + + if (mod == MODE_OPCODE_YC2) { + /* Store. */ + load_n_store = false; + load_store_size = 1 << opcode; + load_store_signed = false; + find_regs(bundle, 0, &ra, &rb, &clob1, &clob2, + &clob3, &alias); + if (load_store_size > 8) + unexpected = true; + } else { + /* Load. */ + load_n_store = true; + if (mod == MODE_OPCODE_YB2) { + switch (opcode) { + case LD_OPCODE_Y2: + load_store_signed = false; + load_store_size = 8; + break; + case LD4S_OPCODE_Y2: + load_store_signed = true; + load_store_size = 4; + break; + case LD4U_OPCODE_Y2: + load_store_signed = false; + load_store_size = 4; + break; + default: + unexpected = true; + } + } else if (mod == MODE_OPCODE_YA2) { + if (opcode == LD2S_OPCODE_Y2) { + load_store_signed = true; + load_store_size = 2; + } else if (opcode == LD2U_OPCODE_Y2) { + load_store_signed = false; + load_store_size = 2; + } else + unexpected = true; + } else + unexpected = true; + find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2, + &clob3, &alias); + } + } else { + unsigned int opcode; + + /* bundle_2 is bundle after making X1 as "fnop". */ + bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop(); + + if (is_x0_x1_nop(bundle_2)) + bundle_2_enable = false; + + if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) { + opcode = get_UnaryOpcodeExtension_X1(bundle); + + if (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) { + load_n_store = true; + find_regs(bundle, &rd, &ra, &rb, &clob1, + &clob2, &clob3, &alias); + + switch (opcode) { + case LD_UNARY_OPCODE_X1: + load_store_signed = false; + load_store_size = 8; + break; + case LD4S_UNARY_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD4U_UNARY_OPCODE_X1: + load_store_size = 4; + break; + + case LD2S_UNARY_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD2U_UNARY_OPCODE_X1: + load_store_size = 2; + break; + default: + unexpected = true; + } + } else { + load_n_store = false; + load_store_signed = false; + find_regs(bundle, 0, &ra, &rb, + &clob1, &clob2, &clob3, + &alias); + + opcode = get_RRROpcodeExtension_X1(bundle); + switch (opcode) { + case ST_RRR_0_OPCODE_X1: + load_store_size = 8; + break; + case ST4_RRR_0_OPCODE_X1: + load_store_size = 4; + break; + case ST2_RRR_0_OPCODE_X1: + load_store_size = 2; + break; + default: + unexpected = true; + } + } + } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) { + load_n_store = true; + opcode = get_Imm8OpcodeExtension_X1(bundle); + switch (opcode) { + case LD_ADD_IMM8_OPCODE_X1: + load_store_size = 8; + break; + + case LD4S_ADD_IMM8_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD4U_ADD_IMM8_OPCODE_X1: + load_store_size = 4; + break; + + case LD2S_ADD_IMM8_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD2U_ADD_IMM8_OPCODE_X1: + load_store_size = 2; + break; + + case ST_ADD_IMM8_OPCODE_X1: + load_n_store = false; + load_store_size = 8; + break; + case ST4_ADD_IMM8_OPCODE_X1: + load_n_store = false; + load_store_size = 4; + break; + case ST2_ADD_IMM8_OPCODE_X1: + load_n_store = false; + load_store_size = 2; + break; + default: + unexpected = true; + } + + if (!unexpected) { + x1_add = true; + if (load_n_store) + x1_add_imm8 = get_Imm8_X1(bundle); + else + x1_add_imm8 = get_Dest_Imm8_X1(bundle); + } + + find_regs(bundle, load_n_store ? (&rd) : NULL, + &ra, &rb, &clob1, &clob2, &clob3, &alias); + } else + unexpected = true; + } + + /* + * Some sanity check for register numbers extracted from fault bundle. + */ + if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true) + unexpected = true; + + /* Give warning if register ra has an aligned address. */ + if (!unexpected) + WARN_ON(!((load_store_size - 1) & (regs->regs[ra]))); + + + /* + * Fault came from kernel space, here we only need take care of + * unaligned "get_user/put_user" macros defined in "uaccess.h". + * Basically, we will handle bundle like this: + * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0} + * (Refer to file "arch/tile/include/asm/uaccess.h" for details). + * For either load or store, byte-wise operation is performed by calling + * get_user() or put_user(). If the macro returns non-zero value, + * set the value to rx, otherwise set zero to rx. Finally make pc point + * to next bundle and return. + */ + + if (EX1_PL(regs->ex1) != USER_PL) { + + unsigned long rx = 0; + unsigned long x = 0, ret = 0; + + if (y1_br || y1_lr || x1_add || + (load_store_signed != + (load_n_store && load_store_size == 4))) { + /* No branch, link, wrong sign-ext or load/store add. */ + unexpected = true; + } else if (!unexpected) { + if (bundle & TILEGX_BUNDLE_MODE_MASK) { + /* + * Fault bundle is Y mode. + * Check if the Y1 and Y0 is the form of + * { movei rx, 0; nop/fnop }, if yes, + * find the rx. + */ + + if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1) + && (get_SrcA_Y1(bundle) == TREG_ZERO) && + (get_Imm8_Y1(bundle) == 0) && + is_bundle_y0_nop(bundle)) { + rx = get_Dest_Y1(bundle); + } else if ((get_Opcode_Y0(bundle) == + ADDI_OPCODE_Y0) && + (get_SrcA_Y0(bundle) == TREG_ZERO) && + (get_Imm8_Y0(bundle) == 0) && + is_bundle_y1_nop(bundle)) { + rx = get_Dest_Y0(bundle); + } else { + unexpected = true; + } + } else { + /* + * Fault bundle is X mode. + * Check if the X0 is 'movei rx, 0', + * if yes, find the rx. + */ + + if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0) + && (get_Imm8OpcodeExtension_X0(bundle) == + ADDI_IMM8_OPCODE_X0) && + (get_SrcA_X0(bundle) == TREG_ZERO) && + (get_Imm8_X0(bundle) == 0)) { + rx = get_Dest_X0(bundle); + } else { + unexpected = true; + } + } + + /* rx should be less than 56. */ + if (!unexpected && (rx >= 56)) + unexpected = true; + } + + if (!search_exception_tables(regs->pc)) { + /* No fixup in the exception tables for the pc. */ + unexpected = true; + } + + if (unexpected) { + /* Unexpected unalign kernel fault. */ + struct task_struct *tsk = validate_current(); + + bust_spinlocks(1); + + show_regs(regs); + + if (unlikely(tsk->pid < 2)) { + panic("Kernel unalign fault running %s!", + tsk->pid ? "init" : "the idle task"); + } +#ifdef SUPPORT_DIE + die("Oops", regs); +#endif + bust_spinlocks(1); + + do_group_exit(SIGKILL); + + } else { + unsigned long i, b = 0; + unsigned char *ptr = + (unsigned char *)regs->regs[ra]; + if (load_n_store) { + /* handle get_user(x, ptr) */ + for (i = 0; i < load_store_size; i++) { + ret = get_user(b, ptr++); + if (!ret) { + /* Success! update x. */ +#ifdef __LITTLE_ENDIAN + x |= (b << (8 * i)); +#else + x <<= 8; + x |= b; +#endif /* __LITTLE_ENDIAN */ + } else { + x = 0; + break; + } + } + + /* Sign-extend 4-byte loads. */ + if (load_store_size == 4) + x = (long)(int)x; + + /* Set register rd. */ + regs->regs[rd] = x; + + /* Set register rx. */ + regs->regs[rx] = ret; + + /* Bump pc. */ + regs->pc += 8; + + } else { + /* Handle put_user(x, ptr) */ + x = regs->regs[rb]; +#ifdef __LITTLE_ENDIAN + b = x; +#else + /* + * Swap x in order to store x from low + * to high memory same as the + * little-endian case. + */ + switch (load_store_size) { + case 8: + b = swab64(x); + break; + case 4: + b = swab32(x); + break; + case 2: + b = swab16(x); + break; + } +#endif /* __LITTLE_ENDIAN */ + for (i = 0; i < load_store_size; i++) { + ret = put_user(b, ptr++); + if (ret) + break; + /* Success! shift 1 byte. */ + b >>= 8; + } + /* Set register rx. */ + regs->regs[rx] = ret; + + /* Bump pc. */ + regs->pc += 8; + } + } + + unaligned_fixup_count++; + + if (unaligned_printk) { + pr_info("%s/%d. Unalign fixup for kernel access " + "to userspace %lx.", + current->comm, current->pid, regs->regs[ra]); + } + + /* Done! Return to the exception handler. */ + return; + } + + if ((align_ctl == 0) || unexpected) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = (unsigned char __user *)0 + }; + if (unaligned_printk) + pr_info("Unalign bundle: unexp @%llx, %llx", + (unsigned long long)regs->pc, + (unsigned long long)bundle); + + if (ra < 56) { + unsigned long uaa = (unsigned long)regs->regs[ra]; + /* Set bus Address. */ + info.si_addr = (unsigned char __user *)uaa; + } + + unaligned_fixup_count++; + + trace_unhandled_signal("unaligned fixup trap", regs, + (unsigned long)info.si_addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return; + } + +#ifdef __LITTLE_ENDIAN +#define UA_FIXUP_ADDR_DELTA 1 +#define UA_FIXUP_BFEXT_START(_B_) 0 +#define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1) +#else /* __BIG_ENDIAN */ +#define UA_FIXUP_ADDR_DELTA -1 +#define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_)) +#define UA_FIXUP_BFEXT_END(_B_) 63 +#endif /* __LITTLE_ENDIAN */ + + + + if ((ra != rb) && (rd != TREG_SP) && !alias && + !y1_br && !y1_lr && !x1_add) { + /* + * Simple case: ra != rb and no register alias found, + * and no branch or link. This will be the majority. + * We can do a little better for simplae case than the + * generic scheme below. + */ + if (!load_n_store) { + /* + * Simple store: ra != rb, no need for scratch register. + * Just store and rotate to right bytewise. + */ +#ifdef __BIG_ENDIAN + frag.insn[n++] = + jit_x0_addi(ra, ra, load_store_size - 1) | + jit_x1_fnop(); +#endif /* __BIG_ENDIAN */ + for (k = 0; k < load_store_size; k++) { + /* Store a byte. */ + frag.insn[n++] = + jit_x0_rotli(rb, rb, 56) | + jit_x1_st1_add(ra, rb, + UA_FIXUP_ADDR_DELTA); + } +#ifdef __BIG_ENDIAN + frag.insn[n] = jit_x1_addi(ra, ra, 1); +#else + frag.insn[n] = jit_x1_addi(ra, ra, + -1 * load_store_size); +#endif /* __LITTLE_ENDIAN */ + + if (load_store_size == 8) { + frag.insn[n] |= jit_x0_fnop(); + } else if (load_store_size == 4) { + frag.insn[n] |= jit_x0_rotli(rb, rb, 32); + } else { /* = 2 */ + frag.insn[n] |= jit_x0_rotli(rb, rb, 16); + } + n++; + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + frag.insn[n++] = jit_x0_fnop() | jit_x1_iret(); + } else { + if (rd == ra) { + /* Use two clobber registers: clob1/2. */ + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -16) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob1, ra, 7) | + jit_x1_st_add(TREG_SP, clob1, -8); + frag.insn[n++] = + jit_x0_addi(clob2, ra, 0) | + jit_x1_st(TREG_SP, clob2); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(rd, ra); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(clob1, clob1); + /* + * Note: we must make sure that rd must not + * be sp. Recover clob1/2 from stack. + */ + frag.insn[n++] = + jit_x0_dblalign(rd, clob1, clob2) | + jit_x1_ld_add(clob2, TREG_SP, 8); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob1, TREG_SP, 16); + } else { + /* Use one clobber register: clob1 only. */ + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -16) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob1, ra, 7) | + jit_x1_st(TREG_SP, clob1); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(rd, ra); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(clob1, clob1); + /* + * Note: we must make sure that rd must not + * be sp. Recover clob1 from stack. + */ + frag.insn[n++] = + jit_x0_dblalign(rd, clob1, ra) | + jit_x1_ld_add(clob1, TREG_SP, 16); + } + + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + /* + * For non 8-byte load, extract corresponding bytes and + * signed extension. + */ + if (load_store_size == 4) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + } else if (load_store_size == 2) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + } + + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_iret(); + } + } else if (!load_n_store) { + + /* + * Generic memory store cases: use 3 clobber registers. + * + * Alloc space for saveing clob2,1,3 on user's stack. + * register clob3 points to where clob2 saved, followed by + * clob1 and 3 from high to low memory. + */ + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -32) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob3, TREG_SP, 16) | + jit_x1_st_add(TREG_SP, clob3, 8); +#ifdef __LITTLE_ENDIAN + frag.insn[n++] = + jit_x0_addi(clob1, ra, 0) | + jit_x1_st_add(TREG_SP, clob1, 8); +#else + frag.insn[n++] = + jit_x0_addi(clob1, ra, load_store_size - 1) | + jit_x1_st_add(TREG_SP, clob1, 8); +#endif + if (load_store_size == 8) { + /* + * We save one byte a time, not for fast, but compact + * code. After each store, data source register shift + * right one byte. unchanged after 8 stores. + */ + frag.insn[n++] = + jit_x0_addi(clob2, TREG_ZERO, 7) | + jit_x1_st_add(TREG_SP, clob2, 16); + frag.insn[n++] = + jit_x0_rotli(rb, rb, 56) | + jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA); + frag.insn[n++] = + jit_x0_addi(clob2, clob2, -1) | + jit_x1_bnezt(clob2, -1); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_addi(clob2, y1_br_reg, 0); + } else if (load_store_size == 4) { + frag.insn[n++] = + jit_x0_addi(clob2, TREG_ZERO, 3) | + jit_x1_st_add(TREG_SP, clob2, 16); + frag.insn[n++] = + jit_x0_rotli(rb, rb, 56) | + jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA); + frag.insn[n++] = + jit_x0_addi(clob2, clob2, -1) | + jit_x1_bnezt(clob2, -1); + /* + * same as 8-byte case, but need shift another 4 + * byte to recover rb for 4-byte store. + */ + frag.insn[n++] = jit_x0_rotli(rb, rb, 32) | + jit_x1_addi(clob2, y1_br_reg, 0); + } else { /* =2 */ + frag.insn[n++] = + jit_x0_addi(clob2, rb, 0) | + jit_x1_st_add(TREG_SP, clob2, 16); + for (k = 0; k < 2; k++) { + frag.insn[n++] = + jit_x0_shrui(rb, rb, 8) | + jit_x1_st1_add(clob1, rb, + UA_FIXUP_ADDR_DELTA); + } + frag.insn[n++] = + jit_x0_addi(rb, clob2, 0) | + jit_x1_addi(clob2, y1_br_reg, 0); + } + + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + + if (y1_lr) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mfspr(y1_lr_reg, + SPR_EX_CONTEXT_0_0); + } + if (y1_br) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mtspr(SPR_EX_CONTEXT_0_0, + clob2); + } + if (x1_add) { + frag.insn[n++] = + jit_x0_addi(ra, ra, x1_add_imm8) | + jit_x1_ld_add(clob2, clob3, -8); + } else { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob2, clob3, -8); + } + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob1, clob3, -8); + frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3); + frag.insn[n++] = jit_x0_fnop() | jit_x1_iret(); + + } else { + /* + * Generic memory load cases. + * + * Alloc space for saveing clob1,2,3 on user's stack. + * register clob3 points to where clob1 saved, followed + * by clob2 and 3 from high to low memory. + */ + + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -32) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob3, TREG_SP, 16) | + jit_x1_st_add(TREG_SP, clob3, 8); + frag.insn[n++] = + jit_x0_addi(clob2, ra, 0) | + jit_x1_st_add(TREG_SP, clob2, 8); + + if (y1_br) { + frag.insn[n++] = + jit_x0_addi(clob1, y1_br_reg, 0) | + jit_x1_st_add(TREG_SP, clob1, 16); + } else { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_st_add(TREG_SP, clob1, 16); + } + + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + + if (y1_lr) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mfspr(y1_lr_reg, + SPR_EX_CONTEXT_0_0); + } + + if (y1_br) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mtspr(SPR_EX_CONTEXT_0_0, + clob1); + } + + frag.insn[n++] = + jit_x0_addi(clob1, clob2, 7) | + jit_x1_ldna(rd, clob2); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(clob1, clob1); + frag.insn[n++] = + jit_x0_dblalign(rd, clob1, clob2) | + jit_x1_ld_add(clob1, clob3, -8); + if (x1_add) { + frag.insn[n++] = + jit_x0_addi(ra, ra, x1_add_imm8) | + jit_x1_ld_add(clob2, clob3, -8); + } else { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob2, clob3, -8); + } + + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld(clob3, clob3); + + if (load_store_size == 4) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + } else if (load_store_size == 2) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + } + + frag.insn[n++] = jit_x0_fnop() | jit_x1_iret(); + } + + /* Max JIT bundle count is 14. */ + WARN_ON(n > 14); + + if (!unexpected) { + int status = 0; + int idx = (regs->pc >> 3) & + ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1); + + frag.pc = regs->pc; + frag.bundle = bundle; + + if (unaligned_printk) { + pr_info("%s/%d, Unalign fixup: pc=%lx " + "bundle=%lx %d %d %d %d %d %d %d %d.", + current->comm, current->pid, + (unsigned long)frag.pc, + (unsigned long)frag.bundle, + (int)alias, (int)rd, (int)ra, + (int)rb, (int)bundle_2_enable, + (int)y1_lr, (int)y1_br, (int)x1_add); + + for (k = 0; k < n; k += 2) + pr_info("[%d] %016llx %016llx", k, + (unsigned long long)frag.insn[k], + (unsigned long long)frag.insn[k+1]); + } + + /* Swap bundle byte order for big endian sys. */ +#ifdef __BIG_ENDIAN + frag.bundle = GX_INSN_BSWAP(frag.bundle); + for (k = 0; k < n; k++) + frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]); +#endif /* __BIG_ENDIAN */ + + status = copy_to_user((void __user *)&jit_code_area[idx], + &frag, sizeof(frag)); + if (status) { + /* Fail to copy JIT into user land. send SIGSEGV. */ + siginfo_t info = { + .si_signo = SIGSEGV, + .si_code = SEGV_MAPERR, + .si_addr = (void __user *)&jit_code_area[idx] + }; + + pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx", + current->pid, current->comm, + (unsigned long long)&jit_code_area[idx]); + + trace_unhandled_signal("segfault in unalign fixup", + regs, + (unsigned long)info.si_addr, + SIGSEGV); + force_sig_info(info.si_signo, &info, current); + return; + } + + + /* Do a cheaper increment, not accurate. */ + unaligned_fixup_count++; + __flush_icache_range((unsigned long)&jit_code_area[idx], + (unsigned long)&jit_code_area[idx] + + sizeof(frag)); + + /* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/ + __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8); + __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0)); + + /* Modify pc at the start of new JIT. */ + regs->pc = (unsigned long)&jit_code_area[idx].insn[0]; + /* Set ICS in SPR_EX_CONTEXT_K_1. */ + regs->ex1 = PL_ICS_EX1(USER_PL, 1); + } +} + + +/* + * C function to generate unalign data JIT. Called from unalign data + * interrupt handler. + * + * First check if unalign fix is disabled or exception did not not come from + * user space or sp register points to unalign address, if true, generate a + * SIGBUS. Then map a page into user space as JIT area if it is not mapped + * yet. Genenerate JIT code by calling jit_bundle_gen(). After that return + * back to exception handler. + * + * The exception handler will "iret" to new generated JIT code after + * restoring caller saved registers. In theory, the JIT code will perform + * another "iret" to resume user's program. + */ + +void do_unaligned(struct pt_regs *regs, int vecnum) +{ + tilegx_bundle_bits __user *pc; + tilegx_bundle_bits bundle; + struct thread_info *info = current_thread_info(); + int align_ctl; + + /* Checks the per-process unaligned JIT flags */ + align_ctl = unaligned_fixup; + switch (task_thread_info(current)->align_ctl) { + case PR_UNALIGN_NOPRINT: + align_ctl = 1; + break; + case PR_UNALIGN_SIGBUS: + align_ctl = 0; + break; + } + + /* Enable iterrupt in order to access user land. */ + local_irq_enable(); + + /* + * The fault came from kernel space. Two choices: + * (a) unaligned_fixup < 1, we will first call get/put_user fixup + * to return -EFAULT. If no fixup, simply panic the kernel. + * (b) unaligned_fixup >=1, we will try to fix the unaligned access + * if it was triggered by get_user/put_user() macros. Panic the + * kernel if it is not fixable. + */ + + if (EX1_PL(regs->ex1) != USER_PL) { + + if (align_ctl < 1) { + unaligned_fixup_count++; + /* If exception came from kernel, try fix it up. */ + if (fixup_exception(regs)) { + if (unaligned_printk) + pr_info("Unalign fixup: %d %llx @%llx", + (int)unaligned_fixup, + (unsigned long long)regs->ex1, + (unsigned long long)regs->pc); + return; + } + /* Not fixable. Go panic. */ + panic("Unalign exception in Kernel. pc=%lx", + regs->pc); + return; + } else { + /* + * Try to fix the exception. If we can't, panic the + * kernel. + */ + bundle = GX_INSN_BSWAP( + *((tilegx_bundle_bits *)(regs->pc))); + jit_bundle_gen(regs, bundle, align_ctl); + return; + } + } + + /* + * Fault came from user with ICS or stack is not aligned. + * If so, we will trigger SIGBUS. + */ + if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = (unsigned char __user *)0 + }; + + if (unaligned_printk) + pr_info("Unalign fixup: %d %llx @%llx", + (int)unaligned_fixup, + (unsigned long long)regs->ex1, + (unsigned long long)regs->pc); + + unaligned_fixup_count++; + + trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return; + } + + + /* Read the bundle casued the exception! */ + pc = (tilegx_bundle_bits __user *)(regs->pc); + if (get_user(bundle, pc) != 0) { + /* Probably never be here since pc is valid user address.*/ + siginfo_t info = { + .si_signo = SIGSEGV, + .si_code = SEGV_MAPERR, + .si_addr = (void __user *)pc + }; + pr_err("Couldn't read instruction at %p trying to step\n", pc); + trace_unhandled_signal("segfault in unalign fixup", regs, + (unsigned long)info.si_addr, SIGSEGV); + force_sig_info(info.si_signo, &info, current); + return; + } + + if (!info->unalign_jit_base) { + void __user *user_page; + + /* + * Allocate a page in userland. + * For 64-bit processes we try to place the mapping far + * from anything else that might be going on (specifically + * 64 GB below the top of the user address space). If it + * happens not to be possible to put it there, it's OK; + * the kernel will choose another location and we'll + * remember it for later. + */ + if (is_compat_task()) + user_page = NULL; + else + user_page = (void __user *)(TASK_SIZE - (1UL << 36)) + + (current->pid << PAGE_SHIFT); + + user_page = (void __user *) vm_mmap(NULL, + (unsigned long)user_page, + PAGE_SIZE, + PROT_EXEC | PROT_READ | + PROT_WRITE, +#ifdef CONFIG_HOMECACHE + MAP_CACHE_HOME_TASK | +#endif + MAP_PRIVATE | + MAP_ANONYMOUS, + 0); + + if (IS_ERR((void __force *)user_page)) { + pr_err("Out of kernel pages trying do_mmap.\n"); + return; + } + + /* Save the address in the thread_info struct */ + info->unalign_jit_base = user_page; + if (unaligned_printk) + pr_info("Unalign bundle: %d:%d, allocate page @%llx", + raw_smp_processor_id(), current->pid, + (unsigned long long)user_page); + } + + /* Generate unalign JIT */ + jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl); +} + +#endif /* __tilegx__ */ -- cgit v1.1 From 3ef23111546df9e9dab2e2befb412a9563db0628 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 6 Aug 2013 16:10:23 -0400 Subject: tile: avoid recursive backtrace faults This change adds support for avoiding recursive backtracer crashes; we haven't seen this in practice other than when things are seriously corrupt, but it may help avoid losing the root cause of a crash. Also, don't abort kernel backtracers for invalid userspace PC's. If we do, we lose the ability to backtrace through a userspace call to a bad address above PAGE_OFFSET, even though that it can be perfectly reasonable to continue the backtrace in such a case. Signed-off-by: Chris Metcalf --- arch/tile/kernel/stack.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index af8dfc9..c972689 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -103,8 +103,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) if (kbt->verbose) pr_err(" <%s while in kernel mode>\n", fault); } else if (EX1_PL(p->ex1) == USER_PL && - p->pc < PAGE_OFFSET && - p->sp < PAGE_OFFSET) { + p->sp < PAGE_OFFSET && p->sp != 0) { if (kbt->verbose) pr_err(" <%s while in user mode>\n", fault); } else if (kbt->verbose) { @@ -352,6 +351,26 @@ static void describe_addr(struct KBacktraceIterator *kbt, } /* + * Avoid possible crash recursion during backtrace. If it happens, it + * makes it easy to lose the actual root cause of the failure, so we + * put a simple guard on all the backtrace loops. + */ +static bool start_backtrace(void) +{ + if (current->thread.in_backtrace) { + pr_err("Backtrace requested while in backtrace!\n"); + return false; + } + current->thread.in_backtrace = true; + return true; +} + +static void end_backtrace(void) +{ + current->thread.in_backtrace = false; +} + +/* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() * and dump_stack() (in entry.S) are architecture-independent entry points. @@ -361,6 +380,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) int i; int have_mmap_sem = 0; + if (!start_backtrace()) + return; if (headers) { /* * Add a blank line since if we are called from panic(), @@ -402,6 +423,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump complete\n"); if (have_mmap_sem) up_read(&kbt->task->mm->mmap_sem); + end_backtrace(); } EXPORT_SYMBOL(tile_show_stack); @@ -463,6 +485,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) int skip = trace->skip; int i = 0; + if (!start_backtrace()) + goto done; if (task == NULL || task == current) KBacktraceIterator_init_current(&kbt); else @@ -476,6 +500,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) break; trace->entries[i++] = kbt.it.pc; } + end_backtrace(); +done: trace->nr_entries = i; } EXPORT_SYMBOL(save_stack_trace_tsk); -- cgit v1.1 From bc1a298f4e04833db4c430df59b90039f0170515 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 11:36:54 -0400 Subject: tile: support CONFIG_PREEMPT This change adds support for CONFIG_PREEMPT (full kernel preemption). In addition to the core support, this change includes a number of places where we fix up uses of smp_processor_id() and per-cpu variables. I also eliminate the PAGE_HOME_HERE and PAGE_HOME_UNKNOWN values for page homing, as it turns out they weren't being used. Signed-off-by: Chris Metcalf --- arch/tile/kernel/asm-offsets.c | 2 ++ arch/tile/kernel/hardwall.c | 18 +++++++++--------- arch/tile/kernel/intvec_32.S | 27 ++++++++++++++++++++------- arch/tile/kernel/intvec_64.S | 30 ++++++++++++++++++++++++------ arch/tile/kernel/irq.c | 1 + arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/smpboot.c | 8 +++++--- arch/tile/kernel/stack.c | 4 ++-- arch/tile/kernel/sys.c | 4 +++- 9 files changed, 67 insertions(+), 29 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c index 8652b0b..97ea6ac 100644 --- a/arch/tile/kernel/asm-offsets.c +++ b/arch/tile/kernel/asm-offsets.c @@ -58,6 +58,8 @@ void foo(void) offsetof(struct thread_info, status)); DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, offsetof(struct thread_info, homecache_cpu)); + DEFINE(THREAD_INFO_PREEMPT_COUNT_OFFSET, + offsetof(struct thread_info, preempt_count)); DEFINE(THREAD_INFO_STEP_STATE_OFFSET, offsetof(struct thread_info, step_state)); #ifdef __tilegx__ diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 7db8893d..df27a1f 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -272,9 +272,9 @@ static void hardwall_setup_func(void *info) struct hardwall_info *r = info; struct hardwall_type *hwt = r->type; - int cpu = smp_processor_id(); - int x = cpu % smp_width; - int y = cpu / smp_width; + int cpu = smp_processor_id(); /* on_each_cpu disables preemption */ + int x = cpu_x(cpu); + int y = cpu_y(cpu); int bits = 0; if (x == r->ulhc_x) bits |= W_PROTECT; @@ -317,6 +317,7 @@ static void hardwall_protect_rectangle(struct hardwall_info *r) on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1); } +/* Entered from INT_xDN_FIREWALL interrupt vector with irqs disabled. */ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) { struct hardwall_info *rect; @@ -325,7 +326,6 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) struct siginfo info; int cpu = smp_processor_id(); int found_processes; - unsigned long flags; struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); @@ -346,7 +346,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) BUG_ON(hwt->disabled); /* This tile trapped a network access; find the rectangle. */ - spin_lock_irqsave(&hwt->lock, flags); + spin_lock(&hwt->lock); list_for_each_entry(rect, &hwt->list, list) { if (cpumask_test_cpu(cpu, &rect->cpumask)) break; @@ -401,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) pr_notice("hardwall: no associated processes!\n"); done: - spin_unlock_irqrestore(&hwt->lock, flags); + spin_unlock(&hwt->lock); /* * We have to disable firewall interrupts now, or else when we @@ -661,7 +661,7 @@ static int hardwall_deactivate(struct hardwall_type *hwt, return -EINVAL; printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n", - task->pid, task->comm, hwt->name, smp_processor_id()); + task->pid, task->comm, hwt->name, raw_smp_processor_id()); return 0; } @@ -803,8 +803,8 @@ static void reset_xdn_network_state(struct hardwall_type *hwt) /* Reset UDN coordinates to their standard value */ { unsigned int cpu = smp_processor_id(); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; + unsigned int x = cpu_x(cpu); + unsigned int y = cpu_y(cpu); __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); } diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 3880613..1076765 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -28,10 +28,6 @@ #include #include -#ifdef CONFIG_PREEMPT -# error "No support for kernel preemption currently" -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) @@ -812,17 +808,34 @@ STD_ENTRY(interrupt_return) } lw r29, r29 andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + bzt r29, .Lresume_userspace + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + GET_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET { - bzt r29, .Lresume_userspace - PTREGS_PTR(r29, PTREGS_OFFSET_PC) + lw r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET + } + { + andi r28, r28, _TIF_NEED_RESCHED + lw r29, r29 } + bzt r28, 1f + bnz r29, 1f + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ { - lw r28, r29 + PTREGS_PTR(r29, PTREGS_OFFSET_PC) moveli r27, lo16(_cpu_idle_nap) } { + lw r28, r29 auli r27, r27, ha16(_cpu_idle_nap) } { diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 884af9e..38a60f2 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -30,10 +30,6 @@ #include #include -#ifdef CONFIG_PREEMPT -# error "No support for kernel preemption currently" -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) @@ -820,11 +816,33 @@ STD_ENTRY(interrupt_return) andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ { beqzt r29, .Lresume_userspace - PTREGS_PTR(r29, PTREGS_OFFSET_PC) + move r29, sp + } + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + EXTRACT_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET + { + ld r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET } + { + andi r28, r28, _TIF_NEED_RESCHED + ld4s r29, r29 + } + beqzt r28, 1f + bnez r29, 1f + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ - moveli r27, hw2_last(_cpu_idle_nap) + { + moveli r27, hw2_last(_cpu_idle_nap) + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } { ld r28, r29 shl16insli r27, r27, hw1(_cpu_idle_nap) diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 02e6280..c90de6c 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -74,6 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock); /* * The interrupt handling path, implemented in terms of HV interrupt * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + * Entered with interrupts disabled. */ void tile_dev_intr(struct pt_regs *regs, int intnum) { diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index cbc73a8..6cc520d 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -100,8 +100,8 @@ static void smp_start_cpu_interrupt(void) /* Handler to stop the current cpu. */ static void smp_stop_cpu_interrupt(void) { - set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); + set_cpu_online(smp_processor_id(), 0); for (;;) asm("nap; nop"); } diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 44bab29..dee7f13 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -142,13 +142,15 @@ static struct cpumask cpu_started __cpuinitdata; */ static void __cpuinit start_secondary(void) { - int cpuid = smp_processor_id(); + int cpuid; + + preempt_disable(); + + cpuid = smp_processor_id(); /* Set our thread pointer appropriately. */ set_my_cpu_offset(__per_cpu_offset[cpuid]); - preempt_disable(); - /* * In large machines even this will slow us down, since we * will be contending for for the printk spinlock. diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index c972689..176ffe4 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -194,7 +194,7 @@ static int KBacktraceIterator_next_item_inclusive( */ static void validate_stack(struct pt_regs *regs) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); unsigned long ksp0 = get_current_ksp0(); unsigned long ksp0_base = ksp0 - THREAD_SIZE; unsigned long sp = stack_pointer; @@ -392,7 +392,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Starting stack dump of tid %d, pid %d (%s)" " on cpu %d at cycle %lld\n", kbt->task->pid, kbt->task->tgid, kbt->task->comm, - smp_processor_id(), get_cycles()); + raw_smp_processor_id(), get_cycles()); } kbt->verbose = 1; i = 0; diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index b881a7be..38debe7 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -38,8 +38,10 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, unsigned long, flags) { + /* DCACHE is not particularly effective if not bound to one cpu. */ if (flags & DCACHE) - homecache_evict(cpumask_of(smp_processor_id())); + homecache_evict(cpumask_of(raw_smp_processor_id())); + if (flags & ICACHE) flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm), 0, 0, 0, NULL, NULL, 0); -- cgit v1.1 From ba02f0eb826da6dbdc5a2958ac52304ee441234f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 11:55:35 -0400 Subject: tile: improve big-endian support First, fix a bug in asm/unaligned.h; we need to just use the asm-generic unaligned.h so we properly choose endian-correct flavors. Second, keep the hv/hypervisor.h ABI fully "native" in the sense that we don't have __BIG_ENDIAN__ ifdefs there. Instead, we use macros in the head_NN.S assembly code to properly extract two 32-bit structure members from a 64-bit register holding the structure. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_64.S | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index e23e5f7..ed51320 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -25,6 +25,15 @@ #include #include +/* Extract two 32-bit bit values that were read into one register. */ +#ifdef __BIG_ENDIAN__ +#define GET_FIRST_INT(rd, rs) shrsi rd, rs, 32 +#define GET_SECOND_INT(rd, rs) addxi rd, rs, 0 +#else +#define GET_FIRST_INT(rd, rs) addxi rd, rs, 0 +#define GET_SECOND_INT(rd, rs) shrsi rd, rs, 32 +#endif + /* * This module contains the entry code for kernel images. It performs the * minimal setup needed to call the generic C routines. @@ -61,7 +70,7 @@ ENTRY(_start) * other CPUs should see a properly-constructed page table. */ { - v4int_l r2, zero, r0 /* ASID for hv_install_context */ + GET_FIRST_INT(r2, r0) /* ASID for hv_install_context */ moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET) } { @@ -131,19 +140,14 @@ ENTRY(_start) shl16insli r0, r0, hw0(MEM_SV_START) mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 - /* - * Get our processor number and save it away in SAVE_K_0. - * Extract stuff from the topology structure: r4 = y, r6 = x, - * r5 = width. FIXME: consider whether we want to just make these - * 64-bit values (and if so fix smp_topology write below, too). - */ + /* Get our processor number and save it away in SAVE_K_0. */ jal hv_inquire_topology { - v4int_l r5, zero, r1 /* r5 = width */ - shrui r4, r0, 32 /* r4 = y */ + GET_FIRST_INT(r5, r1) /* r5 = width */ + GET_SECOND_INT(r4, r0) /* r4 = y */ } { - v4int_l r6, zero, r0 /* r6 = x */ + GET_FIRST_INT(r6, r0) /* r6 = x */ mul_lu_lu r4, r4, r5 } { -- cgit v1.1 From dadf78bf0359ee87a9d0b19ce42dbce7d0a72a61 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 12:03:53 -0400 Subject: tile: make register dumps more readable It's much easier to read register dumps if you read vertically rather than horizontally, since the register numbers line up and lead the eye down more than to the right. Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 25678b8..663289b 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -591,21 +591,21 @@ void show_regs(struct pt_regs *regs) pr_err("\n"); show_regs_print_info(KERN_ERR); #ifdef __tilegx__ - for (i = 0; i < 51; i += 3) + for (i = 0; i < 17; i++) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", - i, regs->regs[i], i+1, regs->regs[i+1], - i+2, regs->regs[i+2]); - pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n", - regs->regs[51], regs->regs[52], regs->tp); + i, regs->regs[i], i+18, regs->regs[i+18], + i+36, regs->regs[i+36]); + pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n", + regs->regs[17], regs->regs[35], regs->tp); pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); #else - for (i = 0; i < 52; i += 4) + for (i = 0; i < 13; i++) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", - i, regs->regs[i], i+1, regs->regs[i+1], - i+2, regs->regs[i+2], i+3, regs->regs[i+3]); - pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", - regs->regs[52], regs->tp, regs->sp, regs->lr); + i, regs->regs[i], i+14, regs->regs[i+14], + i+27, regs->regs[i+27], i+40, regs->regs[i+40]); + pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", + regs->regs[13], regs->tp, regs->sp, regs->lr); #endif pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n", regs->pc, regs->ex1, regs->faultnum); -- cgit v1.1 From 70d2b5958a04139fbffecf27791cf913dce8038e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 12:11:56 -0400 Subject: tile: improve illegal translation interrupt handling First, don't re-enable interrupts blindly in the Linux trap handler. We already handle page faults this way; synchronous interrupts like ILL_TRANS will fire even when interrupts are disabled, and we don't want to re-enable interrupts in that case. For ILL_TRANS, we now pass the ILL_VA_PC reason into the trap handler so we can report it properly; this is the address that caused the illegal translation trap. We print the address as part of the pr_alert() message now if it's coming from the kernel. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_64.S | 2 +- arch/tile/kernel/traps.c | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 38a60f2..562886d 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -492,7 +492,7 @@ intvec_\vecname: mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ .else .ifc \vecnum, INT_ILL_TRANS - mfspr r2, ILL_TRANS_REASON + mfspr r2, ILL_VA_PC .else .ifc \vecnum, INT_DOUBLE_FAULT mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 5b19a23..a1bbc5de 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -222,8 +222,9 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, unsigned long address = 0; bundle_bits instr; - /* Re-enable interrupts. */ - local_irq_enable(); + /* Re-enable interrupts, if they were previously enabled. */ + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); /* * If it hits in kernel mode and we can't fix it up, just exit the @@ -231,7 +232,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, */ if (!user_mode(regs)) { const char *name; - if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ + char buf[100]; + if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */ return; if (fault_num >= 0 && fault_num < sizeof(int_name)/sizeof(int_name[0]) && @@ -239,10 +241,16 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, name = int_name[fault_num]; else name = "Unknown interrupt"; - pr_alert("Kernel took bad trap %d (%s) at PC %#lx\n", - fault_num, name, regs->pc); if (fault_num == INT_GPV) - pr_alert("GPV_REASON is %#lx\n", reason); + snprintf(buf, sizeof(buf), "; GPV_REASON %#lx", reason); +#ifdef __tilegx__ + else if (fault_num == INT_ILL_TRANS) + snprintf(buf, sizeof(buf), "; address %#lx", reason); +#endif + else + buf[0] = '\0'; + pr_alert("Kernel took bad trap %d (%s) at PC %#lx%s\n", + fault_num, name, regs->pc, buf); show_regs(regs); do_exit(SIGKILL); /* FIXME: implement i386 die() */ return; @@ -324,11 +332,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, fill_ra_stack(); signo = SIGSEGV; + address = reason; code = SEGV_MAPERR; - if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) - address = regs->pc; - else - address = 0; /* FIXME: GX: single-step for address */ break; } #endif -- cgit v1.1 From 4a556f4f56da3110b27e265b79f0e7582115445c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 15:33:32 -0400 Subject: tile: implement gettimeofday() via vDSO This change creates the framework for vDSO calls, makes the existing rt_sigreturn() mechanism use it, and adds a fast gettimeofday(). Now that we need to expose the vDSO address to userspace, we add AT_SYSINFO_EHDR to the set of aux entries provided to userspace. (You can disable any extra vDSO support by booting with vdso=0, but the rt_sigreturn vDSO page will still be provided.) Note that glibc has supported the tile vDSO since release 2.17. Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 4 +- arch/tile/kernel/compat_signal.c | 3 +- arch/tile/kernel/entry.S | 16 --- arch/tile/kernel/signal.c | 3 +- arch/tile/kernel/stack.c | 5 +- arch/tile/kernel/time.c | 37 +++++- arch/tile/kernel/vdso.c | 212 ++++++++++++++++++++++++++++++++++ arch/tile/kernel/vdso/Makefile | 118 +++++++++++++++++++ arch/tile/kernel/vdso/vdso.S | 28 +++++ arch/tile/kernel/vdso/vdso.lds.S | 87 ++++++++++++++ arch/tile/kernel/vdso/vdso32.S | 28 +++++ arch/tile/kernel/vdso/vgettimeofday.c | 107 +++++++++++++++++ arch/tile/kernel/vdso/vrt_sigreturn.S | 30 +++++ 13 files changed, 656 insertions(+), 22 deletions(-) create mode 100644 arch/tile/kernel/vdso.c create mode 100644 arch/tile/kernel/vdso/Makefile create mode 100644 arch/tile/kernel/vdso/vdso.S create mode 100644 arch/tile/kernel/vdso/vdso.lds.S create mode 100644 arch/tile/kernel/vdso/vdso32.S create mode 100644 arch/tile/kernel/vdso/vgettimeofday.c create mode 100644 arch/tile/kernel/vdso/vrt_sigreturn.S (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 6846c4e..5157d1c 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := vmlinux.lds head_$(BITS).o obj-y := backtrace.o entry.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ setup.o signal.o single_step.o stack.o sys.o \ - sysfs.o time.o traps.o unaligned.o \ + sysfs.o time.o traps.o unaligned.o vdso.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o @@ -21,3 +21,5 @@ else obj-$(CONFIG_PCI) += pci.o endif obj-$(CONFIG_TILE_USB) += usb.o + +obj-y += vdso/ diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index d0a052e..85e00b2 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -32,6 +32,7 @@ #include #include #include +#include #include struct compat_ucontext { @@ -227,7 +228,7 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (err) goto give_sigsegv; - restorer = VDSO_BASE; + restorer = VDSO_SYM(&__vdso_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ptr_to_compat_reg(ka->sa.sa_restorer); diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index f116cb0..3d91759 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -27,22 +27,6 @@ STD_ENTRY(current_text_addr) { move r0, lr; jrp lr } STD_ENDPROC(current_text_addr) -/* - * We don't run this function directly, but instead copy it to a page - * we map into every user process. See vdso_setup(). - * - * Note that libc has a copy of this function that it uses to compare - * against the PC when a stack backtrace ends, so if this code is - * changed, the libc implementation(s) should also be updated. - */ - .pushsection .data -ENTRY(__rt_sigreturn) - moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn - swint1 - ENDPROC(__rt_sigreturn) - ENTRY(__rt_sigreturn_end) - .popsection - STD_ENTRY(dump_stack) { move r2, lr; lnk r1 } { move r4, r52; addli r1, r1, dump_stack - . } diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 9531845b..2d1dbf3 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #define DEBUG_SIG 0 @@ -190,7 +191,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (err) goto give_sigsegv; - restorer = VDSO_BASE; + restorer = VDSO_SYM(&__vdso_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ka->sa.sa_restorer; diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 176ffe4..a9db923 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -119,7 +120,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) /* Is the pc pointing to a sigreturn trampoline? */ static int is_sigreturn(unsigned long pc) { - return (pc == VDSO_BASE); + return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn)); } /* Return a pt_regs pointer for a valid signal handler frame */ @@ -128,7 +129,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET && b->sp % sizeof(long) == 0) { int retval; pagefault_disable(); diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 5ac397e..36dc1e1 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -110,7 +112,6 @@ void __init time_init(void) setup_tile_timer(); } - /* * Define the tile timer clock event device. The timer is driven by * the TILE_TIMER_CONTROL register, which consists of a 31-bit down @@ -237,3 +238,37 @@ cycles_t ns2cycles(unsigned long nsecs) struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); return ((u64)nsecs * dev->mult) >> dev->shift; } + +void update_vsyscall_tz(void) +{ + /* Userspace gettimeofday will spin while this value is odd. */ + ++vdso_data->tz_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tz_update_count; +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec wall_time = tk_xtime(tk); + struct timespec *wtm = &tk->wall_to_monotonic; + struct clocksource *clock = tk->clock; + + if (clock != &cycle_counter_cs) + return; + + /* Userspace gettimeofday will spin while this value is odd. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_clock_sec = wall_time.tv_sec; + vdso_data->xtime_clock_nsec = wall_time.tv_nsec; + vdso_data->wtom_clock_sec = wtm->tv_sec; + vdso_data->wtom_clock_nsec = wtm->tv_nsec; + vdso_data->mult = clock->mult; + vdso_data->shift = clock->shift; + smp_wmb(); + ++vdso_data->tb_update_count; +} diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c new file mode 100644 index 0000000..1533af2 --- /dev/null +++ b/arch/tile/kernel/vdso.c @@ -0,0 +1,212 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* The alignment of the vDSO. */ +#define VDSO_ALIGNMENT PAGE_SIZE + + +static unsigned int vdso_pages; +static struct page **vdso_pagelist; + +#ifdef CONFIG_COMPAT +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif +static int vdso_ready; + +/* + * The vdso data page. + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; + +struct vdso_data *vdso_data = &vdso_data_store.data; + +static unsigned int __read_mostly vdso_enabled = 1; + +static struct page **vdso_setup(void *vdso_kbase, unsigned int pages) +{ + int i; + struct page **pagelist; + + pagelist = kzalloc(sizeof(struct page *) * (pages + 1), GFP_KERNEL); + BUG_ON(pagelist == NULL); + for (i = 0; i < pages - 1; i++) { + struct page *pg = virt_to_page(vdso_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + pagelist[i] = pg; + } + pagelist[pages - 1] = virt_to_page(vdso_data); + pagelist[pages] = NULL; + + return pagelist; +} + +static int __init vdso_init(void) +{ + int data_pages = sizeof(vdso_data_store) >> PAGE_SHIFT; + + /* + * We can disable vDSO support generally, but we need to retain + * one page to support the two-bundle (16-byte) rt_sigreturn path. + */ + if (!vdso_enabled) { + size_t offset = (unsigned long)&__vdso_rt_sigreturn; + static struct page *sigret_page; + sigret_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + BUG_ON(sigret_page == NULL); + vdso_pagelist = &sigret_page; + vdso_pages = 1; + BUG_ON(offset >= PAGE_SIZE); + memcpy(page_address(sigret_page) + offset, + vdso_start + offset, 16); +#ifdef CONFIG_COMPAT + vdso32_pages = vdso_pages; + vdso32_pagelist = vdso_pagelist; +#endif + vdso_ready = 1; + return 0; + } + + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; + vdso_pages += data_pages; + vdso_pagelist = vdso_setup(vdso_start, vdso_pages); + +#ifdef CONFIG_COMPAT + vdso32_pages = (vdso32_end - vdso32_start) >> PAGE_SHIFT; + vdso32_pages += data_pages; + vdso32_pagelist = vdso_setup(vdso32_start, vdso32_pages); +#endif + + smp_wmb(); + vdso_ready = 1; + + return 0; +} +arch_initcall(vdso_init); + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == VDSO_BASE) + return "[vdso]"; +#ifndef __tilegx__ + if (vma->vm_start == MEM_USER_INTRPT) + return "[intrpt]"; +#endif + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +int in_gate_area(struct mm_struct *mm, unsigned long address) +{ + return 0; +} + +int in_gate_area_no_mm(unsigned long address) +{ + return 0; +} + +int setup_vdso_pages(void) +{ + struct page **pagelist; + unsigned long pages; + struct mm_struct *mm = current->mm; + unsigned long vdso_base = 0; + int retval = 0; + + if (!vdso_ready) + return 0; + + mm->context.vdso_base = 0; + + pagelist = vdso_pagelist; + pages = vdso_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + pagelist = vdso32_pagelist; + pages = vdso32_pages; + } +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for the + * process. + */ + if (pages == 0) + return 0; + + vdso_base = get_unmapped_area(NULL, vdso_base, + (pages << PAGE_SHIFT) + + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + retval = vdso_base; + return retval; + } + + /* Add required alignment. */ + vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); + + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + mm->context.vdso_base = vdso_base; + + /* + * our vma flags don't have VM_WRITE so by default, the process isn't + * allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW on + * those pages but it's then your responsibility to never do that on + * the "data" page of the vDSO or you'll stop getting kernel updates + * and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though + */ + retval = install_special_mapping(mm, vdso_base, + pages << PAGE_SHIFT, + VM_READ|VM_EXEC | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + pagelist); + if (retval) + mm->context.vdso_base = 0; + + return retval; +} + +static __init int vdso_func(char *s) +{ + return kstrtouint(s, 0, &vdso_enabled); +} +__setup("vdso=", vdso_func); diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile new file mode 100644 index 0000000..e2b7a2f --- /dev/null +++ b/arch/tile/kernel/vdso/Makefile @@ -0,0 +1,118 @@ +# Symbols present in the vdso +vdso-syms = rt_sigreturn gettimeofday + +# Files to link into the vdso +obj-vdso = $(patsubst %, v%.o, $(vdso-syms)) + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +# vdso32 is only for tilegx -m32 compat task. +VDSO32-$(CONFIG_COMPAT) := y + +obj-y += vdso.o +obj-$(VDSO32-y) += vdso32.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +CFLAGS_REMOVE_vdso.o = -pg +CFLAGS_REMOVE_vdso32.o = -pg +CFLAGS_REMOVE_vrt_sigreturn.o = -pg +CFLAGS_REMOVE_vrt_sigreturn32.o = -pg +CFLAGS_REMOVE_vgettimeofday.o = -pg +CFLAGS_REMOVE_vgettimeofday32.o = -pg + +ifdef CONFIG_FEEDBACK_COLLECT +# vDSO code runs in userspace, not collecting feedback data. +CFLAGS_REMOVE_vdso.o = -ffeedback-generate +CFLAGS_REMOVE_vdso32.o = -ffeedback-generate +CFLAGS_REMOVE_vrt_sigreturn.o = -ffeedback-generate +CFLAGS_REMOVE_vrt_sigreturn32.o = -ffeedback-generate +CFLAGS_REMOVE_vgettimeofday.o = -ffeedback-generate +CFLAGS_REMOVE_vgettimeofday32.o = -ffeedback-generate +endif + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency +$(obj)/vdso.o: $(obj)/vdso.so + +# link rule for the .so file, .lds has to be first +SYSCFLAGS_vdso.so.dbg = $(c_flags) +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) + $(call if_changed,vdsold) + + +# We also create a special relocatable object that should mirror the symbol +# table and layout of the linked DSO. With ld -R we can then refer to +# these symbols in the kernel code rather than hand-coded addresses. +extra-y += vdso-syms.o +$(obj)/built-in.o: $(obj)/vdso-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o + +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +SYSCFLAGS_vdso_syms.o = -r +$(obj)/vdso-syms.o: $(src)/vdso.lds $(obj)/vrt_sigreturn.o FORCE + $(call if_changed,vdsold) + + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# actual build commands +# The DSO images are built using a special linker script +# Add -lgcc so tilepro gets static muldi3 and lshrdi3 definitions. +# Make sure only to export the intended __vdso_xxx symbol offsets. +quiet_cmd_vdsold = VDSOLD $@ + cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \ + $(CROSS_COMPILE)objcopy \ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso32.so: $(obj)/vdso32.so.dbg + $(call cmd,vdso_install) + +vdso_install: vdso.so +vdso32_install: vdso32.so + + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_32 += -m32 -s +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 += -m32 -fPIC -shared + +obj-vdso32 = $(patsubst %, v%32.o, $(vdso-syms)) +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +targets += $(obj-vdso32) vdso32.so vdso32.so.dbg + +$(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) + +$(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c + $(call if_changed,cc_o_c) + +$(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S + $(call if_changed,as_o_S) + +# Force dependency +$(obj)/vdso32.o: $(obj)/vdso32.so + +SYSCFLAGS_vdso32.so.dbg = -m32 -shared -s -Wl,-soname=linux-vdso32.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +$(obj)/vdso32.so.dbg: $(src)/vdso.lds $(obj-vdso32) + $(call if_changed,vdsold) diff --git a/arch/tile/kernel/vdso/vdso.S b/arch/tile/kernel/vdso/vdso.S new file mode 100644 index 0000000..3467adb --- /dev/null +++ b/arch/tile/kernel/vdso/vdso.S @@ -0,0 +1,28 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .global vdso_start, vdso_end + .align PAGE_SIZE +vdso_start: + .incbin "arch/tile/kernel/vdso/vdso.so" + .align PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S new file mode 100644 index 0000000..041cd6c --- /dev/null +++ b/arch/tile/kernel/vdso/vdso.lds.S @@ -0,0 +1,87 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#define VDSO_VERSION_STRING LINUX_2.6 + + +OUTPUT_ARCH(tile) + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__vdso_rt_sigreturn); + + +SECTIONS +{ + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + + /* + * This linker script is used both with -r and with -shared. + * For the layouts to match, we need to skip more than enough + * space for the dynamic symbol table et al. If this amount + * is insufficient, ld -shared will barf. Just increase it here. + */ + . = 0x1000; + .text : { *(.text .text.*) } :text + + .data : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } +} + + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __vdso_rt_sigreturn; + __vdso_gettimeofday; + gettimeofday; + local:*; + }; +} diff --git a/arch/tile/kernel/vdso/vdso32.S b/arch/tile/kernel/vdso/vdso32.S new file mode 100644 index 0000000..1d1ac32 --- /dev/null +++ b/arch/tile/kernel/vdso/vdso32.S @@ -0,0 +1,28 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .global vdso32_start, vdso32_end + .align PAGE_SIZE +vdso32_start: + .incbin "arch/tile/kernel/vdso/vdso32.so" + .align PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c new file mode 100644 index 0000000..51ec8e4 --- /dev/null +++ b/arch/tile/kernel/vdso/vgettimeofday.c @@ -0,0 +1,107 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#define VDSO_BUILD /* avoid some shift warnings for -m32 in */ +#include +#include +#include + +#if CHIP_HAS_SPLIT_CYCLE() +static inline cycles_t get_cycles_inline(void) +{ + unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH); + unsigned int low = __insn_mfspr(SPR_CYCLE_LOW); + unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH); + + while (unlikely(high != high2)) { + low = __insn_mfspr(SPR_CYCLE_LOW); + high = high2; + high2 = __insn_mfspr(SPR_CYCLE_HIGH); + } + + return (((cycles_t)high) << 32) | low; +} +#define get_cycles get_cycles_inline +#endif + +/* + * Find out the vDSO data page address in the process address space. + */ +inline unsigned long get_datapage(void) +{ + unsigned long ret; + + /* vdso data page located in the 2nd vDSO page. */ + asm volatile ("lnk %0" : "=r"(ret)); + ret &= ~(PAGE_SIZE - 1); + ret += PAGE_SIZE; + + return ret; +} + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + cycles_t cycles; + unsigned long count, sec, ns; + volatile struct vdso_data *vdso_data; + + vdso_data = (struct vdso_data *)get_datapage(); + /* The use of the timezone is obsolete, normally tz is NULL. */ + if (unlikely(tz != NULL)) { + while (1) { + /* Spin until the update finish. */ + count = vdso_data->tz_update_count; + if (count & 1) + continue; + + tz->tz_minuteswest = vdso_data->tz_minuteswest; + tz->tz_dsttime = vdso_data->tz_dsttime; + + /* Check whether updated, read again if so. */ + if (count == vdso_data->tz_update_count) + break; + } + } + + if (unlikely(tv == NULL)) + return 0; + + while (1) { + /* Spin until the update finish. */ + count = vdso_data->tb_update_count; + if (count & 1) + continue; + + cycles = (get_cycles() - vdso_data->xtime_tod_stamp); + ns = (cycles * vdso_data->mult) >> vdso_data->shift; + sec = vdso_data->xtime_clock_sec; + ns += vdso_data->xtime_clock_nsec; + if (ns >= NSEC_PER_SEC) { + ns -= NSEC_PER_SEC; + sec += 1; + } + + /* Check whether updated, read again if so. */ + if (count == vdso_data->tb_update_count) + break; + } + + tv->tv_sec = sec; + tv->tv_usec = ns / 1000; + + return 0; +} + +int gettimeofday(struct timeval *tv, struct timezone *tz) + __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/tile/kernel/vdso/vrt_sigreturn.S b/arch/tile/kernel/vdso/vrt_sigreturn.S new file mode 100644 index 0000000..6326caf --- /dev/null +++ b/arch/tile/kernel/vdso/vrt_sigreturn.S @@ -0,0 +1,30 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include + +/* + * Note that libc has a copy of this function that it uses to compare + * against the PC when a stack backtrace ends, so if this code is + * changed, the libc implementation(s) should also be updated. + */ +ENTRY(__vdso_rt_sigreturn) + moveli TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn + swint1 + /* We don't use ENDPROC to avoid tagging this symbol as FUNC, + * which confuses the perf tool. + */ + END(__vdso_rt_sigreturn) -- cgit v1.1 From 9ae09838470a68edf0245cd60c623df2d5993a8f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 7 Aug 2013 16:03:08 -0400 Subject: tile: provide traceability for hypervisor calls This change adds infrastructure (CONFIG_TILE_HVGLUE_TRACE) that provides C code wrappers for the calls the kernel makes to the Tilera hypervisor. This allows standard kernel infrastructure like FTRACE to be able to instrument hypervisor calls. To allow direct calls to the true API, we export their names with a leading underscore as well. This is important for the few contexts where we need to make hypervisor calls without touching the stack. As part of this change, we also switch from creating the symbols with linker magic to creating them with assembler magic. This lets us provide a symbol type and generally make them appear more as symbols and less as just random values in the Elf namespace. Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 3 +- arch/tile/kernel/head_32.S | 8 +- arch/tile/kernel/head_64.S | 8 +- arch/tile/kernel/hvglue.S | 74 +++++++++++ arch/tile/kernel/hvglue.lds | 60 --------- arch/tile/kernel/hvglue_trace.c | 266 ++++++++++++++++++++++++++++++++++++++++ arch/tile/kernel/intvec_32.S | 2 +- arch/tile/kernel/intvec_64.S | 2 +- arch/tile/kernel/vmlinux.lds.S | 5 +- 9 files changed, 356 insertions(+), 72 deletions(-) create mode 100644 arch/tile/kernel/hvglue.S delete mode 100644 arch/tile/kernel/hvglue.lds create mode 100644 arch/tile/kernel/hvglue_trace.c (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 5157d1c..c4a957a 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -3,7 +3,7 @@ # extra-y := vmlinux.lds head_$(BITS).o -obj-y := backtrace.o entry.o irq.o messaging.o \ +obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ setup.o signal.o single_step.o stack.o sys.o \ sysfs.o time.o traps.o unaligned.o vdso.o \ @@ -21,5 +21,6 @@ else obj-$(CONFIG_PCI) += pci.o endif obj-$(CONFIG_TILE_USB) += usb.o +obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o obj-y += vdso/ diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 80cd407..d1527fc 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -39,12 +39,12 @@ ENTRY(_start) } { moveli r0, _HV_VERSION_OLD_HV_INIT - jal hv_init + jal _hv_init } /* Get a reasonable default ASID in r0 */ { move r0, zero - jal hv_inquire_asid + jal _hv_inquire_asid } /* Install the default page table */ { @@ -73,12 +73,12 @@ ENTRY(_start) } { auli lr, lr, ha16(1f) - j hv_install_context + j _hv_install_context } 1: /* Get our processor number and save it away in SAVE_K_0. */ - jal hv_inquire_topology + jal _hv_inquire_topology mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index ed51320..969e4f8 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -55,11 +55,11 @@ ENTRY(_start) movei r2, TILE_CHIP_REV movei r3, KERNEL_PL } - jal hv_init + jal _hv_init /* Get a reasonable default ASID in r0 */ { move r0, zero - jal hv_inquire_asid + jal _hv_inquire_asid } /* @@ -130,7 +130,7 @@ ENTRY(_start) } { moveli r3, CTX_PAGE_FLAG - j hv_install_context + j _hv_install_context } 1: @@ -141,7 +141,7 @@ ENTRY(_start) mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 /* Get our processor number and save it away in SAVE_K_0. */ - jal hv_inquire_topology + jal _hv_inquire_topology { GET_FIRST_INT(r5, r1) /* r5 = width */ GET_SECOND_INT(r4, r0) /* r4 = y */ diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S new file mode 100644 index 0000000..2ab4566 --- /dev/null +++ b/arch/tile/kernel/hvglue.S @@ -0,0 +1,74 @@ +/* Hypervisor call vector addresses; see */ +.macro gensym sym, val, size +.org \val +.global _\sym +.type _\sym,function +_\sym: +.size _\sym,\size +#ifndef CONFIG_TILE_HVGLUE_TRACE +.globl \sym +.set \sym,_\sym +#endif +.endm + +.section .hvglue,"x",@nobits +.align 8 +gensym hv_init, 0x20, 32 +gensym hv_install_context, 0x40, 32 +gensym hv_sysconf, 0x60, 32 +gensym hv_get_rtc, 0x80, 32 +gensym hv_set_rtc, 0xa0, 32 +gensym hv_flush_asid, 0xc0, 32 +gensym hv_flush_page, 0xe0, 32 +gensym hv_flush_pages, 0x100, 32 +gensym hv_restart, 0x120, 32 +gensym hv_halt, 0x140, 32 +gensym hv_power_off, 0x160, 32 +gensym hv_inquire_physical, 0x180, 32 +gensym hv_inquire_memory_controller, 0x1a0, 32 +gensym hv_inquire_virtual, 0x1c0, 32 +gensym hv_inquire_asid, 0x1e0, 32 +gensym hv_nanosleep, 0x200, 32 +gensym hv_console_read_if_ready, 0x220, 32 +gensym hv_console_write, 0x240, 32 +gensym hv_downcall_dispatch, 0x260, 32 +gensym hv_inquire_topology, 0x280, 32 +gensym hv_fs_findfile, 0x2a0, 32 +gensym hv_fs_fstat, 0x2c0, 32 +gensym hv_fs_pread, 0x2e0, 32 +gensym hv_physaddr_read64, 0x300, 32 +gensym hv_physaddr_write64, 0x320, 32 +gensym hv_get_command_line, 0x340, 32 +gensym hv_set_caching, 0x360, 32 +gensym hv_bzero_page, 0x380, 32 +gensym hv_register_message_state, 0x3a0, 32 +gensym hv_send_message, 0x3c0, 32 +gensym hv_receive_message, 0x3e0, 32 +gensym hv_inquire_context, 0x400, 32 +gensym hv_start_all_tiles, 0x420, 32 +gensym hv_dev_open, 0x440, 32 +gensym hv_dev_close, 0x460, 32 +gensym hv_dev_pread, 0x480, 32 +gensym hv_dev_pwrite, 0x4a0, 32 +gensym hv_dev_poll, 0x4c0, 32 +gensym hv_dev_poll_cancel, 0x4e0, 32 +gensym hv_dev_preada, 0x500, 32 +gensym hv_dev_pwritea, 0x520, 32 +gensym hv_flush_remote, 0x540, 32 +gensym hv_console_putc, 0x560, 32 +gensym hv_inquire_tiles, 0x580, 32 +gensym hv_confstr, 0x5a0, 32 +gensym hv_reexec, 0x5c0, 32 +gensym hv_set_command_line, 0x5e0, 32 +gensym hv_clear_intr, 0x600, 32 +gensym hv_enable_intr, 0x620, 32 +gensym hv_disable_intr, 0x640, 32 +gensym hv_raise_intr, 0x660, 32 +gensym hv_trigger_ipi, 0x680, 32 +gensym hv_store_mapping, 0x6a0, 32 +gensym hv_inquire_realpa, 0x6c0, 32 +gensym hv_flush_all, 0x6e0, 32 +gensym hv_get_ipi_pte, 0x700, 32 +gensym hv_set_pte_super_shift, 0x720, 32 +gensym hv_console_set_ipi, 0x7e0, 32 +gensym hv_glue_internals, 0x800, 30720 diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds deleted file mode 100644 index ef52290..0000000 --- a/arch/tile/kernel/hvglue.lds +++ /dev/null @@ -1,60 +0,0 @@ -/* Hypervisor call vector addresses; see */ -hv_init = TEXT_OFFSET + 0x10020; -hv_install_context = TEXT_OFFSET + 0x10040; -hv_sysconf = TEXT_OFFSET + 0x10060; -hv_get_rtc = TEXT_OFFSET + 0x10080; -hv_set_rtc = TEXT_OFFSET + 0x100a0; -hv_flush_asid = TEXT_OFFSET + 0x100c0; -hv_flush_page = TEXT_OFFSET + 0x100e0; -hv_flush_pages = TEXT_OFFSET + 0x10100; -hv_restart = TEXT_OFFSET + 0x10120; -hv_halt = TEXT_OFFSET + 0x10140; -hv_power_off = TEXT_OFFSET + 0x10160; -hv_inquire_physical = TEXT_OFFSET + 0x10180; -hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0; -hv_inquire_virtual = TEXT_OFFSET + 0x101c0; -hv_inquire_asid = TEXT_OFFSET + 0x101e0; -hv_nanosleep = TEXT_OFFSET + 0x10200; -hv_console_read_if_ready = TEXT_OFFSET + 0x10220; -hv_console_write = TEXT_OFFSET + 0x10240; -hv_downcall_dispatch = TEXT_OFFSET + 0x10260; -hv_inquire_topology = TEXT_OFFSET + 0x10280; -hv_fs_findfile = TEXT_OFFSET + 0x102a0; -hv_fs_fstat = TEXT_OFFSET + 0x102c0; -hv_fs_pread = TEXT_OFFSET + 0x102e0; -hv_physaddr_read64 = TEXT_OFFSET + 0x10300; -hv_physaddr_write64 = TEXT_OFFSET + 0x10320; -hv_get_command_line = TEXT_OFFSET + 0x10340; -hv_set_caching = TEXT_OFFSET + 0x10360; -hv_bzero_page = TEXT_OFFSET + 0x10380; -hv_register_message_state = TEXT_OFFSET + 0x103a0; -hv_send_message = TEXT_OFFSET + 0x103c0; -hv_receive_message = TEXT_OFFSET + 0x103e0; -hv_inquire_context = TEXT_OFFSET + 0x10400; -hv_start_all_tiles = TEXT_OFFSET + 0x10420; -hv_dev_open = TEXT_OFFSET + 0x10440; -hv_dev_close = TEXT_OFFSET + 0x10460; -hv_dev_pread = TEXT_OFFSET + 0x10480; -hv_dev_pwrite = TEXT_OFFSET + 0x104a0; -hv_dev_poll = TEXT_OFFSET + 0x104c0; -hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0; -hv_dev_preada = TEXT_OFFSET + 0x10500; -hv_dev_pwritea = TEXT_OFFSET + 0x10520; -hv_flush_remote = TEXT_OFFSET + 0x10540; -hv_console_putc = TEXT_OFFSET + 0x10560; -hv_inquire_tiles = TEXT_OFFSET + 0x10580; -hv_confstr = TEXT_OFFSET + 0x105a0; -hv_reexec = TEXT_OFFSET + 0x105c0; -hv_set_command_line = TEXT_OFFSET + 0x105e0; -hv_clear_intr = TEXT_OFFSET + 0x10600; -hv_enable_intr = TEXT_OFFSET + 0x10620; -hv_disable_intr = TEXT_OFFSET + 0x10640; -hv_raise_intr = TEXT_OFFSET + 0x10660; -hv_trigger_ipi = TEXT_OFFSET + 0x10680; -hv_store_mapping = TEXT_OFFSET + 0x106a0; -hv_inquire_realpa = TEXT_OFFSET + 0x106c0; -hv_flush_all = TEXT_OFFSET + 0x106e0; -hv_get_ipi_pte = TEXT_OFFSET + 0x10700; -hv_set_pte_super_shift = TEXT_OFFSET + 0x10720; -hv_console_set_ipi = TEXT_OFFSET + 0x107e0; -hv_glue_internals = TEXT_OFFSET + 0x10800; diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c new file mode 100644 index 0000000..85c74ad --- /dev/null +++ b/arch/tile/kernel/hvglue_trace.c @@ -0,0 +1,266 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * Pull in the hypervisor header so we declare all the ABI functions + * with the underscore versions, then undef the names so that we can + * provide our own wrapper versions. + */ +#define hv_init _hv_init +#define hv_install_context _hv_install_context +#define hv_sysconf _hv_sysconf +#define hv_get_rtc _hv_get_rtc +#define hv_set_rtc _hv_set_rtc +#define hv_flush_asid _hv_flush_asid +#define hv_flush_page _hv_flush_page +#define hv_flush_pages _hv_flush_pages +#define hv_restart _hv_restart +#define hv_halt _hv_halt +#define hv_power_off _hv_power_off +#define hv_inquire_physical _hv_inquire_physical +#define hv_inquire_memory_controller _hv_inquire_memory_controller +#define hv_inquire_virtual _hv_inquire_virtual +#define hv_inquire_asid _hv_inquire_asid +#define hv_nanosleep _hv_nanosleep +#define hv_console_read_if_ready _hv_console_read_if_ready +#define hv_console_write _hv_console_write +#define hv_downcall_dispatch _hv_downcall_dispatch +#define hv_inquire_topology _hv_inquire_topology +#define hv_fs_findfile _hv_fs_findfile +#define hv_fs_fstat _hv_fs_fstat +#define hv_fs_pread _hv_fs_pread +#define hv_physaddr_read64 _hv_physaddr_read64 +#define hv_physaddr_write64 _hv_physaddr_write64 +#define hv_get_command_line _hv_get_command_line +#define hv_set_caching _hv_set_caching +#define hv_bzero_page _hv_bzero_page +#define hv_register_message_state _hv_register_message_state +#define hv_send_message _hv_send_message +#define hv_receive_message _hv_receive_message +#define hv_inquire_context _hv_inquire_context +#define hv_start_all_tiles _hv_start_all_tiles +#define hv_dev_open _hv_dev_open +#define hv_dev_close _hv_dev_close +#define hv_dev_pread _hv_dev_pread +#define hv_dev_pwrite _hv_dev_pwrite +#define hv_dev_poll _hv_dev_poll +#define hv_dev_poll_cancel _hv_dev_poll_cancel +#define hv_dev_preada _hv_dev_preada +#define hv_dev_pwritea _hv_dev_pwritea +#define hv_flush_remote _hv_flush_remote +#define hv_console_putc _hv_console_putc +#define hv_inquire_tiles _hv_inquire_tiles +#define hv_confstr _hv_confstr +#define hv_reexec _hv_reexec +#define hv_set_command_line _hv_set_command_line +#define hv_clear_intr _hv_clear_intr +#define hv_enable_intr _hv_enable_intr +#define hv_disable_intr _hv_disable_intr +#define hv_raise_intr _hv_raise_intr +#define hv_trigger_ipi _hv_trigger_ipi +#define hv_store_mapping _hv_store_mapping +#define hv_inquire_realpa _hv_inquire_realpa +#define hv_flush_all _hv_flush_all +#define hv_get_ipi_pte _hv_get_ipi_pte +#define hv_set_pte_super_shift _hv_set_pte_super_shift +#define hv_console_set_ipi _hv_console_set_ipi +#include +#undef hv_init +#undef hv_install_context +#undef hv_sysconf +#undef hv_get_rtc +#undef hv_set_rtc +#undef hv_flush_asid +#undef hv_flush_page +#undef hv_flush_pages +#undef hv_restart +#undef hv_halt +#undef hv_power_off +#undef hv_inquire_physical +#undef hv_inquire_memory_controller +#undef hv_inquire_virtual +#undef hv_inquire_asid +#undef hv_nanosleep +#undef hv_console_read_if_ready +#undef hv_console_write +#undef hv_downcall_dispatch +#undef hv_inquire_topology +#undef hv_fs_findfile +#undef hv_fs_fstat +#undef hv_fs_pread +#undef hv_physaddr_read64 +#undef hv_physaddr_write64 +#undef hv_get_command_line +#undef hv_set_caching +#undef hv_bzero_page +#undef hv_register_message_state +#undef hv_send_message +#undef hv_receive_message +#undef hv_inquire_context +#undef hv_start_all_tiles +#undef hv_dev_open +#undef hv_dev_close +#undef hv_dev_pread +#undef hv_dev_pwrite +#undef hv_dev_poll +#undef hv_dev_poll_cancel +#undef hv_dev_preada +#undef hv_dev_pwritea +#undef hv_flush_remote +#undef hv_console_putc +#undef hv_inquire_tiles +#undef hv_confstr +#undef hv_reexec +#undef hv_set_command_line +#undef hv_clear_intr +#undef hv_enable_intr +#undef hv_disable_intr +#undef hv_raise_intr +#undef hv_trigger_ipi +#undef hv_store_mapping +#undef hv_inquire_realpa +#undef hv_flush_all +#undef hv_get_ipi_pte +#undef hv_set_pte_super_shift +#undef hv_console_set_ipi + +/* + * Provide macros based on to provide a wrapper + * function that invokes the same function with an underscore prefix. + * We can't use the existing __SC_xxx macros because we need to + * support up to nine arguments rather than up to six, and also this + * way the file stands alone from possible changes in the + * implementation of . + */ +#define HV_WRAP0(type, name) \ + type name(void); \ + type name(void) \ + { \ + return _##name(); \ + } +#define __HV_DECL1(t1, a1) t1 a1 +#define __HV_DECL2(t2, a2, ...) t2 a2, __HV_DECL1(__VA_ARGS__) +#define __HV_DECL3(t3, a3, ...) t3 a3, __HV_DECL2(__VA_ARGS__) +#define __HV_DECL4(t4, a4, ...) t4 a4, __HV_DECL3(__VA_ARGS__) +#define __HV_DECL5(t5, a5, ...) t5 a5, __HV_DECL4(__VA_ARGS__) +#define __HV_DECL6(t6, a6, ...) t6 a6, __HV_DECL5(__VA_ARGS__) +#define __HV_DECL7(t7, a7, ...) t7 a7, __HV_DECL6(__VA_ARGS__) +#define __HV_DECL8(t8, a8, ...) t8 a8, __HV_DECL7(__VA_ARGS__) +#define __HV_DECL9(t9, a9, ...) t9 a9, __HV_DECL8(__VA_ARGS__) +#define __HV_PASS1(t1, a1) a1 +#define __HV_PASS2(t2, a2, ...) a2, __HV_PASS1(__VA_ARGS__) +#define __HV_PASS3(t3, a3, ...) a3, __HV_PASS2(__VA_ARGS__) +#define __HV_PASS4(t4, a4, ...) a4, __HV_PASS3(__VA_ARGS__) +#define __HV_PASS5(t5, a5, ...) a5, __HV_PASS4(__VA_ARGS__) +#define __HV_PASS6(t6, a6, ...) a6, __HV_PASS5(__VA_ARGS__) +#define __HV_PASS7(t7, a7, ...) a7, __HV_PASS6(__VA_ARGS__) +#define __HV_PASS8(t8, a8, ...) a8, __HV_PASS7(__VA_ARGS__) +#define __HV_PASS9(t9, a9, ...) a9, __HV_PASS8(__VA_ARGS__) +#define HV_WRAPx(x, type, name, ...) \ + type name(__HV_DECL##x(__VA_ARGS__)); \ + type name(__HV_DECL##x(__VA_ARGS__)) \ + { \ + return _##name(__HV_PASS##x(__VA_ARGS__)); \ + } +#define HV_WRAP1(type, name, ...) HV_WRAPx(1, type, name, __VA_ARGS__) +#define HV_WRAP2(type, name, ...) HV_WRAPx(2, type, name, __VA_ARGS__) +#define HV_WRAP3(type, name, ...) HV_WRAPx(3, type, name, __VA_ARGS__) +#define HV_WRAP4(type, name, ...) HV_WRAPx(4, type, name, __VA_ARGS__) +#define HV_WRAP5(type, name, ...) HV_WRAPx(5, type, name, __VA_ARGS__) +#define HV_WRAP6(type, name, ...) HV_WRAPx(6, type, name, __VA_ARGS__) +#define HV_WRAP7(type, name, ...) HV_WRAPx(7, type, name, __VA_ARGS__) +#define HV_WRAP8(type, name, ...) HV_WRAPx(8, type, name, __VA_ARGS__) +#define HV_WRAP9(type, name, ...) HV_WRAPx(9, type, name, __VA_ARGS__) + +/* List all the hypervisor API functions. */ +HV_WRAP4(void, hv_init, HV_VersionNumber, interface_version_number, + int, chip_num, int, chip_rev_num, int, client_pl) +HV_WRAP1(long, hv_sysconf, HV_SysconfQuery, query) +HV_WRAP3(int, hv_confstr, HV_ConfstrQuery, query, HV_VirtAddr, buf, int, len) +#if CHIP_HAS_IPI() +HV_WRAP3(int, hv_get_ipi_pte, HV_Coord, tile, int, pl, HV_PTE*, pte) +HV_WRAP3(int, hv_console_set_ipi, int, ipi, int, event, HV_Coord, coord); +#else +HV_WRAP1(void, hv_enable_intr, HV_IntrMask, enab_mask) +HV_WRAP1(void, hv_disable_intr, HV_IntrMask, disab_mask) +HV_WRAP1(void, hv_clear_intr, HV_IntrMask, clear_mask) +HV_WRAP1(void, hv_raise_intr, HV_IntrMask, raise_mask) +HV_WRAP2(HV_Errno, hv_trigger_ipi, HV_Coord, tile, int, interrupt) +#endif /* !CHIP_HAS_IPI() */ +HV_WRAP3(int, hv_store_mapping, HV_VirtAddr, va, unsigned int, len, + HV_PhysAddr, pa) +HV_WRAP2(HV_PhysAddr, hv_inquire_realpa, HV_PhysAddr, cpa, unsigned int, len) +HV_WRAP0(HV_RTCTime, hv_get_rtc) +HV_WRAP1(void, hv_set_rtc, HV_RTCTime, time) +HV_WRAP4(int, hv_install_context, HV_PhysAddr, page_table, HV_PTE, access, + HV_ASID, asid, __hv32, flags) +HV_WRAP2(int, hv_set_pte_super_shift, int, level, int, log2_count) +HV_WRAP0(HV_Context, hv_inquire_context) +HV_WRAP1(int, hv_flush_asid, HV_ASID, asid) +HV_WRAP2(int, hv_flush_page, HV_VirtAddr, address, HV_PageSize, page_size) +HV_WRAP3(int, hv_flush_pages, HV_VirtAddr, start, HV_PageSize, page_size, + unsigned long, size) +HV_WRAP1(int, hv_flush_all, int, preserve_global) +HV_WRAP2(void, hv_restart, HV_VirtAddr, cmd, HV_VirtAddr, args) +HV_WRAP0(void, hv_halt) +HV_WRAP0(void, hv_power_off) +HV_WRAP1(int, hv_reexec, HV_PhysAddr, entry) +HV_WRAP0(HV_Topology, hv_inquire_topology) +HV_WRAP3(HV_Errno, hv_inquire_tiles, HV_InqTileSet, set, HV_VirtAddr, cpumask, + int, length) +HV_WRAP1(HV_PhysAddrRange, hv_inquire_physical, int, idx) +HV_WRAP2(HV_MemoryControllerInfo, hv_inquire_memory_controller, HV_Coord, coord, + int, controller) +HV_WRAP1(HV_VirtAddrRange, hv_inquire_virtual, int, idx) +HV_WRAP1(HV_ASIDRange, hv_inquire_asid, int, idx) +HV_WRAP1(void, hv_nanosleep, int, nanosecs) +HV_WRAP0(int, hv_console_read_if_ready) +HV_WRAP1(void, hv_console_putc, int, byte) +HV_WRAP2(int, hv_console_write, HV_VirtAddr, bytes, int, len) +HV_WRAP0(void, hv_downcall_dispatch) +HV_WRAP1(int, hv_fs_findfile, HV_VirtAddr, filename) +HV_WRAP1(HV_FS_StatInfo, hv_fs_fstat, int, inode) +HV_WRAP4(int, hv_fs_pread, int, inode, HV_VirtAddr, buf, + int, length, int, offset) +HV_WRAP2(unsigned long long, hv_physaddr_read64, HV_PhysAddr, addr, + HV_PTE, access) +HV_WRAP3(void, hv_physaddr_write64, HV_PhysAddr, addr, HV_PTE, access, + unsigned long long, val) +HV_WRAP2(int, hv_get_command_line, HV_VirtAddr, buf, int, length) +HV_WRAP2(HV_Errno, hv_set_command_line, HV_VirtAddr, buf, int, length) +HV_WRAP1(void, hv_set_caching, unsigned long, bitmask) +HV_WRAP2(void, hv_bzero_page, HV_VirtAddr, va, unsigned int, size) +HV_WRAP1(HV_Errno, hv_register_message_state, HV_MsgState*, msgstate) +HV_WRAP4(int, hv_send_message, HV_Recipient *, recips, int, nrecip, + HV_VirtAddr, buf, int, buflen) +HV_WRAP3(HV_RcvMsgInfo, hv_receive_message, HV_MsgState, msgstate, + HV_VirtAddr, buf, int, buflen) +HV_WRAP0(void, hv_start_all_tiles) +HV_WRAP2(int, hv_dev_open, HV_VirtAddr, name, __hv32, flags) +HV_WRAP1(int, hv_dev_close, int, devhdl) +HV_WRAP5(int, hv_dev_pread, int, devhdl, __hv32, flags, HV_VirtAddr, va, + __hv32, len, __hv64, offset) +HV_WRAP5(int, hv_dev_pwrite, int, devhdl, __hv32, flags, HV_VirtAddr, va, + __hv32, len, __hv64, offset) +HV_WRAP3(int, hv_dev_poll, int, devhdl, __hv32, events, HV_IntArg, intarg) +HV_WRAP1(int, hv_dev_poll_cancel, int, devhdl) +HV_WRAP6(int, hv_dev_preada, int, devhdl, __hv32, flags, __hv32, sgl_len, + HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg) +HV_WRAP6(int, hv_dev_pwritea, int, devhdl, __hv32, flags, __hv32, sgl_len, + HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg) +HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa, + unsigned long, cache_control, unsigned long*, cache_cpumask, + HV_VirtAddr, tlb_va, unsigned long, tlb_length, + unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask, + HV_Remote_ASID*, asids, int, asidcount) diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 1076765..9c0c3cb 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -758,7 +758,7 @@ intvec_\vecname: .macro dc_dispatch vecnum, vecname .org (\vecnum << 8) intvec_\vecname: - j hv_downcall_dispatch + j _hv_downcall_dispatch ENDPROC(intvec_\vecname) .endm diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 562886d..df19d4f 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -772,7 +772,7 @@ intvec_\vecname: .macro dc_dispatch vecnum, vecname .org (\vecnum << 8) intvec_\vecname: - j hv_downcall_dispatch + j _hv_downcall_dispatch ENDPROC(intvec_\vecname) .endm diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index a13ed90..0f0edaf 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -31,7 +31,10 @@ SECTIONS } :intrpt1 =0 /* Hypervisor call vectors */ - #include "hvglue.lds" + . = ALIGN(0x10000); + .hvglue : AT (ADDR(.hvglue) - LOAD_OFFSET) { + *(.hvglue) + } :NONE /* Now the real code */ . = ALIGN(0x20000); -- cgit v1.1 From a61fd5e3662d576998d72f80376f23b6ef083d6e Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Fri, 9 Aug 2013 13:26:09 -0400 Subject: tile: support ftrace on tilegx This commit adds support for static ftrace, graph function support, and dynamic tracer support. Signed-off-by: Tony Lu Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 6 + arch/tile/kernel/ftrace.c | 246 +++++++++++++++++++++++++++++++++++++++++ arch/tile/kernel/mcount_64.S | 224 +++++++++++++++++++++++++++++++++++++ arch/tile/kernel/vmlinux.lds.S | 1 + 4 files changed, 477 insertions(+) create mode 100644 arch/tile/kernel/ftrace.c create mode 100644 arch/tile/kernel/mcount_64.S (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index c4a957a..2e6eaa1 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -9,6 +9,11 @@ obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \ sysfs.o time.o traps.o unaligned.o vdso.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_early_printk.o = -pg +endif + obj-$(CONFIG_HARDWALL) += hardwall.o obj-$(CONFIG_COMPAT) += compat.o compat_signal.o obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o @@ -22,5 +27,6 @@ obj-$(CONFIG_PCI) += pci.o endif obj-$(CONFIG_TILE_USB) += usb.o obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o obj-y += vdso/ diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c new file mode 100644 index 0000000..f1c4520 --- /dev/null +++ b/arch/tile/kernel/ftrace.c @@ -0,0 +1,246 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx specific ftrace support + */ + +#include +#include + +#include +#include +#include + +#include + +#ifdef CONFIG_DYNAMIC_FTRACE + +static inline tilegx_bundle_bits NOP(void) +{ + return create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) | + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | + create_Opcode_X0(RRR_0_OPCODE_X0) | + create_UnaryOpcodeExtension_X1(NOP_UNARY_OPCODE_X1) | + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | + create_Opcode_X1(RRR_0_OPCODE_X1); +} + +static int machine_stopped __read_mostly; + +int ftrace_arch_code_modify_prepare(void) +{ + machine_stopped = 1; + return 0; +} + +int ftrace_arch_code_modify_post_process(void) +{ + flush_icache_range(0, CHIP_L1I_CACHE_SIZE()); + machine_stopped = 0; + return 0; +} + +/* + * Put { move r10, lr; jal ftrace_caller } in a bundle, this lets dynamic + * tracer just add one cycle overhead to every kernel function when disabled. + */ +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) +{ + tilegx_bundle_bits opcode_x0, opcode_x1; + long pcrel_by_instr = (addr - pc) >> TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES; + + if (link) { + /* opcode: jal addr */ + opcode_x1 = + create_Opcode_X1(JUMP_OPCODE_X1) | + create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | + create_JumpOff_X1(pcrel_by_instr); + } else { + /* opcode: j addr */ + opcode_x1 = + create_Opcode_X1(JUMP_OPCODE_X1) | + create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | + create_JumpOff_X1(pcrel_by_instr); + } + + if (addr == FTRACE_ADDR) { + /* opcode: or r10, lr, zero */ + opcode_x0 = + create_Dest_X0(10) | + create_SrcA_X0(TREG_LR) | + create_SrcB_X0(TREG_ZERO) | + create_RRROpcodeExtension_X0(OR_RRR_0_OPCODE_X0) | + create_Opcode_X0(RRR_0_OPCODE_X0); + } else { + /* opcode: fnop */ + opcode_x0 = + create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) | + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | + create_Opcode_X0(RRR_0_OPCODE_X0); + } + + return opcode_x1 | opcode_x0; +} + +static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) +{ + return NOP(); +} + +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + +static int ftrace_modify_code(unsigned long pc, unsigned long old, + unsigned long new) +{ + unsigned long pc_wr; + + /* Check if the address is in kernel text space and module space. */ + if (!kernel_text_address(pc)) + return -EINVAL; + + /* Operate on writable kernel text mapping. */ + pc_wr = pc - MEM_SV_START + PAGE_OFFSET; + + if (probe_kernel_write((void *)pc_wr, &new, MCOUNT_INSN_SIZE)) + return -EPERM; + + smp_wmb(); + + if (!machine_stopped && num_online_cpus() > 1) + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc, old; + unsigned long new; + int ret; + + pc = (unsigned long)&ftrace_call; + memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(pc, (unsigned long)func); + + ret = ftrace_modify_code(pc, old, new); + + return ret; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long new, old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(rec); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned long old; + unsigned long new; + int ret; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(rec); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *)data = 0; + + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = NOP(); + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/tile/kernel/mcount_64.S b/arch/tile/kernel/mcount_64.S new file mode 100644 index 0000000..70d7bb0 --- /dev/null +++ b/arch/tile/kernel/mcount_64.S @@ -0,0 +1,224 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx specific __mcount support + */ + +#include +#include + +#define REGSIZE 8 + + .text + .global __mcount + + .macro MCOUNT_SAVE_REGS + addli sp, sp, -REGSIZE + { + st sp, lr + addli r29, sp, - (12 * REGSIZE) + } + { + addli sp, sp, - (13 * REGSIZE) + st r29, sp + } + addli r29, r29, REGSIZE + { st r29, r0; addli r29, r29, REGSIZE } + { st r29, r1; addli r29, r29, REGSIZE } + { st r29, r2; addli r29, r29, REGSIZE } + { st r29, r3; addli r29, r29, REGSIZE } + { st r29, r4; addli r29, r29, REGSIZE } + { st r29, r5; addli r29, r29, REGSIZE } + { st r29, r6; addli r29, r29, REGSIZE } + { st r29, r7; addli r29, r29, REGSIZE } + { st r29, r8; addli r29, r29, REGSIZE } + { st r29, r9; addli r29, r29, REGSIZE } + { st r29, r10; addli r29, r29, REGSIZE } + .endm + + .macro MCOUNT_RESTORE_REGS + addli r29, sp, (2 * REGSIZE) + { ld r0, r29; addli r29, r29, REGSIZE } + { ld r1, r29; addli r29, r29, REGSIZE } + { ld r2, r29; addli r29, r29, REGSIZE } + { ld r3, r29; addli r29, r29, REGSIZE } + { ld r4, r29; addli r29, r29, REGSIZE } + { ld r5, r29; addli r29, r29, REGSIZE } + { ld r6, r29; addli r29, r29, REGSIZE } + { ld r7, r29; addli r29, r29, REGSIZE } + { ld r8, r29; addli r29, r29, REGSIZE } + { ld r9, r29; addli r29, r29, REGSIZE } + { ld r10, r29; addli lr, sp, (13 * REGSIZE) } + { ld lr, lr; addli sp, sp, (14 * REGSIZE) } + .endm + + .macro RETURN_BACK + { move r12, lr; move lr, r10 } + jrp r12 + .endm + +#ifdef CONFIG_DYNAMIC_FTRACE + + .align 64 +STD_ENTRY(__mcount) +__mcount: + j ftrace_stub +STD_ENDPROC(__mcount) + + .align 64 +STD_ENTRY(ftrace_caller) + moveli r11, hw2_last(function_trace_stop) + { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr } + { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 } + ld r11, r11 + beqz r11, 1f + jrp r12 + +1: + { move r10, lr; move lr, r12 } + MCOUNT_SAVE_REGS + + /* arg1: self return address */ + /* arg2: parent's return address */ + { move r0, lr; move r1, r10 } + + .global ftrace_call +ftrace_call: + /* + * a placeholder for the call to a real tracing function, i.e. + * ftrace_trace_function() + */ + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: + /* + * a placeholder for the call to a real tracing function, i.e. + * ftrace_graph_caller() + */ + nop +#endif + MCOUNT_RESTORE_REGS + .global ftrace_stub +ftrace_stub: + RETURN_BACK +STD_ENDPROC(ftrace_caller) + +#else /* ! CONFIG_DYNAMIC_FTRACE */ + + .align 64 +STD_ENTRY(__mcount) + moveli r11, hw2_last(function_trace_stop) + { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr } + { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 } + ld r11, r11 + beqz r11, 1f + jrp r12 + +1: + { move r10, lr; move lr, r12 } + { + moveli r11, hw2_last(ftrace_trace_function) + moveli r13, hw2_last(ftrace_stub) + } + { + shl16insli r11, r11, hw1(ftrace_trace_function) + shl16insli r13, r13, hw1(ftrace_stub) + } + { + shl16insli r11, r11, hw0(ftrace_trace_function) + shl16insli r13, r13, hw0(ftrace_stub) + } + + ld r11, r11 + sub r14, r13, r11 + bnez r14, static_trace + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + moveli r15, hw2_last(ftrace_graph_return) + shl16insli r15, r15, hw1(ftrace_graph_return) + shl16insli r15, r15, hw0(ftrace_graph_return) + ld r15, r15 + sub r15, r15, r13 + bnez r15, ftrace_graph_caller + + { + moveli r16, hw2_last(ftrace_graph_entry) + moveli r17, hw2_last(ftrace_graph_entry_stub) + } + { + shl16insli r16, r16, hw1(ftrace_graph_entry) + shl16insli r17, r17, hw1(ftrace_graph_entry_stub) + } + { + shl16insli r16, r16, hw0(ftrace_graph_entry) + shl16insli r17, r17, hw0(ftrace_graph_entry_stub) + } + ld r16, r16 + sub r17, r16, r17 + bnez r17, ftrace_graph_caller + +#endif + RETURN_BACK + +static_trace: + MCOUNT_SAVE_REGS + + /* arg1: self return address */ + /* arg2: parent's return address */ + { move r0, lr; move r1, r10 } + + /* call ftrace_trace_function() */ + jalr r11 + + MCOUNT_RESTORE_REGS + + .global ftrace_stub +ftrace_stub: + RETURN_BACK +STD_ENDPROC(__mcount) + +#endif /* ! CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +STD_ENTRY(ftrace_graph_caller) +ftrace_graph_caller: +#ifndef CONFIG_DYNAMIC_FTRACE + MCOUNT_SAVE_REGS +#endif + + /* arg1: Get the location of the parent's return address */ + addi r0, sp, 12 * REGSIZE + /* arg2: Get self return address */ + move r1, lr + + jal prepare_ftrace_return + + MCOUNT_RESTORE_REGS + RETURN_BACK +STD_ENDPROC(ftrace_graph_caller) + + .global return_to_handler +return_to_handler: + MCOUNT_SAVE_REGS + + jal ftrace_return_to_handler + /* restore the real parent address */ + move r11, r0 + + MCOUNT_RESTORE_REGS + jr r11 + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 0f0edaf..673d00a 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -43,6 +43,7 @@ SECTIONS HEAD_TEXT SCHED_TEXT LOCK_TEXT + IRQENTRY_TEXT __fix_text_end = .; /* tile-cpack won't rearrange before this */ TEXT_TEXT *(.text.*) -- cgit v1.1 From 3fa17c395bb0c358745fbe0c8aa039d6cdac1735 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Fri, 9 Aug 2013 15:08:57 -0400 Subject: tile: support kprobes on tilegx This change includes support for Kprobes, Jprobes and Return Probes. Reviewed-by: Masami Hiramatsu Signed-off-by: Tony Lu Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 1 + arch/tile/kernel/kprobes.c | 528 +++++++++++++++++++++++++++++++++++++++++ arch/tile/kernel/smp.c | 14 +- arch/tile/kernel/traps.c | 45 +++- arch/tile/kernel/vmlinux.lds.S | 1 + 5 files changed, 585 insertions(+), 4 deletions(-) create mode 100644 arch/tile/kernel/kprobes.c (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 2e6eaa1..b7c8b5e 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -28,5 +28,6 @@ endif obj-$(CONFIG_TILE_USB) += usb.o obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o +obj-$(CONFIG_KPROBES) += kprobes.o obj-y += vdso/ diff --git a/arch/tile/kernel/kprobes.c b/arch/tile/kernel/kprobes.c new file mode 100644 index 0000000..27cdcac --- /dev/null +++ b/arch/tile/kernel/kprobes.c @@ -0,0 +1,528 @@ +/* + * arch/tile/kernel/kprobes.c + * Kprobes on TILE-Gx + * + * Some portions copied from the MIPS version. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright 2006 Sony Corp. + * Copyright 2010 Cavium Networks + * + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include + +#include + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +tile_bundle_bits breakpoint_insn = TILEGX_BPT_BUNDLE; +tile_bundle_bits breakpoint2_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; + +/* + * Check whether instruction is branch or jump, or if executing it + * has different results depending on where it is executed (e.g. lnk). + */ +static int __kprobes insn_has_control(kprobe_opcode_t insn) +{ + if (get_Mode(insn) != 0) { /* Y-format bundle */ + if (get_Opcode_Y1(insn) != RRR_1_OPCODE_Y1 || + get_RRROpcodeExtension_Y1(insn) != UNARY_RRR_1_OPCODE_Y1) + return 0; + + switch (get_UnaryOpcodeExtension_Y1(insn)) { + case JALRP_UNARY_OPCODE_Y1: + case JALR_UNARY_OPCODE_Y1: + case JRP_UNARY_OPCODE_Y1: + case JR_UNARY_OPCODE_Y1: + case LNK_UNARY_OPCODE_Y1: + return 1; + default: + return 0; + } + } + + switch (get_Opcode_X1(insn)) { + case BRANCH_OPCODE_X1: /* branch instructions */ + case JUMP_OPCODE_X1: /* jump instructions: j and jal */ + return 1; + + case RRR_0_OPCODE_X1: /* other jump instructions */ + if (get_RRROpcodeExtension_X1(insn) != UNARY_RRR_0_OPCODE_X1) + return 0; + switch (get_UnaryOpcodeExtension_X1(insn)) { + case JALRP_UNARY_OPCODE_X1: + case JALR_UNARY_OPCODE_X1: + case JRP_UNARY_OPCODE_X1: + case JR_UNARY_OPCODE_X1: + case LNK_UNARY_OPCODE_X1: + return 1; + default: + return 0; + } + default: + return 0; + } +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + + if (addr & (sizeof(kprobe_opcode_t) - 1)) + return -EINVAL; + + if (insn_has_control(*p->addr)) { + pr_notice("Kprobes for control instructions are not " + "supported\n"); + return -EINVAL; + } + + /* insn: must be on special executable page on tile. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + + /* + * In the kprobe->ainsn.insn[] array we store the original + * instruction at index zero and a break trap instruction at + * index one. + */ + memcpy(&p->ainsn.insn[0], p->addr, sizeof(kprobe_opcode_t)); + p->ainsn.insn[1] = breakpoint2_insn; + p->opcode = *p->addr; + + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + unsigned long addr_wr; + + /* Operate on writable kernel text mapping. */ + addr_wr = (unsigned long)p->addr - MEM_SV_START + PAGE_OFFSET; + + if (probe_kernel_write((void *)addr_wr, &breakpoint_insn, + sizeof(breakpoint_insn))) + pr_err("%s: failed to enable kprobe\n", __func__); + + smp_wmb(); + flush_insn_slot(p); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *kp) +{ + unsigned long addr_wr; + + /* Operate on writable kernel text mapping. */ + addr_wr = (unsigned long)kp->addr - MEM_SV_START + PAGE_OFFSET; + + if (probe_kernel_write((void *)addr_wr, &kp->opcode, + sizeof(kp->opcode))) + pr_err("%s: failed to enable kprobe\n", __func__); + + smp_wmb(); + flush_insn_slot(kp); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.saved_pc = kcb->kprobe_saved_pc; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_pc = kcb->prev_kprobe.saved_pc; +} + +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); + kcb->kprobe_saved_pc = regs->pc; +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + /* Single step inline if the instruction is a break. */ + if (p->opcode == breakpoint_insn || + p->opcode == breakpoint2_insn) + regs->pc = (unsigned long)p->addr; + else + regs->pc = (unsigned long)&p->ainsn.insn[0]; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + kprobe_opcode_t *addr; + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *)regs->pc; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing. + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Check we're not actually recursing. */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kcb->kprobe_status == KPROBE_HIT_SS && + p->ainsn.insn[0] == breakpoint_insn) { + goto no_kprobe; + } + /* + * We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else { + if (*addr != breakpoint_insn) { + /* + * The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + goto no_kprobe; + } + p = __this_cpu_read(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) + goto ss_probe; + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (*addr != breakpoint_insn) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it. */ + goto no_kprobe; + } + + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + if (p->pre_handler && p->pre_handler(p, regs)) { + /* Handler has already set things up, so skip ss setup. */ + return 1; + } + +ss_probe: + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction that has been replaced by the breakpoint. To avoid the + * SMP problems that can occur when we temporarily put back the + * original opcode to single-step, we single-stepped a copy of the + * instruction. The address of this copy is p->ainsn.insn. + * + * This function prepares to return from the post-single-step + * breakpoint trap. + */ +static void __kprobes resume_execution(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + unsigned long orig_pc = kcb->kprobe_saved_pc; + regs->pc = orig_pc + 8; +} + +static inline int post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs, kcb); + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + resume_execution(cur, regs, kcb); + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BREAK: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEPBP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id(). */ + preempt_disable(); + + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); + break; + default: + break; + } + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + kcb->jprobe_saved_regs = *regs; + kcb->jprobe_saved_sp = regs->sp; + + memcpy(kcb->jprobes_stack, (void *)kcb->jprobe_saved_sp, + MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp)); + + regs->pc = (unsigned long)(jp->entry); + + return 1; +} + +/* Defined in the inline asm below. */ +void jprobe_return_end(void); + +void __kprobes jprobe_return(void) +{ + asm volatile( + "bpt\n\t" + ".globl jprobe_return_end\n" + "jprobe_return_end:\n"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (regs->pc >= (unsigned long)jprobe_return && + regs->pc <= (unsigned long)jprobe_return_end) { + *regs = kcb->jprobe_saved_regs; + memcpy((void *)kcb->jprobe_saved_sp, kcb->jprobes_stack, + MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp)); + preempt_enable_no_resched(); + + return 1; + } + return 0; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe causes the + * handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile( + "nop\n\t" + ".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n\t" + "nop\n\t" + : : : "memory"); +} + +void kretprobe_trampoline(void); + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->lr; + + /* Replace the return addr with trampoline addr */ + regs->lr = (unsigned long)kretprobe_trampoline; +} + +/* + * Called when the probe at kretprobe trampoline is hit. + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = (unsigned long)kretprobe_trampoline; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * a return probe installed on them, and/or more than one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) { + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + instruction_pointer(regs) = orig_ret_address; + + reset_current_kprobe(); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline) + return 1; + + return 0; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *)kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + register_kprobe(&trampoline_p); + return 0; +} diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 6cc520d..0ae1c59 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include HV_Topology smp_topology __write_once; EXPORT_SYMBOL(smp_topology); @@ -167,9 +168,16 @@ static void ipi_flush_icache_range(void *info) void flush_icache_range(unsigned long start, unsigned long end) { struct ipi_flush flush = { start, end }; - preempt_disable(); - on_each_cpu(ipi_flush_icache_range, &flush, 1); - preempt_enable(); + + /* If invoked with irqs disabled, we can not issue IPIs. */ + if (irqs_disabled()) + flush_remote(0, HV_FLUSH_EVICT_L1I, NULL, 0, 0, 0, + NULL, NULL, 0); + else { + preempt_disable(); + on_each_cpu(ipi_flush_icache_range, &flush, 1); + preempt_enable(); + } } diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index a1bbc5de..cfff6f9 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -214,6 +215,43 @@ static const char *const int_name[] = { #endif }; +static int do_bpt(struct pt_regs *regs) +{ + unsigned long bundle, bcode, bpt; + + bundle = *(unsigned long *)instruction_pointer(regs); + + /* + * bpt shoule be { bpt; nop }, which is 0x286a44ae51485000ULL. + * we encode the unused least significant bits for other purpose. + */ + bpt = bundle & ~((1ULL << 12) - 1); + if (bpt != TILE_BPT_BUNDLE) + return 0; + + bcode = bundle & ((1ULL << 12) - 1); + /* + * notify the kprobe handlers, if instruction is likely to + * pertain to them. + */ + switch (bcode) { + /* breakpoint_insn */ + case 0: + notify_die(DIE_BREAK, "debug", regs, bundle, + INT_ILL, SIGTRAP); + break; + /* breakpoint2_insn */ + case DIE_SSTEPBP: + notify_die(DIE_SSTEPBP, "single_step", regs, bundle, + INT_ILL, SIGTRAP); + break; + default: + return 0; + } + + return 1; +} + void __kprobes do_trap(struct pt_regs *regs, int fault_num, unsigned long reason) { @@ -221,6 +259,11 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, int signo, code; unsigned long address = 0; bundle_bits instr; + int is_kernel = !user_mode(regs); + + /* Handle breakpoints, etc. */ + if (is_kernel && fault_num == INT_ILL && do_bpt(regs)) + return; /* Re-enable interrupts, if they were previously enabled. */ if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) @@ -230,7 +273,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, * If it hits in kernel mode and we can't fix it up, just exit the * current process and hope for the best. */ - if (!user_mode(regs)) { + if (is_kernel) { const char *name; char buf[100]; if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */ diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 673d00a..aab9955 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -43,6 +43,7 @@ SECTIONS HEAD_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT IRQENTRY_TEXT __fix_text_end = .; /* tile-cpack won't rearrange before this */ TEXT_TEXT -- cgit v1.1 From 8d8cf0674023da578e0539dc7cf4d824b2cf1f9e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 15:41:09 -0400 Subject: tile: fix panic with large IRQ number The "available_irqs" value needs to actually reflect the IRQs available, not just start as an all-ones mask, since we only have 32 IRQs available even on a 64-bit platform. Signed-off-by: Chris Metcalf --- arch/tile/kernel/irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index c90de6c..0e6c521 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -55,7 +55,8 @@ static DEFINE_PER_CPU(int, irq_depth); /* State for allocating IRQs on Gx. */ #if CHIP_HAS_IPI() -static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE); +static unsigned long available_irqs = ((1UL << NR_IRQS) - 1) & + (~(1UL << IRQ_RESCHEDULE)); static DEFINE_SPINLOCK(available_irqs_lock); #endif -- cgit v1.1 From 9b5bbf729d2db27ecb477b42c0701c1c97eb5603 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 15:42:56 -0400 Subject: tile: correct r1 value during syscall tracing The r1 value is set based on the r0 value as we return to user space. So tracing tools won't automatically see the right value. Fix this by generating the correct r1 value in do_syscall_trace_exit() rather than trying to tamper with the hot path in syscall return. Signed-off-by: Chris Metcalf --- arch/tile/kernel/ptrace.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index bac1874..de98c6d 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -265,6 +265,21 @@ int do_syscall_trace_enter(struct pt_regs *regs) void do_syscall_trace_exit(struct pt_regs *regs) { + long errno; + + /* + * The standard tile calling convention returns the value (or negative + * errno) in r0, and zero (or positive errno) in r1. + * It saves a couple of cycles on the hot path to do this work in + * registers only as we return, rather than updating the in-memory + * struct ptregs. + */ + errno = (long) regs->regs[0]; + if (errno < 0 && errno > -4096) + regs->regs[1] = -errno; + else + regs->regs[1] = 0; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); -- cgit v1.1 From 6f0142d501b35468d910b9f36b6853dbd8dc5ad5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 16:04:34 -0400 Subject: tile: allow "initrd" boot argument for kexec This enables support for "kexec --initrd" for tile. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index b00e156..774e819 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -694,6 +694,25 @@ static void __init setup_bootmem_allocator(void) reserve_bootmem(m->addr, m->size, 0); } +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) { + /* Make sure the initrd memory region is not modified. */ + if (reserve_bootmem(initrd_start, initrd_end - initrd_start, + BOOTMEM_EXCLUSIVE)) { + pr_crit("The initrd memory region has been polluted. Disabling it.\n"); + initrd_start = 0; + initrd_end = 0; + } else { + /* + * Translate initrd_start & initrd_end from PA to VA for + * future access. + */ + initrd_start += PAGE_OFFSET; + initrd_end += PAGE_OFFSET; + } + } +#endif + #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0); @@ -1095,6 +1114,10 @@ static void __init load_hv_initrd(void) int fd, rc; void *initrd; + /* If initrd has already been set, skip initramfs file in hvfs. */ + if (initrd_start) + return; + fd = hv_fs_findfile((HV_VirtAddr) initramfs_file); if (fd == HV_ENOENT) { if (set_initramfs_file) { @@ -1133,6 +1156,25 @@ void __init free_initrd_mem(unsigned long begin, unsigned long end) free_bootmem(__pa(begin), end - begin); } +static int __init setup_initrd(char *str) +{ + char *endp; + unsigned long initrd_size; + + initrd_size = str ? simple_strtoul(str, &endp, 0) : 0; + if (initrd_size == 0 || *endp != '@') + return -EINVAL; + + initrd_start = simple_strtoul(endp+1, &endp, 0); + if (initrd_start == 0) + return -EINVAL; + + initrd_end = initrd_start + initrd_size; + + return 0; +} +early_param("initrd", setup_initrd); + #else static inline void load_hv_initrd(void) {} #endif /* CONFIG_BLK_DEV_INITRD */ -- cgit v1.1 From abe3265a6d2e5e805361e0fea3346622f3199d68 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 16:09:41 -0400 Subject: tile: do less L1 I-cache eviction We had been doing an automatic full eviction of the L1 I$ everywhere whenever we did a kernel-space TLB flush. It turns out this isn't necessary, since all the callers already handle doing a flush if necessary. Signed-off-by: Chris Metcalf --- arch/tile/kernel/tlb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c index 3fd54d5..f23b535 100644 --- a/arch/tile/kernel/tlb.c +++ b/arch/tile/kernel/tlb.c @@ -91,8 +91,14 @@ void flush_tlb_all(void) } } +/* + * Callers need to flush the L1I themselves if necessary, e.g. for + * kernel module unload. Otherwise we assume callers are not using + * executable pgprot_t's. Using EVICT_L1I means that dataplane cpus + * will get an unnecessary interrupt otherwise. + */ void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + flush_remote(0, 0, NULL, start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0); } -- cgit v1.1 From e56059f2d3b8a053c965a51587a4a3328ac00a47 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 9 Aug 2013 16:50:36 -0400 Subject: tile: group .hottext* sections properly in vmlinux.lds With this change such sections are grouped with regular text in the vmlinux image; this change puts them at the front, which is where the standard Linux includes .text.hot*. This change should fix a recently-observed bug where a bunch of symbols were being omitted from the /proc/kallsyms output because they fell between _etext and _sinittext. Signed-off-by: Chris Metcalf --- arch/tile/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index aab9955..c7ae53d 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -46,6 +46,8 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT __fix_text_end = .; /* tile-cpack won't rearrange before this */ + ALIGN_FUNCTION(); + *(.hottext*) TEXT_TEXT *(.text.*) *(.coldtext*) -- cgit v1.1 From 4036c7d3542ce82ea343bf95dd05ca46aefba9aa Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 10 Aug 2013 12:22:43 -0400 Subject: tile: don't call show_regs_print_info() with corrupt current We use the validate_current() API to make sure that "current" seems plausible before using it. With the new show_regs_print_info() API, we want to check that current is OK before calling it, since otherwise we will end up in a recursive panic. Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 663289b..44cdc4a 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -226,19 +226,20 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) (unsigned int __user *)adr); } +static struct task_struct corrupt_current = { .comm = "" }; + /* * Return "current" if it looks plausible, or else a pointer to a dummy. * This can be helpful if we are just trying to emit a clean panic. */ struct task_struct *validate_current(void) { - static struct task_struct corrupt = { .comm = "" }; struct task_struct *tsk = current; if (unlikely((unsigned long)tsk < PAGE_OFFSET || (high_memory && (void *)tsk > high_memory) || ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); - tsk = &corrupt; + tsk = &corrupt_current; } return tsk; } @@ -589,7 +590,8 @@ void show_regs(struct pt_regs *regs) int i; pr_err("\n"); - show_regs_print_info(KERN_ERR); + if (tsk != &corrupt_current) + show_regs_print_info(KERN_ERR); #ifdef __tilegx__ for (i = 0; i < 17; i++) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", -- cgit v1.1 From 35f059761c5ac313d13372fe3cdaa41bce3d0dbf Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sat, 10 Aug 2013 12:35:02 -0400 Subject: tilegx: change how we find the kernel stack Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0) to hold the CPU number and the top of the current kernel stack by using the low bits to hold the CPU number, and using the high bits to hold the address of the page just above where we'd want the kernel stack to be. That way we could initialize a new SP when first entering the kernel by just masking the SPR value and subtracting a couple of words. However, it's actually more useful to be able to place an arbitrary kernel-top value in the SPR. This allows us to create a new stack context (e.g. for virtualization) with an arbitrary top-of-stack VA. To make this work, we now store the CPU number in the high bits, above the highest legal VA bit (42 bits in the current tilegx microarchitecture). The full 42 bits are thus available to store the top of stack value. Getting the current cpu (a relatively common operation) is still fast; it's now a shift rather than a mask. We make this change only for tilegx, since tilepro has too few SPR bits to do this, and we don't need this support on tilepro anyway. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_32.S | 3 +-- arch/tile/kernel/head_64.S | 6 +++--- arch/tile/kernel/intvec_32.S | 7 +++++-- arch/tile/kernel/intvec_64.S | 21 ++++++++++----------- arch/tile/kernel/stack.c | 10 +++++----- 5 files changed, 24 insertions(+), 23 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index d1527fc..f3f17b0 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -86,7 +86,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. @@ -126,7 +126,6 @@ ENTRY(_start) lw sp, r1 or r4, sp, r4 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 969e4f8..652b814 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -158,7 +158,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp with at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. @@ -202,9 +202,9 @@ ENTRY(_start) } ld r0, r0 ld sp, r1 - or r4, sp, r4 + shli r4, r4, CPU_SHIFT + bfins r4, sp, 0, CPU_SHIFT-1 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 9c0c3cb..f3d26f4 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -185,7 +185,7 @@ intvec_\vecname: * point sp at the top aligned address on the actual stack page. */ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + mm r0, r0, zero, LOG2_NR_CPU_IDS, 31 0: /* @@ -203,6 +203,9 @@ intvec_\vecname: * cache line 1: r14...r29 * cache line 0: 2 x frame, r0..r13 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -464,7 +467,7 @@ intvec_\vecname: } { auli r21, r21, ha16(__per_cpu_offset) - mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1 } s2a r20, r20, r21 lw tp, r20 diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index df19d4f..3b35bb4 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -132,13 +132,9 @@ intvec_\vecname: mfspr r3, SPR_SYSTEM_SAVE_K_0 /* Get &thread_info->unalign_jit_tmp[0] in r3. */ + bfexts r3, r3, 0, CPU_SHIFT-1 mm r3, zero, LOG2_THREAD_SIZE, 63 -#if THREAD_SIZE < 65536 - addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) -#else - addli r3, r3, -(PAGE_SIZE/2) - addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET) -#endif + addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET /* * Save r0, r1, r2 into thread_info array r3 points to @@ -365,13 +361,13 @@ intvec_\vecname: 2: /* - * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and - * the current stack top in the higher bits. So we recover - * our stack top by just masking off the low bits, then + * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and + * the current stack top in the lower bits. So we recover + * our starting stack value by sign-extending the low bits, then * point sp at the top aligned address on the actual stack page. */ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, zero, LOG2_THREAD_SIZE, 63 + bfexts r0, r0, 0, CPU_SHIFT-1 0: /* @@ -393,6 +389,9 @@ intvec_\vecname: * cache line 1: r6...r13 * cache line 0: 2 x frame, r0..r5 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -690,7 +689,7 @@ intvec_\vecname: } { shl16insli r21, r21, hw1(__per_cpu_offset) - bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 + bfextu r20, r20, CPU_SHIFT, 63 } shl16insli r21, r21, hw0(__per_cpu_offset) shl3add r20, r20, r21 diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index a9db923..24fd223 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -197,19 +197,19 @@ static void validate_stack(struct pt_regs *regs) { int cpu = raw_smp_processor_id(); unsigned long ksp0 = get_current_ksp0(); - unsigned long ksp0_base = ksp0 - THREAD_SIZE; + unsigned long ksp0_base = ksp0 & -THREAD_SIZE; unsigned long sp = stack_pointer; if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { - pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" + pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n" " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", - cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); } else if (sp < ksp0_base + sizeof(struct thread_info)) { - pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" + pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n" " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", - cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); } } -- cgit v1.1 From 309272f99f3e65b9bb5c49a2901c63a1859172f3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 3 Sep 2013 14:13:31 -0400 Subject: tile: clean up relocate_kernel_64 debug code We remove some debug code in relocate_kernel_64.S that made raw calls to the hv_console_putc Tilera hypervisor API, since everything should funnel through the early_hv_write() API. Signed-off-by: Chris Metcalf --- arch/tile/kernel/relocate_kernel_64.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S index 1c09a4f..02bc446 100644 --- a/arch/tile/kernel/relocate_kernel_64.S +++ b/arch/tile/kernel/relocate_kernel_64.S @@ -34,11 +34,11 @@ STD_ENTRY(relocate_new_kernel) addi sp, sp, -8 /* we now have a stack (whether we need one or not) */ +#ifdef RELOCATE_NEW_KERNEL_VERBOSE moveli r40, hw2_last(hv_console_putc) shl16insli r40, r40, hw1(hv_console_putc) shl16insli r40, r40, hw0(hv_console_putc) -#ifdef RELOCATE_NEW_KERNEL_VERBOSE moveli r0, 'r' jalr r40 @@ -176,10 +176,12 @@ STD_ENTRY(relocate_new_kernel) /* we should not get here */ +#ifdef RELOCATE_NEW_KERNEL_VERBOSE moveli r0, '?' jalr r40 moveli r0, '\n' jalr r40 +#endif j .Lhalt @@ -237,7 +239,9 @@ STD_ENTRY(relocate_new_kernel) j .Lloop -.Lerr: moveli r0, 'e' +.Lerr: +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'e' jalr r40 moveli r0, 'r' jalr r40 @@ -245,6 +249,7 @@ STD_ENTRY(relocate_new_kernel) jalr r40 moveli r0, '\n' jalr r40 +#endif .Lhalt: moveli r41, hw2_last(hv_halt) shl16insli r41, r41, hw1(hv_halt) -- cgit v1.1 From 051168df528fe4456d63f5f65b041c147c26fe97 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 3 Sep 2013 14:45:52 -0400 Subject: tile: don't assume user privilege is zero Technically, user privilege is anything less than kernel privilege. We modify the existing user_mode() macro to have this semantic (and use it in a couple of places it wasn't being used before), and add an IS_KERNEL_EX1() macro to the assembly code as well. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_64.S | 23 +++++++++++++++++------ arch/tile/kernel/stack.c | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 3b35bb4..f020f01 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -34,6 +34,16 @@ #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) +#if CONFIG_KERNEL_PL == 1 || CONFIG_KERNEL_PL == 2 +/* + * Set "result" non-zero if ex1 holds the PL of the kernel + * (with or without ICS being set). Note this works only + * because we never find the PL at level 3. + */ +# define IS_KERNEL_EX1(result, ex1) andi result, ex1, CONFIG_KERNEL_PL +#else +# error Recode IS_KERNEL_EX1 for CONFIG_KERNEL_PL +#endif .macro push_reg reg, ptr=sp, delta=-8 { @@ -308,7 +318,7 @@ intvec_\vecname: */ { blbs sp, 2f - andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + IS_KERNEL_EX1(r0, r0) } .ifc \vecnum, INT_DOUBLE_FAULT @@ -641,11 +651,12 @@ intvec_\vecname: /* * If we will be returning to the kernel, we will need to * reset the interrupt masks to the state they had before. - * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled. + * Set DISABLE_IRQ in flags iff we came from kernel pl with + * irqs disabled. */ mfspr r32, SPR_EX_CONTEXT_K_1 { - andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + IS_KERNEL_EX1(r22, r22) PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) } beqzt r32, 1f /* zero if from user space */ @@ -812,7 +823,7 @@ STD_ENTRY(interrupt_return) PTREGS_PTR(r29, PTREGS_OFFSET_EX1) } ld r29, r29 - andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + IS_KERNEL_EX1(r29, r29) { beqzt r29, .Lresume_userspace move r29, sp @@ -936,7 +947,7 @@ STD_ENTRY(interrupt_return) PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS) } { - andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK + IS_KERNEL_EX1(r0, r0) ld r32, r32 } bnez r0, 1f @@ -1007,7 +1018,7 @@ STD_ENTRY(interrupt_return) pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC { mtspr SPR_EX_CONTEXT_K_1, lr - andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + IS_KERNEL_EX1(lr, lr) } { mtspr SPR_EX_CONTEXT_K_0, r21 diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 24fd223..362284a 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -103,7 +103,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) p->sp >= sp) { if (kbt->verbose) pr_err(" <%s while in kernel mode>\n", fault); - } else if (EX1_PL(p->ex1) == USER_PL && + } else if (user_mode(p) && p->sp < PAGE_OFFSET && p->sp != 0) { if (kbt->verbose) pr_err(" <%s while in user mode>\n", fault); -- cgit v1.1 From acbde1db294932623aad15dd8cc6e37b28340f26 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 3 Sep 2013 14:41:36 -0400 Subject: tile: parameterize VA and PA space more cleanly The existing code relied on the hardware definition () to specify how much VA and PA space was available. It's convenient to allow customizing this for some configurations, so provide symbols MAX_PA_WIDTH and MAX_VA_WIDTH in that can be modified if desired. Additionally, move away from the MEM_XX_INTRPT nomenclature to define the start of various regions within the VA space. In fact the cleaner symbol is, for example, MEM_SV_START, to indicate the start of the area used for supervisor code; the actual address of the interrupt vectors is not as important, and can be changed if desired. As part of this change, convert from "intrpt1" nomenclature (which built in the old privilege-level 1 model) to a simple "intrpt". Also strip out some tilepro-specific code supporting modifying the PL the kernel could run at, since we don't actually support using different PLs in tilepro, only tilegx. Signed-off-by: Chris Metcalf --- arch/tile/kernel/head_32.S | 4 ++-- arch/tile/kernel/head_64.S | 6 +++--- arch/tile/kernel/intvec_32.S | 6 +++--- arch/tile/kernel/intvec_64.S | 8 +++++--- arch/tile/kernel/setup.c | 8 ++++---- arch/tile/kernel/traps.c | 2 +- arch/tile/kernel/vmlinux.lds.S | 10 +++++----- 7 files changed, 23 insertions(+), 21 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index f3f17b0..8d5b40f 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -162,8 +162,8 @@ ENTRY(swapper_pg_dir) .set addr, addr + PGDIR_SIZE .endr - /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ - PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + /* The true text VAs are mapped as VA = PA + MEM_SV_START */ + PTE MEM_SV_START, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) .org swapper_pg_dir + PGDIR_SIZE END(swapper_pg_dir) diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 652b814..bd0e12f 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -135,9 +135,9 @@ ENTRY(_start) 1: /* Install the interrupt base. */ - moveli r0, hw2_last(MEM_SV_START) - shl16insli r0, r0, hw1(MEM_SV_START) - shl16insli r0, r0, hw0(MEM_SV_START) + moveli r0, hw2_last(intrpt_start) + shl16insli r0, r0, hw1(intrpt_start) + shl16insli r0, r0, hw0(intrpt_start) mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 /* Get our processor number and save it away in SAVE_K_0. */ diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f3d26f4..f084f1c 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -353,7 +353,7 @@ intvec_\vecname: #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" .org (\vecnum << 5) - FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) jrp lr .popsection #endif @@ -1890,8 +1890,8 @@ int_unalign: push_extra_callee_saves r0 j do_trap -/* Include .intrpt1 array of interrupt vectors */ - .section ".intrpt1", "ax" +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" #define op_handle_perf_interrupt bad_intr #define op_handle_aux_perf_interrupt bad_intr diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index f020f01..c3a2335 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -535,7 +535,7 @@ intvec_\vecname: #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" .org (\vecnum << 5) - FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) jrp lr .popsection #endif @@ -1485,8 +1485,10 @@ STD_ENTRY(fill_ra_stack) __int_hand \vecnum, \vecname, \c_routine, \processing .endm -/* Include .intrpt1 array of interrupt vectors */ - .section ".intrpt1", "ax" +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" + .global intrpt_start +intrpt_start: #define op_handle_perf_interrupt bad_intr #define op_handle_aux_perf_interrupt bad_intr diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 774e819..1021784 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -268,7 +268,7 @@ early_param("vmalloc", parse_vmalloc); /* * Determine for each controller where its lowmem is mapped and how much of * it is mapped there. On controller zero, the first few megabytes are - * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * already mapped in as code at MEM_SV_START, so in principle we could * start our data mappings higher up, but for now we don't bother, to avoid * additional confusion. * @@ -1242,7 +1242,7 @@ static void __init validate_va(void) #ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */ /* * Similarly, make sure we're only using allowed VAs. - * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT, + * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_START, * and 0 .. KERNEL_HIGH_VADDR. * In addition, make sure we CAN'T use the end of memory, since * we use the last chunk of each pgd for the pgd_list. @@ -1257,7 +1257,7 @@ static void __init validate_va(void) if (range.size == 0) break; if (range.start <= MEM_USER_INTRPT && - range.start + range.size >= MEM_HV_INTRPT) + range.start + range.size >= MEM_HV_START) user_kernel_ok = 1; if (range.start == 0) max_va = range.size; @@ -1693,7 +1693,7 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) static int __init request_standard_resources(void) { int i; - enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; #if defined(CONFIG_PCI) && !defined(__tilegx__) insert_non_bus_resource(); diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index cfff6f9..628661f 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -30,7 +30,7 @@ void __init trap_init(void) { - /* Nothing needed here since we link code at .intrpt1 */ + /* Nothing needed here since we link code at .intrpt */ } int unaligned_fixup = 1; diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index c7ae53d..8b20163 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -5,7 +5,7 @@ #include /* Text loads starting from the supervisor interrupt vector address. */ -#define TEXT_OFFSET MEM_SV_INTRPT +#define TEXT_OFFSET MEM_SV_START OUTPUT_ARCH(tile) ENTRY(_start) @@ -13,7 +13,7 @@ jiffies = jiffies_64; PHDRS { - intrpt1 PT_LOAD ; + intrpt PT_LOAD ; text PT_LOAD ; data PT_LOAD ; } @@ -24,11 +24,11 @@ SECTIONS #define LOAD_OFFSET TEXT_OFFSET /* Interrupt vectors */ - .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ + .intrpt (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ { _text = .; - *(.intrpt1) - } :intrpt1 =0 + *(.intrpt) + } :intrpt =0 /* Hypervisor call vectors */ . = ALIGN(0x10000); -- cgit v1.1 From 8157107b13099d6eb2e8ccd00b9aba009c698c38 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 28 Aug 2013 19:53:17 -0400 Subject: tilegx: support KGDB Enter kernel debugger at boot with: --hvd UART_1=1 --hvx kgdbwait --hvx kgdboc=ttyS1,115200 or at runtime with: echo ttyS1,115200 > /sys/module/kgdboc/parameters/kgdboc echo g > /proc/sysrq-trigger Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 1 + arch/tile/kernel/kgdb.c | 499 ++++++++++++++++++++++++++++++++++++++++++++++ arch/tile/kernel/traps.c | 5 + 3 files changed, 505 insertions(+) create mode 100644 arch/tile/kernel/kgdb.c (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index b7c8b5e..27a2bf3 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -29,5 +29,6 @@ obj-$(CONFIG_TILE_USB) += usb.o obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KGDB) += kgdb.o obj-y += vdso/ diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c new file mode 100644 index 0000000..4cd8838 --- /dev/null +++ b/arch/tile/kernel/kgdb.c @@ -0,0 +1,499 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx KGDB support. + */ + +#include +#include +#include +#include +#include +#include + +static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; +static unsigned long stepped_addr; +static tile_bundle_bits stepped_instr; + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0])}, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1])}, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2])}, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3])}, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4])}, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5])}, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6])}, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7])}, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8])}, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9])}, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10])}, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11])}, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12])}, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13])}, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14])}, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15])}, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16])}, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17])}, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18])}, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19])}, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20])}, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21])}, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22])}, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23])}, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24])}, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25])}, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26])}, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27])}, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28])}, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29])}, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30])}, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31])}, + { "r32", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[32])}, + { "r33", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[33])}, + { "r34", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[34])}, + { "r35", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[35])}, + { "r36", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[36])}, + { "r37", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[37])}, + { "r38", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[38])}, + { "r39", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[39])}, + { "r40", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[40])}, + { "r41", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[41])}, + { "r42", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[42])}, + { "r43", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[43])}, + { "r44", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[44])}, + { "r45", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[45])}, + { "r46", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[46])}, + { "r47", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[47])}, + { "r48", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[48])}, + { "r49", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[49])}, + { "r50", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[50])}, + { "r51", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[51])}, + { "r52", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[52])}, + { "tp", GDB_SIZEOF_REG, offsetof(struct pt_regs, tp)}, + { "sp", GDB_SIZEOF_REG, offsetof(struct pt_regs, sp)}, + { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, lr)}, + { "sn", GDB_SIZEOF_REG, -1}, + { "idn0", GDB_SIZEOF_REG, -1}, + { "idn1", GDB_SIZEOF_REG, -1}, + { "udn0", GDB_SIZEOF_REG, -1}, + { "udn1", GDB_SIZEOF_REG, -1}, + { "udn2", GDB_SIZEOF_REG, -1}, + { "udn3", GDB_SIZEOF_REG, -1}, + { "zero", GDB_SIZEOF_REG, -1}, + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, pc)}, + { "faultnum", GDB_SIZEOF_REG, offsetof(struct pt_regs, faultnum)}, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +/* + * Similar to pt_regs_to_gdb_regs() except that process is sleeping and so + * we may not be able to get all the info. + */ +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + int reg; + struct pt_regs *thread_regs; + unsigned long *ptr = gdb_regs; + + if (task == NULL) + return; + + /* Initialize to zero. */ + memset(gdb_regs, 0, NUMREGBYTES); + + thread_regs = task_pt_regs(task); + for (reg = 0; reg <= TREG_LAST_GPR; reg++) + *(ptr++) = thread_regs->regs[reg]; + + gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc; + gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), NULL); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +/* + * Convert a kernel address to the writable kernel text mapping. + */ +static unsigned long writable_address(unsigned long addr) +{ + unsigned long ret = 0; + + if (core_kernel_text(addr)) + ret = addr - MEM_SV_START + PAGE_OFFSET; + else if (is_module_text_address(addr)) + ret = addr; + else + pr_err("Unknown virtual address 0x%lx\n", addr); + + return ret; +} + +/* + * Calculate the new address for after a step. + */ +static unsigned long get_step_address(struct pt_regs *regs) +{ + int src_reg; + int jump_off; + int br_off; + unsigned long addr; + unsigned int opcode; + tile_bundle_bits bundle; + + /* Move to the next instruction by default. */ + addr = regs->pc + TILEGX_BUNDLE_SIZE_IN_BYTES; + bundle = *(unsigned long *)instruction_pointer(regs); + + /* 0: X mode, Otherwise: Y mode. */ + if (bundle & TILEGX_BUNDLE_MODE_MASK) { + if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 && + get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) { + opcode = get_UnaryOpcodeExtension_Y1(bundle); + + switch (opcode) { + case JALR_UNARY_OPCODE_Y1: + case JALRP_UNARY_OPCODE_Y1: + case JR_UNARY_OPCODE_Y1: + case JRP_UNARY_OPCODE_Y1: + src_reg = get_SrcA_Y1(bundle); + dbg_get_reg(src_reg, &addr, regs); + break; + } + } + } else if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) { + if (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) { + opcode = get_UnaryOpcodeExtension_X1(bundle); + + switch (opcode) { + case JALR_UNARY_OPCODE_X1: + case JALRP_UNARY_OPCODE_X1: + case JR_UNARY_OPCODE_X1: + case JRP_UNARY_OPCODE_X1: + src_reg = get_SrcA_X1(bundle); + dbg_get_reg(src_reg, &addr, regs); + break; + } + } + } else if (get_Opcode_X1(bundle) == JUMP_OPCODE_X1) { + opcode = get_JumpOpcodeExtension_X1(bundle); + + switch (opcode) { + case JAL_JUMP_OPCODE_X1: + case J_JUMP_OPCODE_X1: + jump_off = sign_extend(get_JumpOff_X1(bundle), 27); + addr = regs->pc + + (jump_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES); + break; + } + } else if (get_Opcode_X1(bundle) == BRANCH_OPCODE_X1) { + br_off = 0; + opcode = get_BrType_X1(bundle); + + switch (opcode) { + case BEQZT_BRANCH_OPCODE_X1: + case BEQZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) == 0) + br_off = get_BrOff_X1(bundle); + break; + case BGEZT_BRANCH_OPCODE_X1: + case BGEZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) >= 0) + br_off = get_BrOff_X1(bundle); + break; + case BGTZT_BRANCH_OPCODE_X1: + case BGTZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) > 0) + br_off = get_BrOff_X1(bundle); + break; + case BLBCT_BRANCH_OPCODE_X1: + case BLBC_BRANCH_OPCODE_X1: + if (!(get_SrcA_X1(bundle) & 1)) + br_off = get_BrOff_X1(bundle); + break; + case BLBST_BRANCH_OPCODE_X1: + case BLBS_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) & 1) + br_off = get_BrOff_X1(bundle); + break; + case BLEZT_BRANCH_OPCODE_X1: + case BLEZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) <= 0) + br_off = get_BrOff_X1(bundle); + break; + case BLTZT_BRANCH_OPCODE_X1: + case BLTZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) < 0) + br_off = get_BrOff_X1(bundle); + break; + case BNEZT_BRANCH_OPCODE_X1: + case BNEZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) != 0) + br_off = get_BrOff_X1(bundle); + break; + } + + if (br_off != 0) { + br_off = sign_extend(br_off, 17); + addr = regs->pc + + (br_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES); + } + } + + return addr; +} + +/* + * Replace the next instruction after the current instruction with a + * breakpoint instruction. + */ +static void do_single_step(struct pt_regs *regs) +{ + unsigned long addr_wr; + + /* Determine where the target instruction will send us to. */ + stepped_addr = get_step_address(regs); + probe_kernel_read((char *)&stepped_instr, (char *)stepped_addr, + BREAK_INSTR_SIZE); + + addr_wr = writable_address(stepped_addr); + probe_kernel_write((char *)addr_wr, (char *)&singlestep_insn, + BREAK_INSTR_SIZE); + smp_wmb(); + flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE); +} + +static void undo_single_step(struct pt_regs *regs) +{ + unsigned long addr_wr; + + if (stepped_instr == 0) + return; + + addr_wr = writable_address(stepped_addr); + probe_kernel_write((char *)addr_wr, (char *)&stepped_instr, + BREAK_INSTR_SIZE); + stepped_instr = 0; + smp_wmb(); + flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE); +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger. + */ +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + int ret; + unsigned long flags; + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + +#ifdef CONFIG_KPROBES + /* + * Return immediately if the kprobes fault notifier has set + * DIE_PAGE_FAULT. + */ + if (cmd == DIE_PAGE_FAULT) + return NOTIFY_DONE; +#endif /* CONFIG_KPROBES */ + + switch (cmd) { + case DIE_BREAK: + case DIE_COMPILED_BPT: + break; + case DIE_SSTEPBP: + local_irq_save(flags); + kgdb_handle_exception(0, SIGTRAP, 0, regs); + local_irq_restore(flags); + return NOTIFY_STOP; + default: + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + } + + local_irq_save(flags); + ret = kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); + local_irq_restore(flags); + if (ret) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, +}; + +/* + * kgdb_arch_handle_exception - Handle architecture specific GDB packets. + * @vector: The error vector of the exception that happened. + * @signo: The signal number of the exception that happened. + * @err_code: The error code of the exception that happened. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + * @regs: The &struct pt_regs of the current process. + * + * This function MUST handle the 'c' and 's' command packets, + * as well packets to set / remove a hardware breakpoint, if used. + * If there are additional packets which the hardware needs to handle, + * they are handled here. The code should return -1 if it wants to + * process more packets, and a %0 or %1 if it wants to exit from the + * kgdb callback. + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + char *ptr; + unsigned long address; + + /* Undo any stepping we may have done. */ + undo_single_step(regs); + + switch (remcom_in_buffer[0]) { + case 'c': + case 's': + case 'D': + case 'k': + /* + * Try to read optional parameter, pc unchanged if no parm. + * If this was a compiled-in breakpoint, we need to move + * to the next instruction or we will just breakpoint + * over and over again. + */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + regs->pc = address; + else if (*(unsigned long *)regs->pc == compiled_bpt) + regs->pc += BREAK_INSTR_SIZE; + + if (remcom_in_buffer[0] == 's') { + do_single_step(regs); + kgdb_single_step = 1; + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); + } else + atomic_set(&kgdb_cpu_doing_single_step, -1); + + return 0; + } + + return -1; /* this means that we do not want to exit from the handler */ +} + +struct kgdb_arch arch_kgdb_ops; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + tile_bundle_bits bundle = TILEGX_BPT_BUNDLE; + + memcpy(arch_kgdb_ops.gdb_bpt_instr, &bundle, BREAK_INSTR_SIZE); + return register_die_notifier(&kgdb_notifier); +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + unsigned long addr_wr = writable_address(bpt->bpt_addr); + + if (addr_wr == 0) + return -1; + + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + + err = probe_kernel_write((char *)addr_wr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + smp_wmb(); + flush_icache_range((unsigned long)bpt->bpt_addr, + (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE); + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + unsigned long addr_wr = writable_address(bpt->bpt_addr); + + if (addr_wr == 0) + return -1; + + err = probe_kernel_write((char *)addr_wr, (char *)bpt->saved_instr, + BREAK_INSTR_SIZE); + smp_wmb(); + flush_icache_range((unsigned long)bpt->bpt_addr, + (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE); + return err; +} diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 628661f..a937365 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -240,6 +240,11 @@ static int do_bpt(struct pt_regs *regs) notify_die(DIE_BREAK, "debug", regs, bundle, INT_ILL, SIGTRAP); break; + /* compiled_bpt */ + case DIE_COMPILED_BPT: + notify_die(DIE_COMPILED_BPT, "debug", regs, bundle, + INT_ILL, SIGTRAP); + break; /* breakpoint2_insn */ case DIE_SSTEPBP: notify_die(DIE_SSTEPBP, "single_step", regs, bundle, -- cgit v1.1 From 80f184108e364ba1d08dd77339966034c9a9243e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 12 Aug 2013 14:39:22 -0400 Subject: tile: support reporting Tilera hypervisor statistics Newer hypervisors have an API for reporting per-cpu statistics information. This change allows seeing that information via /sys/devices/system/cpu/cpuN/hv_stats file for each core. Signed-off-by: Chris Metcalf --- arch/tile/kernel/sysfs.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c index e25b0a8..a3ed12f 100644 --- a/arch/tile/kernel/sysfs.c +++ b/arch/tile/kernel/sysfs.c @@ -157,6 +157,67 @@ hvconfig_bin_read(struct file *filp, struct kobject *kobj, return count; } +static ssize_t hv_stats_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + int cpu = dev->id; + long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu)); + + ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, + (unsigned long)page, PAGE_SIZE - 1, + lotar, 0); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1); + page[n] = '\0'; + return n; +} + +static ssize_t hv_stats_store(struct device *dev, + struct device_attribute *attr, + const char *page, + size_t count) +{ + int cpu = dev->id; + long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu)); + + ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, 0, 0, lotar, 1); + return n < 0 ? n : count; +} + +static DEVICE_ATTR(hv_stats, 0644, hv_stats_show, hv_stats_store); + +static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif) +{ + int err, cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + err = sysfs_create_file(&dev->kobj, &dev_attr_hv_stats.attr); + + return err; +} + +static int hv_stats_device_remove(struct device *dev, + struct subsys_interface *sif) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr); + return 0; +} + + +static struct subsys_interface hv_stats_interface = { + .name = "hv_stats", + .subsys = &cpu_subsys, + .add_dev = hv_stats_device_add, + .remove_dev = hv_stats_device_remove, +}; + static int __init create_sysfs_entries(void) { int err = 0; @@ -188,6 +249,21 @@ static int __init create_sysfs_entries(void) err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); } + if (!err) { + /* + * Don't bother adding the hv_stats files on each CPU if + * our hypervisor doesn't supply statistics. + */ + int cpu = raw_smp_processor_id(); + long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu)); + char dummy; + ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, + (unsigned long) &dummy, 1, + lotar, 0); + if (n >= 0) + err = subsys_interface_register(&hv_stats_interface); + } + return err; } subsys_initcall(create_sysfs_entries); -- cgit v1.1 From 640710a33b54de8d90ae140ef633ed0feba76a75 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 12 Aug 2013 15:08:09 -0400 Subject: tile: add virt_to_kpte() API and clean up and document behavior We use virt_to_pte(NULL, va) a lot, which isn't very obvious. I added virt_to_kpte(va) as a more obvious wrapper function, that also validates the va as being a kernel adddress. And, I fixed the semantics of virt_to_pte() so that we handle the pud and pmd the same way, and we now document the fact that we handle the final pte level differently. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 1021784..b79c312 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1600,7 +1600,7 @@ void __init setup_per_cpu_areas(void) /* Update the vmalloc mapping and page home. */ unsigned long addr = (unsigned long)ptr + i; - pte_t *ptep = virt_to_pte(NULL, addr); + pte_t *ptep = virt_to_kpte(addr); pte_t pte = *ptep; BUG_ON(pfn != pte_pfn(pte)); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); @@ -1609,12 +1609,12 @@ void __init setup_per_cpu_areas(void) /* Update the lowmem mapping for consistency. */ lowmem_va = (unsigned long)pfn_to_kaddr(pfn); - ptep = virt_to_pte(NULL, lowmem_va); + ptep = virt_to_kpte(lowmem_va); if (pte_huge(*ptep)) { printk(KERN_DEBUG "early shatter of huge page" " at %#lx\n", lowmem_va); shatter_pmd((pmd_t *)ptep); - ptep = virt_to_pte(NULL, lowmem_va); + ptep = virt_to_kpte(lowmem_va); BUG_ON(pte_huge(*ptep)); } BUG_ON(pfn != pte_pfn(*ptep)); -- cgit v1.1 From 9a55fed4739d48829af359f575790840cc7e91dd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 14:42:36 -0400 Subject: tile: check for correct compiler earlier in asm-offsets.c If we wait until after including a bunch of other files, we will have generated so much warning spew that it's hard to notice the error about using the wrong compiler. Signed-off-by: Chris Metcalf --- arch/tile/kernel/asm-offsets.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c index 97ea6ac..375e7c3 100644 --- a/arch/tile/kernel/asm-offsets.c +++ b/arch/tile/kernel/asm-offsets.c @@ -14,13 +14,6 @@ * Generates definitions from c-type structures used by assembly sources. */ -#include -#include -#include -#include -#include -#include - /* Check for compatible compiler early in the build. */ #ifdef CONFIG_TILEGX # ifndef __tilegx__ @@ -31,10 +24,17 @@ # endif #else # ifdef __tilegx__ -# error Can not build TILEPro/TILE64 configurations with tilegx compiler +# error Can not build TILEPro configurations with tilegx compiler # endif #endif +#include +#include +#include +#include +#include +#include + void foo(void) { DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, -- cgit v1.1 From 7b770a6a0a8504327224ad891d6862893456290d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 15:17:38 -0400 Subject: tile: mark pcibios_init() as __init It was bombed away because it was previously marked as __devinit, but it should be an __init function. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci.c | 2 +- arch/tile/kernel/pci_gx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index af75835..b7180e6 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -386,7 +386,7 @@ void pcibios_set_master(struct pci_dev *dev) } /* Process any "pci=" kernel boot arguments. */ -char * __init pcibios_setup(char *str) +char *__init pcibios_setup(char *str) { if (!strcmp(str, "off")) { pci_probe = 0; diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 4843881..66ef9db 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -1060,7 +1060,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) } /* Process any "pci=" kernel boot arguments. */ -char *pcibios_setup(char *str) +char *__init pcibios_setup(char *str) { if (!strcmp(str, "off")) { pci_probe = 0; -- cgit v1.1 From 850a45209d44654afb8ee2cad53a67c07c1cef9c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 15:24:14 -0400 Subject: tile: change to in comments Also fix mentions of it in #error statements. Signed-off-by: Chris Metcalf --- arch/tile/kernel/regs_32.S | 4 ++-- arch/tile/kernel/regs_64.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S index c12280c..542cae1 100644 --- a/arch/tile/kernel/regs_32.S +++ b/arch/tile/kernel/regs_32.S @@ -20,7 +20,7 @@ #include /* - * See ; called with prev and next task_struct pointers. + * See ; called with prev and next task_struct pointers. * "prev" is returned in r0 for _switch_to and also for ret_from_fork. * * We want to save pc/sp in "prev", and get the new pc/sp from "next". @@ -39,7 +39,7 @@ */ #if CALLEE_SAVED_REGS_COUNT != 24 -# error Mismatch between and kernel/entry.S +# error Mismatch between and kernel/entry.S #endif #define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4) diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S index 0829fd0..bbffcc6 100644 --- a/arch/tile/kernel/regs_64.S +++ b/arch/tile/kernel/regs_64.S @@ -20,7 +20,7 @@ #include /* - * See ; called with prev and next task_struct pointers. + * See ; called with prev and next task_struct pointers. * "prev" is returned in r0 for _switch_to and also for ret_from_fork. * * We want to save pc/sp in "prev", and get the new pc/sp from "next". @@ -39,7 +39,7 @@ */ #if CALLEE_SAVED_REGS_COUNT != 24 -# error Mismatch between and kernel/entry.S +# error Mismatch between and kernel/entry.S #endif #define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8) -- cgit v1.1 From 43b7f2fb58d42d2b6bc02368f61e14abd002001a Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 15:26:23 -0400 Subject: tile: simplify code referencing hypervisor API addresses There's no need to make up new ways of computing the addresses of the Tilera hypervisor APIs; just use the standard method of relying on the symbols to provide the addresses. Signed-off-by: Chris Metcalf --- arch/tile/kernel/relocate_kernel_32.S | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S index 010b418..f7fd37b 100644 --- a/arch/tile/kernel/relocate_kernel_32.S +++ b/arch/tile/kernel/relocate_kernel_32.S @@ -20,15 +20,6 @@ #include #include -#define ___hvb MEM_SV_INTRPT + HV_GLUE_START_CPA - -#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f)) - -#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC) -#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT) -#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC) -#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE) - #undef RELOCATE_NEW_KERNEL_VERBOSE STD_ENTRY(relocate_new_kernel) @@ -43,8 +34,8 @@ STD_ENTRY(relocate_new_kernel) addi sp, sp, -8 /* we now have a stack (whether we need one or not) */ - moveli r40, lo16(___hv_console_putc) - auli r40, r40, ha16(___hv_console_putc) + moveli r40, lo16(hv_console_putc) + auli r40, r40, ha16(hv_console_putc) #ifdef RELOCATE_NEW_KERNEL_VERBOSE moveli r0, 'r' @@ -114,11 +105,11 @@ STD_ENTRY(relocate_new_kernel) } { move r8, zero /* asids */ - moveli r20, lo16(___hv_flush_remote) + moveli r20, lo16(hv_flush_remote) } { move r9, zero /* asidcount */ - auli r20, r20, ha16(___hv_flush_remote) + auli r20, r20, ha16(hv_flush_remote) } jalr r20 @@ -175,8 +166,8 @@ STD_ENTRY(relocate_new_kernel) move r0, r32 moveli r1, 0 /* arg to hv_reexec is 64 bits */ - moveli r41, lo16(___hv_reexec) - auli r41, r41, ha16(___hv_reexec) + moveli r41, lo16(hv_reexec) + auli r41, r41, ha16(hv_reexec) jalr r41 @@ -267,8 +258,8 @@ STD_ENTRY(relocate_new_kernel) moveli r0, '\n' jalr r40 .Lhalt: - moveli r41, lo16(___hv_halt) - auli r41, r41, ha16(___hv_halt) + moveli r41, lo16(hv_halt) + auli r41, r41, ha16(hv_halt) jalr r41 STD_ENDPROC(relocate_new_kernel) -- cgit v1.1 From a0099303cd6336675a5532e0444c1b6fd493ca49 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 13 Aug 2013 15:33:53 -0400 Subject: tile: use standard tile_bundle_bits type in traps.c We were rolling our own bundle_bits, which is unnecessary. Signed-off-by: Chris Metcalf --- arch/tile/kernel/traps.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index a937365..6b603d5 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -101,13 +101,7 @@ static int retry_gpv(unsigned int gpv_reason) #endif /* CHIP_HAS_TILE_DMA() */ -#ifdef __tilegx__ -#define bundle_bits tilegx_bundle_bits -#else -#define bundle_bits tile_bundle_bits -#endif - -extern bundle_bits bpt_code; +extern tile_bundle_bits bpt_code; asm(".pushsection .rodata.bpt_code,\"a\";" ".align 8;" @@ -115,7 +109,7 @@ asm(".pushsection .rodata.bpt_code,\"a\";" ".size bpt_code,.-bpt_code;" ".popsection"); -static int special_ill(bundle_bits bundle, int *sigp, int *codep) +static int special_ill(tile_bundle_bits bundle, int *sigp, int *codep) { int sig, code, maxcode; @@ -263,7 +257,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, siginfo_t info = { 0 }; int signo, code; unsigned long address = 0; - bundle_bits instr; + tile_bundle_bits instr; int is_kernel = !user_mode(regs); /* Handle breakpoints, etc. */ -- cgit v1.1 From d7c9661115fd23b4dabb710b3080dd9919dfa891 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 15 Aug 2013 16:23:24 -0400 Subject: tile: remove support for TILE64 This chip is no longer being actively developed for (it was superceded by the TILEPro64 in 2008), and in any case the existing compiler and toolchain in the community do not support it. It's unlikely that the kernel works with TILE64 at this point as the configuration has not been tested in years. The support is also awkward as it requires maintaining a significant number of ifdefs. So, just remove it altogether. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 69 ++--------------------------------- arch/tile/kernel/intvec_64.S | 2 - arch/tile/kernel/irq.c | 4 +- arch/tile/kernel/process.c | 44 +--------------------- arch/tile/kernel/relocate_kernel_32.S | 2 - arch/tile/kernel/relocate_kernel_64.S | 2 - arch/tile/kernel/setup.c | 13 ------- arch/tile/kernel/single_step.c | 2 - 8 files changed, 6 insertions(+), 132 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f084f1c..088d5c1 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -32,12 +32,6 @@ #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) -#if !CHIP_HAS_WH64() - /* By making this an empty macro, we can use wh64 in the code. */ - .macro wh64 reg - .endm -#endif - .macro push_reg reg, ptr=sp, delta=-4 { sw \ptr, \reg @@ -325,18 +319,14 @@ intvec_\vecname: movei r3, -1 /* not used, but set for consistency */ } .else -#if CHIP_HAS_AUX_PERF_COUNTERS() .ifc \c_routine, op_handle_aux_perf_interrupt { mfspr r2, AUX_PERF_COUNT_STS movei r3, -1 /* not used, but set for consistency */ } .else -#endif movei r3, 0 -#if CHIP_HAS_AUX_PERF_COUNTERS() .endif -#endif .endif .endif .endif @@ -561,7 +551,6 @@ intvec_\vecname: .endif mtspr INTERRUPT_CRITICAL_SECTION, zero -#if CHIP_HAS_WH64() /* * Prepare the first 256 stack bytes to be rapidly accessible * without having to fetch the background data. We don't really @@ -582,7 +571,6 @@ intvec_\vecname: addi r52, r52, -64 } wh64 r52 -#endif #ifdef CONFIG_TRACE_IRQFLAGS .ifnc \function,handle_nmi @@ -1533,12 +1521,10 @@ STD_ENTRY(_sys_clone) __HEAD .align 64 /* Align much later jump on the start of a cache line. */ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() nop #if PAGE_SIZE >= 0x10000 nop #endif -#endif ENTRY(sys_cmpxchg) /* @@ -1572,45 +1558,6 @@ ENTRY(sys_cmpxchg) # error Code here assumes PAGE_OFFSET can be loaded with just hi16() #endif -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - { - /* Check for unaligned input. */ - bnz sp, .Lcmpxchg_badaddr - mm r25, r0, zero, 3, PAGE_SHIFT-1 - } - { - crc32_32 r25, zero, r25 - moveli r21, lo16(atomic_lock_ptr) - } - { - auli r21, r21, ha16(atomic_lock_ptr) - auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ - } - { - shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT - slt_u r23, r0, r23 - lw r26, r0 /* see comment in the "#else" for the "lw r26". */ - } - { - s2a r21, r20, r21 - bbns r23, .Lcmpxchg_badaddr - } - { - lw r21, r21 - seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 - andi r25, r25, ATOMIC_HASH_L2_SIZE - 1 - } - { - /* Branch away at this point if we're doing a 64-bit cmpxchg. */ - bbs r23, .Lcmpxchg64 - andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ - } - { - s2a ATOMIC_LOCK_REG_NAME, r25, r21 - j .Lcmpxchg32_tns /* see comment in the #else for the jump. */ - } - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ { /* Check for unaligned input. */ bnz sp, .Lcmpxchg_badaddr @@ -1635,12 +1582,9 @@ ENTRY(sys_cmpxchg) /* * Ensure that the TLB is loaded before we take out the lock. - * On tilepro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. + * This will start fetching the value all the way into our L1 + * as well (and if it gets modified before we grab the lock, + * it will be invalidated from our cache before we reload it). */ lw r26, r0 } @@ -1683,8 +1627,6 @@ ENTRY(sys_cmpxchg) j .Lcmpxchg32_tns } -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* Symbol for do_page_fault_ics() to use to compare against the PC. */ .global __sys_cmpxchg_grab_lock __sys_cmpxchg_grab_lock: @@ -1822,9 +1764,6 @@ __sys_cmpxchg_grab_lock: .align 64 .Lcmpxchg64: { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - s2a ATOMIC_LOCK_REG_NAME, r25, r21 -#endif bzt r23, .Lcmpxchg64_tns } j .Lcmpxchg_badaddr @@ -1959,10 +1898,8 @@ int_unalign: do_page_fault int_hand INT_SN_CPL, SN_CPL, bad_intr int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap -#if CHIP_HAS_AUX_PERF_COUNTERS() int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ op_handle_aux_perf_interrupt, handle_nmi -#endif /* Synthetic interrupt delivered only by the simulator */ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index c3a2335..ec755d3 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -511,12 +511,10 @@ intvec_\vecname: .else .ifc \c_routine, op_handle_perf_interrupt mfspr r2, PERF_COUNT_STS -#if CHIP_HAS_AUX_PERF_COUNTERS() .else .ifc \c_routine, op_handle_aux_perf_interrupt mfspr r2, AUX_PERF_COUNT_STS .endif -#endif .endif .endif .endif diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 0e6c521..d8ba060 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock); /* * The interrupt handling path, implemented in terms of HV interrupt - * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + * emulation on TILEPro, and IPI hardware on TILE-Gx. * Entered with interrupts disabled. */ void tile_dev_intr(struct pt_regs *regs, int intnum) @@ -235,7 +235,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type) { /* * We use handle_level_irq() by default because the pending - * interrupt vector (whether modeled by the HV on TILE64 and + * interrupt vector (whether modeled by the HV on * TILEPro or implemented in hardware on TILE-Gx) has * level-style semantics for each bit. An interrupt fires * whenever a bit is high, not just at edges. diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 44cdc4a..16ed589 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -187,16 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); #endif -#if CHIP_HAS_SN_PROC() - /* Likewise, the new thread is not running static processor code. */ - p->thread.sn_proc_running = 0; - memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb)); -#endif - -#if CHIP_HAS_PROC_STATUS_SPR() /* New thread has its miscellaneous processor state bits clear. */ p->thread.proc_status = 0; -#endif #ifdef CONFIG_HARDWALL /* New thread does not own any networks. */ @@ -378,15 +370,11 @@ static void save_arch_state(struct thread_struct *t) t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2); t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3); t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS); -#if CHIP_HAS_PROC_STATUS_SPR() t->proc_status = __insn_mfspr(SPR_PROC_STATUS); -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); #endif -#if CHIP_HAS_TILE_RTF_HWM() t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); -#endif #if CHIP_HAS_DSTREAM_PF() t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); #endif @@ -407,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t) __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]); __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]); __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0); -#if CHIP_HAS_PROC_STATUS_SPR() __insn_mtspr(SPR_PROC_STATUS, t->proc_status); -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); #endif -#if CHIP_HAS_TILE_RTF_HWM() __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); -#endif #if CHIP_HAS_DSTREAM_PF() __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); #endif @@ -424,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t) void _prepare_arch_switch(struct task_struct *next) { -#if CHIP_HAS_SN_PROC() - int snctl; -#endif #if CHIP_HAS_TILE_DMA() struct tile_dma_state *dma = ¤t->thread.tile_dma_state; if (dma->enabled) save_tile_dma_state(dma); #endif -#if CHIP_HAS_SN_PROC() - /* - * Suspend the static network processor if it was running. - * We do not suspend the fabric itself, just like we don't - * try to suspend the UDN. - */ - snctl = __insn_mfspr(SPR_SNCTL); - current->thread.sn_proc_running = - (snctl & SPR_SNCTL__FRZPROC_MASK) == 0; - if (current->thread.sn_proc_running) - __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK); -#endif } @@ -471,17 +440,6 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, /* Restore other arch state. */ restore_arch_state(&next->thread); -#if CHIP_HAS_SN_PROC() - /* - * Restart static network processor in the new process - * if it was running before. - */ - if (next->thread.sn_proc_running) { - int snctl = __insn_mfspr(SPR_SNCTL); - __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK); - } -#endif - #ifdef CONFIG_HARDWALL /* Enable or disable access to the network registers appropriately. */ hardwall_switch_tasks(prev, next); @@ -523,7 +481,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) schedule(); return 1; } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() if (thread_info_flags & _TIF_ASYNC_TLB) { do_async_page_fault(regs); return 1; diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S index f7fd37b..e44fbcf 100644 --- a/arch/tile/kernel/relocate_kernel_32.S +++ b/arch/tile/kernel/relocate_kernel_32.S @@ -77,7 +77,6 @@ STD_ENTRY(relocate_new_kernel) move r30, sp addi sp, sp, -8 -#if CHIP_HAS_CBOX_HOME_MAP() /* * On TILEPro, we need to flush all tiles' caches, since we may * have been doing hash-for-home caching there. Note that we @@ -113,7 +112,6 @@ STD_ENTRY(relocate_new_kernel) } jalr r20 -#endif /* r33 is destination pointer, default to zero */ diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S index 02bc446..d9d8cf6 100644 --- a/arch/tile/kernel/relocate_kernel_64.S +++ b/arch/tile/kernel/relocate_kernel_64.S @@ -78,7 +78,6 @@ STD_ENTRY(relocate_new_kernel) move r30, sp addi sp, sp, -16 -#if CHIP_HAS_CBOX_HOME_MAP() /* * On TILE-GX, we need to flush all tiles' caches, since we may * have been doing hash-for-home caching there. Note that we @@ -116,7 +115,6 @@ STD_ENTRY(relocate_new_kernel) shl16insli r20, r20, hw0(hv_flush_remote) jalr r20 -#endif /* r33 is destination pointer, default to zero */ diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index b79c312..128a2d0 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1046,9 +1046,6 @@ void __cpuinit setup_cpu(int boot) arch_local_irq_unmask(INT_DMATLB_MISS); arch_local_irq_unmask(INT_DMATLB_ACCESS); #endif -#if CHIP_HAS_SN_PROC() - arch_local_irq_unmask(INT_SNITLB_MISS); -#endif #ifdef __tilegx__ arch_local_irq_unmask(INT_SINGLE_STEP_K); #endif @@ -1063,10 +1060,6 @@ void __cpuinit setup_cpu(int boot) /* Static network is not restricted. */ __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1); #endif -#if CHIP_HAS_SN_PROC() - __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1); - __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1); -#endif /* * Set the MPL for interrupt control 0 & 1 to the corresponding @@ -1291,7 +1284,6 @@ static void __init validate_va(void) struct cpumask __write_once cpu_lotar_map; EXPORT_SYMBOL(cpu_lotar_map); -#if CHIP_HAS_CBOX_HOME_MAP() /* * hash_for_home_map lists all the tiles that hash-for-home data * will be cached on. Note that this may includes tiles that are not @@ -1301,7 +1293,6 @@ EXPORT_SYMBOL(cpu_lotar_map); */ struct cpumask hash_for_home_map; EXPORT_SYMBOL(hash_for_home_map); -#endif /* * cpu_cacheable_map lists all the cpus whose caches the hypervisor can @@ -1394,7 +1385,6 @@ static void __init setup_cpu_maps(void) cpu_lotar_map = *cpu_possible_mask; } -#if CHIP_HAS_CBOX_HOME_MAP() /* Retrieve set of CPUs used for hash-for-home caching */ rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE, (HV_VirtAddr) hash_for_home_map.bits, @@ -1402,9 +1392,6 @@ static void __init setup_cpu_maps(void) if (rc < 0) early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); -#else - cpu_cacheable_map = *cpu_possible_mask; -#endif } diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 5ef2e9e..de07fa7 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -546,7 +546,6 @@ void single_step_once(struct pt_regs *regs) } break; -#if CHIP_HAS_WH64() /* postincrement operations */ case IMM_0_OPCODE_X1: switch (get_ImmOpcodeExtension_X1(bundle)) { @@ -581,7 +580,6 @@ void single_step_once(struct pt_regs *regs) break; } break; -#endif /* CHIP_HAS_WH64() */ } if (state->update) { -- cgit v1.1 From ce61cdc270a5e0dd18057bbf29bd3471abccbda8 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 15 Aug 2013 16:29:02 -0400 Subject: tile: make __write_once a synonym for __read_mostly This was really only useful for TILE64 when we mapped the kernel data with small pages. Now we use a huge page and we really don't want to map different parts of the kernel data in different ways. We retain the __write_once name in case we want to bring it back to life at some point in the future. Note that this change uncovered a latent bug where the "smp_topology" variable happened to always be aligned mod 8 so we could store two "int" values at once, but when we eliminated __write_once it ended up only aligned mod 4. Fix with an explicit annotation. Signed-off-by: Chris Metcalf --- arch/tile/kernel/smp.c | 6 +++++- arch/tile/kernel/vmlinux.lds.S | 12 ------------ 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 0ae1c59..01e8ab2 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -22,7 +22,11 @@ #include #include -HV_Topology smp_topology __write_once; +/* + * We write to width and height with a single store in head_NN.S, + * so make the variable aligned to "long". + */ +HV_Topology smp_topology __write_once __aligned(sizeof(long)); EXPORT_SYMBOL(smp_topology); #if CHIP_HAS_IPI() diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 8b20163..f1819423 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -74,20 +74,8 @@ SECTIONS __init_end = .; _sdata = .; /* Start of data section */ - RO_DATA_SECTION(PAGE_SIZE) - - /* initially writeable, then read-only */ - . = ALIGN(PAGE_SIZE); - __w1data_begin = .; - .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) { - VMLINUX_SYMBOL(__w1data_begin) = .; - *(.w1data) - VMLINUX_SYMBOL(__w1data_end) = .; - } - RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - _edata = .; EXCEPTION_TABLE(L2_CACHE_BYTES) -- cgit v1.1 From 6d715790ef6b5b903779a14d995ce3d9e680bec0 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Wed, 21 Aug 2013 21:49:42 -0400 Subject: tile: add null check for kzalloc in tile/kernel/setup.c Should check the return value of kzalloc first to avoid the null pointer. Then can dereference the non-null pointer to access the fields of struct resource. Signed-off-by: Wang Sheng-Hui Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 128a2d0..c2c42f3 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1643,6 +1643,8 @@ insert_non_bus_resource(void) { struct resource *res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) + return NULL; res->name = "Non-Bus Physical Address Space"; res->start = (1ULL << 32); res->end = -1LL; @@ -1660,6 +1662,8 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) { struct resource *res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) + return NULL; res->name = reserved ? "Reserved" : "System RAM"; res->start = start_pfn << PAGE_SHIFT; res->end = (end_pfn << PAGE_SHIFT) - 1; -- cgit v1.1 From b40f451d56de69477a2244a0b4082f644699673f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Aug 2013 10:12:36 -0400 Subject: tile PCI RC: make default consistent DMA mask 32-bit This change sets the PCI devices' initial DMA capabilities conservatively and promotes them at the request of the driver, as opposed to assuming advanced DMA capabilities. The old design runs the risk of breaking drivers that assume default capabilities. Signed-off-by: Chris Metcalf --- arch/tile/kernel/pci-dma.c | 21 ++++++++++++--------- arch/tile/kernel/pci_gx.c | 15 +++++++++++++-- 2 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index d94f487..09b5870 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -588,15 +588,18 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - /* Handle hybrid PCI devices with limited memory addressability. */ - if ((dma_ops == gx_pci_dma_map_ops || - dma_ops == gx_hybrid_pci_dma_map_ops || - dma_ops == gx_legacy_pci_dma_map_ops) && - (mask <= DMA_BIT_MASK(32))) { - if (dma_ops == gx_pci_dma_map_ops) - set_dma_ops(dev, gx_hybrid_pci_dma_map_ops); - - if (mask > dev->archdata.max_direct_dma_addr) + /* + * For PCI devices with 64-bit DMA addressing capability, promote + * the dma_ops to full capability for both streams and consistent + * memory access. For 32-bit capable devices, limit the consistent + * memory DMA range to max_direct_dma_addr. + */ + if (dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) { + if (mask == DMA_BIT_MASK(64)) + set_dma_ops(dev, gx_pci_dma_map_ops); + else if (mask > dev->archdata.max_direct_dma_addr) mask = dev->archdata.max_direct_dma_addr; } diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 66ef9db..29acac6 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -1081,13 +1081,24 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -/* Called for each device after PCI setup is done. */ +/* + * Called for each device after PCI setup is done. + * We initialize the PCI device capabilities conservatively, assuming that + * all devices can only address the 32-bit DMA space. The exception here is + * that the device dma_offset is set to the value that matches the 64-bit + * capable devices. This is OK because dma_offset is not used by legacy + * dma_ops, nor by the hybrid dma_ops's streaming DMAs, which are 64-bit ops. + * This implementation matches the kernel design of setting PCI devices' + * coherent_dma_mask to 0xffffffffull by default, allowing the device drivers + * to skip calling pci_set_consistent_dma_mask(DMA_BIT_MASK(32)). + */ static void pcibios_fixup_final(struct pci_dev *pdev) { - set_dma_ops(&pdev->dev, gx_pci_dma_map_ops); + set_dma_ops(&pdev->dev, gx_legacy_pci_dma_map_ops); set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET); pdev->dev.archdata.max_direct_dma_addr = TILE_PCI_MAX_DIRECT_DMA_ADDRESS; + pdev->dev.coherent_dma_mask = TILE_PCI_MAX_DIRECT_DMA_ADDRESS; } DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final); -- cgit v1.1