diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-21 17:22:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-21 17:22:22 -0700 |
commit | 23dcfa61bac244e1200ff9ad19c6e9144dcb6bb5 (patch) | |
tree | 4033d868fe120f7778a6180a8eda60842309b665 | |
parent | a484147a52e6910d990ae7cf2a5d16b5bc58dcbe (diff) | |
parent | c67fe3752abe6ab47639e2f9b836900c3dc3da84 (diff) | |
download | op-kernel-dev-23dcfa61bac244e1200ff9ad19c6e9144dcb6bb5.zip op-kernel-dev-23dcfa61bac244e1200ff9ad19c6e9144dcb6bb5.tar.gz |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge fixes from Andrew Morton.
Random drivers and some VM fixes.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (17 commits)
mm: compaction: Abort async compaction if locks are contended or taking too long
mm: have order > 0 compaction start near a pageblock with free pages
rapidio/tsi721: fix unused variable compiler warning
rapidio/tsi721: fix inbound doorbell interrupt handling
drivers/rtc/rtc-rs5c348.c: fix hour decoding in 12-hour mode
mm: correct page->pfmemalloc to fix deactivate_slab regression
drivers/rtc/rtc-pcf2123.c: initialize dynamic sysfs attributes
mm/compaction.c: fix deferring compaction mistake
drivers/misc/sgi-xp/xpc_uv.c: SGI XPC fails to load when cpu 0 is out of IRQ resources
string: do not export memweight() to userspace
hugetlb: update hugetlbpage.txt
checkpatch: add control statement test to SINGLE_STATEMENT_DO_WHILE_MACRO
mm: hugetlbfs: correctly populate shared pmd
cciss: fix incorrect scsi status reporting
Documentation: update mount option in filesystem/vfat.txt
mm: change nr_ptes BUG_ON to WARN_ON
cs5535-clockevt: typo, it's MFGPT, not MFPGT
-rw-r--r-- | Documentation/filesystems/vfat.txt | 11 | ||||
-rw-r--r-- | Documentation/vm/hugetlbpage.txt | 10 | ||||
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 21 | ||||
-rw-r--r-- | drivers/block/cciss_scsi.c | 11 | ||||
-rw-r--r-- | drivers/clocksource/cs5535-clockevt.c | 4 | ||||
-rw-r--r-- | drivers/misc/sgi-xp/xpc_uv.c | 84 | ||||
-rw-r--r-- | drivers/rapidio/devices/tsi721.c | 12 | ||||
-rw-r--r-- | drivers/rtc/rtc-pcf2123.c | 2 | ||||
-rw-r--r-- | drivers/rtc/rtc-rs5c348.c | 7 | ||||
-rw-r--r-- | include/linux/compaction.h | 4 | ||||
-rw-r--r-- | include/linux/string.h | 2 | ||||
-rw-r--r-- | mm/compaction.c | 156 | ||||
-rw-r--r-- | mm/internal.h | 1 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 38 | ||||
-rwxr-xr-x | scripts/checkpatch.pl | 3 |
16 files changed, 258 insertions, 110 deletions
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index ead764b..de1e6c4 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -137,6 +137,17 @@ errors=panic|continue|remount-ro without doing anything or remount the partition in read-only mode (default behavior). +discard -- If set, issues discard/TRIM commands to the block + device when blocks are freed. This is useful for SSD devices + and sparse/thinly-provisoned LUNs. + +nfs -- This option maintains an index (cache) of directory + inodes by i_logstart which is used by the nfs-related code to + improve look-ups. + + Enable this only if you want to export the FAT filesystem + over NFS + <bool>: 0,1,yes,no,true,false TODO diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index f8551b3..4ac359b 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt @@ -299,11 +299,17 @@ map_hugetlb.c. ******************************************************************* /* - * hugepage-shm: see Documentation/vm/hugepage-shm.c + * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c */ ******************************************************************* /* - * hugepage-mmap: see Documentation/vm/hugepage-mmap.c + * hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c + */ + +******************************************************************* + +/* + * hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c */ diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f6679a7..b91e485 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) } /* - * search for a shareable pmd page for hugetlb. + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. */ -static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +static pte_t * +huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) { struct vm_area_struct *vma = find_vma(mm, addr); struct address_space *mapping = vma->vm_file->f_mapping; @@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; + pte_t *pte; if (!vma_shareable(vma, addr)) - return; + return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { @@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); out: + pte = (pte_t *)pmd_alloc(mm, pud, addr); mutex_unlock(&mapping->i_mmap_mutex); + return pte; } /* @@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } else { BUG_ON(sz != PMD_SIZE); if (pud_none(*pud)) - huge_pmd_share(mm, addr, pud); - pte = (pte_t *) pmd_alloc(mm, pud, addr); + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); } } BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index acda773..38aa6dd 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, { case CMD_TARGET_STATUS: /* Pass it up to the upper layers... */ - if( ei->ScsiStatus) - { -#if 0 - printk(KERN_WARNING "cciss: cmd %p " - "has SCSI Status = %x\n", - c, ei->ScsiStatus); -#endif - cmd->result |= (ei->ScsiStatus << 1); - } - else { /* scsi status is zero??? How??? */ + if (!ei->ScsiStatus) { /* Ordinarily, this case should never happen, but there is a bug in some released firmware revisions that allows it to happen diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index 540795c..d927938 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -53,7 +53,7 @@ static struct cs5535_mfgpt_timer *cs5535_event_clock; #define MFGPT_PERIODIC (MFGPT_HZ / HZ) /* - * The MFPGT timers on the CS5536 provide us with suitable timers to use + * The MFGPT timers on the CS5536 provide us with suitable timers to use * as clock event sources - not as good as a HPET or APIC, but certainly * better than the PIT. This isn't a general purpose MFGPT driver, but * a simplified one designed specifically to act as a clock event source. @@ -144,7 +144,7 @@ static int __init cs5535_mfgpt_init(void) timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); if (!timer) { - printk(KERN_ERR DRV_NAME ": Could not allocate MFPGT timer\n"); + printk(KERN_ERR DRV_NAME ": Could not allocate MFGPT timer\n"); return -ENODEV; } cs5535_event_clock = timer; diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 87b251a..b9e2000 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -18,6 +18,8 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/device.h> +#include <linux/cpu.h> +#include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <asm/uv/uv_hub.h> @@ -59,6 +61,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv; XPC_NOTIFY_MSG_SIZE_UV) #define XPC_NOTIFY_IRQ_NAME "xpc_notify" +static int xpc_mq_node = -1; + static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; @@ -109,11 +113,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) #if defined CONFIG_X86_64 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, UV_AFFINITY_CPU); - if (mq->irq < 0) { - dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", - -mq->irq); + if (mq->irq < 0) return mq->irq; - } mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); @@ -238,8 +239,9 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, mq->mmr_blade = uv_cpu_to_blade_id(cpu); nid = cpu_to_node(cpu); - page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, - pg_order); + page = alloc_pages_exact_node(nid, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + pg_order); if (page == NULL) { dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); @@ -1731,9 +1733,50 @@ static struct xpc_arch_operations xpc_arch_ops_uv = { .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, }; +static int +xpc_init_mq_node(int nid) +{ + int cpu; + + get_online_cpus(); + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_activate_mq_uv = + xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, + XPC_ACTIVATE_IRQ_NAME, + xpc_handle_activate_IRQ_uv); + if (!IS_ERR(xpc_activate_mq_uv)) + break; + } + if (IS_ERR(xpc_activate_mq_uv)) { + put_online_cpus(); + return PTR_ERR(xpc_activate_mq_uv); + } + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_notify_mq_uv = + xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, + XPC_NOTIFY_IRQ_NAME, + xpc_handle_notify_IRQ_uv); + if (!IS_ERR(xpc_notify_mq_uv)) + break; + } + if (IS_ERR(xpc_notify_mq_uv)) { + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + put_online_cpus(); + return PTR_ERR(xpc_notify_mq_uv); + } + + put_online_cpus(); + return 0; +} + int xpc_init_uv(void) { + int nid; + int ret = 0; + xpc_arch_ops = xpc_arch_ops_uv; if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { @@ -1742,21 +1785,21 @@ xpc_init_uv(void) return -E2BIG; } - xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, - XPC_ACTIVATE_IRQ_NAME, - xpc_handle_activate_IRQ_uv); - if (IS_ERR(xpc_activate_mq_uv)) - return PTR_ERR(xpc_activate_mq_uv); + if (xpc_mq_node < 0) + for_each_online_node(nid) { + ret = xpc_init_mq_node(nid); - xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, - XPC_NOTIFY_IRQ_NAME, - xpc_handle_notify_IRQ_uv); - if (IS_ERR(xpc_notify_mq_uv)) { - xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); - return PTR_ERR(xpc_notify_mq_uv); - } + if (!ret) + break; + } + else + ret = xpc_init_mq_node(xpc_mq_node); - return 0; + if (ret < 0) + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", + -ret); + + return ret; } void @@ -1765,3 +1808,6 @@ xpc_exit_uv(void) xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); } + +module_param(xpc_mq_node, int, 0); +MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 722246c..5d44252 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -435,6 +435,9 @@ static void tsi721_db_dpc(struct work_struct *work) " info %4.4x\n", DBELL_SID(idb.bytes), DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); } + + wr_ptr = ioread32(priv->regs + + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; } iowrite32(rd_ptr & (IDB_QSIZE - 1), @@ -445,6 +448,10 @@ static void tsi721_db_dpc(struct work_struct *work) regval |= TSI721_SR_CHINT_IDBQRCV; iowrite32(regval, priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); + + wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; + if (wr_ptr != rd_ptr) + schedule_work(&priv->idb_work); } /** @@ -2212,7 +2219,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct tsi721_device *priv; - int i, cap; + int cap; int err; u32 regval; @@ -2232,12 +2239,15 @@ static int __devinit tsi721_probe(struct pci_dev *pdev, priv->pdev = pdev; #ifdef DEBUG + { + int i; for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n", i, (unsigned long long)pci_resource_start(pdev, i), (unsigned long)pci_resource_len(pdev, i), pci_resource_flags(pdev, i)); } + } #endif /* * Verify BAR configuration diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index 8361187..13e4df6 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -43,6 +43,7 @@ #include <linux/rtc.h> #include <linux/spi/spi.h> #include <linux/module.h> +#include <linux/sysfs.h> #define DRV_VERSION "0.6" @@ -292,6 +293,7 @@ static int __devinit pcf2123_probe(struct spi_device *spi) pdata->rtc = rtc; for (i = 0; i < 16; i++) { + sysfs_attr_init(&pdata->regs[i].attr.attr); sprintf(pdata->regs[i].name, "%1x", i); pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR; pdata->regs[i].attr.attr.name = pdata->regs[i].name; diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c index 77074cc..fd5c7af 100644 --- a/drivers/rtc/rtc-rs5c348.c +++ b/drivers/rtc/rtc-rs5c348.c @@ -122,9 +122,12 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm) tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK); tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK); if (!pdata->rtc_24h) { - tm->tm_hour %= 12; - if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) + if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) { + tm->tm_hour -= 20; + tm->tm_hour %= 12; tm->tm_hour += 12; + } else + tm->tm_hour %= 12; } tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK); tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK); diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 133ddcf..ef65814 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync); + bool sync, bool *contended); extern int compact_pgdat(pg_data_t *pgdat, int order); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync) + bool sync, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/string.h b/include/linux/string.h index ffe0442..b917881 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -144,8 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix) { return strncmp(str, prefix, strlen(prefix)) == 0; } -#endif extern size_t memweight(const void *ptr, size_t bytes); +#endif /* __KERNEL__ */ #endif /* _LINUX_STRING_H_ */ diff --git a/mm/compaction.c b/mm/compaction.c index e78cb96..7fcd3a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -51,6 +51,47 @@ static inline bool migrate_async_suitable(int migratetype) } /* + * Compaction requires the taking of some coarse locks that are potentially + * very heavily contended. Check if the process needs to be scheduled or + * if the lock is contended. For async compaction, back out in the event + * if contention is severe. For sync compaction, schedule. + * + * Returns true if the lock is held. + * Returns false if the lock is released and compaction should abort + */ +static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, + bool locked, struct compact_control *cc) +{ + if (need_resched() || spin_is_contended(lock)) { + if (locked) { + spin_unlock_irqrestore(lock, *flags); + locked = false; + } + + /* async aborts if taking too long or contended */ + if (!cc->sync) { + if (cc->contended) + *cc->contended = true; + return false; + } + + cond_resched(); + if (fatal_signal_pending(current)) + return false; + } + + if (!locked) + spin_lock_irqsave(lock, *flags); + return true; +} + +static inline bool compact_trylock_irqsave(spinlock_t *lock, + unsigned long *flags, struct compact_control *cc) +{ + return compact_checklock_irqsave(lock, flags, false, cc); +} + +/* * Isolate free pages onto a private freelist. Caller must hold zone->lock. * If @strict is true, will abort returning 0 on any invalid PFNs or non-free * pages inside of the pageblock (even though it may still end up isolating @@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) } /* Update the number of anon and file isolated pages in the zone */ -static void acct_isolated(struct zone *zone, struct compact_control *cc) +static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc) { struct page *page; unsigned int count[2] = { 0, }; @@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc) list_for_each_entry(page, &cc->migratepages, lru) count[!!page_is_file_cache(page)]++; - __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); + /* If locked we can use the interrupt unsafe versions */ + if (locked) { + __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); + } else { + mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); + mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); + } } /* Similar to reclaim, but different enough that they don't share logic */ @@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, struct list_head *migratelist = &cc->migratepages; isolate_mode_t mode = 0; struct lruvec *lruvec; + unsigned long flags; + bool locked; /* * Ensure that there are not too many pages isolated from the LRU @@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, /* Time to isolate some pages for migration */ cond_resched(); - spin_lock_irq(&zone->lru_lock); + spin_lock_irqsave(&zone->lru_lock, flags); + locked = true; for (; low_pfn < end_pfn; low_pfn++) { struct page *page; - bool locked = true; /* give a chance to irqs before checking need_resched() */ if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) { - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irqrestore(&zone->lru_lock, flags); locked = false; } - if (need_resched() || spin_is_contended(&zone->lru_lock)) { - if (locked) - spin_unlock_irq(&zone->lru_lock); - cond_resched(); - spin_lock_irq(&zone->lru_lock); - if (fatal_signal_pending(current)) - break; - } else if (!locked) - spin_lock_irq(&zone->lru_lock); + + /* Check if it is ok to still hold the lock */ + locked = compact_checklock_irqsave(&zone->lru_lock, &flags, + locked, cc); + if (!locked) + break; /* * migrate_pfn does not necessarily start aligned to a @@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, } } - acct_isolated(zone, cc); + acct_isolated(zone, locked, cc); - spin_unlock_irq(&zone->lru_lock); + if (locked) + spin_unlock_irqrestore(&zone->lru_lock, flags); trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); @@ -384,6 +431,20 @@ static bool suitable_migration_target(struct page *page) } /* + * Returns the start pfn of the last page block in a zone. This is the starting + * point for full compaction of a zone. Compaction searches for free pages from + * the end of each zone, while isolate_freepages_block scans forward inside each + * page block. + */ +static unsigned long start_free_pfn(struct zone *zone) +{ + unsigned long free_pfn; + free_pfn = zone->zone_start_pfn + zone->spanned_pages; + free_pfn &= ~(pageblock_nr_pages-1); + return free_pfn; +} + +/* * Based on information in the current compact_control, find blocks * suitable for isolating free pages from and then isolate them. */ @@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone, pfn -= pageblock_nr_pages) { unsigned long isolated; - /* - * Skip ahead if another thread is compacting in the area - * simultaneously. If we wrapped around, we can only skip - * ahead if zone->compact_cached_free_pfn also wrapped to - * above our starting point. - */ - if (cc->order > 0 && (!cc->wrapped || - zone->compact_cached_free_pfn > - cc->start_free_pfn)) - pfn = min(pfn, zone->compact_cached_free_pfn); - if (!pfn_valid(pfn)) continue; @@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone, * are disabled */ isolated = 0; - spin_lock_irqsave(&zone->lock, flags); + + /* + * The zone lock must be held to isolate freepages. This + * unfortunately this is a very coarse lock and can be + * heavily contended if there are parallel allocations + * or parallel compactions. For async compaction do not + * spin on the lock + */ + if (!compact_trylock_irqsave(&zone->lock, &flags, cc)) + break; if (suitable_migration_target(page)) { end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); isolated = isolate_freepages_block(pfn, end_pfn, @@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone, */ if (isolated) { high_pfn = max(high_pfn, pfn); - if (cc->order > 0) + + /* + * If the free scanner has wrapped, update + * compact_cached_free_pfn to point to the highest + * pageblock with free pages. This reduces excessive + * scanning of full pageblocks near the end of the + * zone + */ + if (cc->order > 0 && cc->wrapped) zone->compact_cached_free_pfn = high_pfn; } } @@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone, cc->free_pfn = high_pfn; cc->nr_freepages = nr_freepages; + + /* If compact_cached_free_pfn is reset then set it now */ + if (cc->order > 0 && !cc->wrapped && + zone->compact_cached_free_pfn == start_free_pfn(zone)) + zone->compact_cached_free_pfn = high_pfn; } /* @@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, return ISOLATE_SUCCESS; } -/* - * Returns the start pfn of the last page block in a zone. This is the starting - * point for full compaction of a zone. Compaction searches for free pages from - * the end of each zone, while isolate_freepages_block scans forward inside each - * page block. - */ -static unsigned long start_free_pfn(struct zone *zone) -{ - unsigned long free_pfn; - free_pfn = zone->zone_start_pfn + zone->spanned_pages; - free_pfn &= ~(pageblock_nr_pages-1); - return free_pfn; -} - static int compact_finished(struct zone *zone, struct compact_control *cc) { @@ -771,7 +829,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync) + bool sync, bool *contended) { struct compact_control cc = { .nr_freepages = 0, @@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, + .contended = contended, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync) + bool sync, bool *contended) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - status = compact_zone_order(zone, order, gfp_mask, sync); + status = compact_zone_order(zone, order, gfp_mask, sync, + contended); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) if (cc->order > 0) { int ok = zone_watermark_ok(zone, cc->order, low_wmark_pages(zone), 0, 0); - if (ok && cc->order > zone->compact_order_failed) + if (ok && cc->order >= zone->compact_order_failed) zone->compact_order_failed = cc->order + 1; /* Currently async compaction is never deferred. */ else if (!ok && cc->sync) diff --git a/mm/internal.h b/mm/internal.h index 3314f79..b8c91b3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -130,6 +130,7 @@ struct compact_control { int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; + bool *contended; /* True if a lock was contended */ }; unsigned long @@ -2309,7 +2309,7 @@ void exit_mmap(struct mm_struct *mm) } vm_unacct_memory(nr_accounted); - BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); } /* Insert vm structure into process list sorted by address diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 009ac28..c66fb87 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1928,6 +1928,17 @@ this_zone_full: zlc_active = 0; goto zonelist_scan; } + + if (page) + /* + * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was + * necessary to allocate the page. The expectation is + * that the caller is taking steps that will free more + * memory. The caller should avoid the page being used + * for !PFMEMALLOC purposes. + */ + page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + return page; } @@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, int migratetype, bool sync_migration, - bool *deferred_compaction, + bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { struct page *page; @@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, - nodemask, sync_migration); + nodemask, sync_migration, + contended_compaction); current->flags &= ~PF_MEMALLOC; if (*did_some_progress != COMPACT_SKIPPED) { @@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, int migratetype, bool sync_migration, - bool *deferred_compaction, + bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { return NULL; @@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned long did_some_progress; bool sync_migration = false; bool deferred_compaction = false; + bool contended_compaction = false; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -2389,14 +2402,6 @@ rebalance: zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); if (page) { - /* - * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was - * necessary to allocate the page. The expectation is - * that the caller is taking steps that will free more - * memory. The caller should avoid the page being used - * for !PFMEMALLOC purposes. - */ - page->pfmemalloc = true; goto got_pg; } } @@ -2422,6 +2427,7 @@ rebalance: nodemask, alloc_flags, preferred_zone, migratetype, sync_migration, + &contended_compaction, &deferred_compaction, &did_some_progress); if (page) @@ -2431,10 +2437,11 @@ rebalance: /* * If compaction is deferred for high-order allocations, it is because * sync compaction recently failed. In this is the case and the caller - * has requested the system not be heavily disrupted, fail the - * allocation now instead of entering direct reclaim + * requested a movable allocation that does not heavily disrupt the + * system then fail the allocation instead of entering direct reclaim. */ - if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) + if ((deferred_compaction || contended_compaction) && + (gfp_mask & __GFP_NO_KSWAPD)) goto nopage; /* Try direct reclaim and then allocating */ @@ -2505,6 +2512,7 @@ rebalance: nodemask, alloc_flags, preferred_zone, migratetype, sync_migration, + &contended_compaction, &deferred_compaction, &did_some_progress); if (page) @@ -2569,8 +2577,6 @@ retry_cpuset: page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); - else - page->pfmemalloc = false; trace_mm_page_alloc(page, order, gfp_mask, migratetype); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 913d6bd..ca05ba2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3016,7 +3016,8 @@ sub process { $herectx .= raw_line($linenr, $n) . "\n"; } - if (($stmts =~ tr/;/;/) == 1) { + if (($stmts =~ tr/;/;/) == 1 && + $stmts !~ /^\s*(if|while|for|switch)\b/) { WARN("SINGLE_STATEMENT_DO_WHILE_MACRO", "Single statement macros should not use a do {} while (0) loop\n" . "$herectx"); } |