author      James Bottomley <jejb@mulgrave.il.steeleye.com>   2007-01-31 11:24:00 -0600
committer   James Bottomley <jejb@mulgrave.il.steeleye.com>   2007-01-31 11:24:00 -0600
commit      30716e07ef511ec7525c07eb1e8060ba8943c2a2 (patch)
tree        eb6a47cae63d3587fa773cc5a16781eaa2c7810b /mm
parent      03c79cc56e4497cbd09d74a73c1bd0d1d9a8a16c (diff)
parent      f56df2f4db6e4af87fb8e941cff69f4501a111df (diff)
Merge branch 'linus'
Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap_xip.c      4
-rw-r--r--   mm/memory.c          11
-rw-r--r--   mm/mempolicy.c        4
-rw-r--r--   mm/mmap.c             7
-rw-r--r--   mm/mremap.c           1
-rw-r--r--   mm/page-writeback.c  41
-rw-r--r--   mm/truncate.c        22
7 files changed, 54 insertions, 36 deletions
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 45b3553..9dd9fbb 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -183,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		page = ZERO_PAGE(address);
+		page = ZERO_PAGE(0);
 		pte = page_check_address(page, mm, address, &ptl);
 		if (pte) {
 			/* Nuke the page table entry. */
@@ -246,7 +246,7 @@ xip_file_nopage(struct vm_area_struct * area,
 		__xip_unmap(mapping, pgoff);
 	} else {
 		/* not shared and writable, use ZERO_PAGE() */
-		page = ZERO_PAGE(address);
+		page = ZERO_PAGE(0);
 	}
 
 out:
diff --git a/mm/memory.c b/mm/memory.c
index af227d2..ef09f0ac 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2606,8 +2606,15 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_mm = NULL;
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
-	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = 0;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	gate_vma.vm_flags |= VM_ALWAYSDUMP;
 	return 0;
 }
 __initcall(gate_vma_init);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index da94639..c2aec0e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -884,6 +884,10 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 	err = get_nodes(&nodes, nmask, maxnode);
 	if (err)
 		return err;
+#ifdef CONFIG_CPUSETS
+	/* Restrict the nodes to the allowed nodes in the cpuset */
+	nodes_and(nodes, nodes, current->mems_allowed);
+#endif
 	return do_mbind(start, len, mode, &nodes, flags);
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1477,6 +1477,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, grow))
@@ -1496,6 +1497,12 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
 		return -ENOMEM;
 	}
 
+	/* Check to ensure the stack will not grow into a hugetlb-only region */
+	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
+			vma->vm_end - size;
+	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
+		return -EFAULT;
+
 	/*
 	 * Overcommit.. This must be the final test, as it will
 	 * update security statistics.
diff --git a/mm/mremap.c b/mm/mremap.c
index 9c769fa..5d4bd4f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -105,7 +105,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		if (pte_none(*old_pte))
 			continue;
 		pte = ptep_clear_flush(vma, old_addr, old_pte);
-		/* ZERO_PAGE can be dependant on virtual addr */
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1d2fc89c..be0efbd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -133,11 +133,9 @@ get_dirty_limits(long *pbackground, long *pdirty,
 
 #ifdef CONFIG_HIGHMEM
 	/*
-	 * If this mapping can only allocate from low memory,
-	 * we exclude high memory from our count.
+	 * We always exclude high memory from our count.
 	 */
-	if (mapping && !(mapping_gfp_mask(mapping) & __GFP_HIGHMEM))
-		available_memory -= totalhigh_pages;
+	available_memory -= totalhigh_pages;
 #endif
 
@@ -526,28 +524,25 @@ static struct notifier_block __cpuinitdata ratelimit_nb = {
 };
 
 /*
- * If the machine has a large highmem:lowmem ratio then scale back the default
- * dirty memory thresholds: allowing too much dirty highmem pins an excessive
- * number of buffer_heads.
+ * Called early on to tune the page writeback dirty limits.
+ *
+ * We used to scale dirty pages according to how total memory
+ * related to pages that could be allocated for buffers (by
+ * comparing nr_free_buffer_pages() to vm_total_pages).
+ *
+ * However, that was when we used "dirty_ratio" to scale with
+ * all memory, and we don't do that any more. "dirty_ratio"
+ * is now applied to total non-HIGHPAGE memory (by subtracting
+ * totalhigh_pages from vm_total_pages), and as such we can't
+ * get into the old insane situation any more where we had
+ * large amounts of dirty pages compared to a small amount of
+ * non-HIGHMEM memory.
+ *
+ * But we might still want to scale the dirty_ratio by how
+ * much memory the box has..
 */
 void __init page_writeback_init(void)
 {
-	long buffer_pages = nr_free_buffer_pages();
-	long correction;
-
-	correction = (100 * 4 * buffer_pages) / vm_total_pages;
-
-	if (correction < 100) {
-		dirty_background_ratio *= correction;
-		dirty_background_ratio /= 100;
-		vm_dirty_ratio *= correction;
-		vm_dirty_ratio /= 100;
-
-		if (dirty_background_ratio <= 0)
-			dirty_background_ratio = 1;
-		if (vm_dirty_ratio <= 0)
-			vm_dirty_ratio = 1;
-	}
 	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
diff --git a/mm/truncate.c b/mm/truncate.c
index 6c79ca4..5df947d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -51,15 +51,22 @@ static inline void truncate_partial_page(struct page *page, unsigned partial)
 		do_invalidatepage(page, partial);
 }
 
+/*
+ * This cancels just the dirty bit on the kernel page itself, it
+ * does NOT actually remove dirty bits on any mmap's that may be
+ * around. It also leaves the page tagged dirty, so any sync
+ * activity will still find it on the dirty lists, and in particular,
+ * clear_page_dirty_for_io() will still look at the dirty bits in
+ * the VM.
+ *
+ * Doing this should *normally* only ever be done when a page
+ * is truncated, and is not actually mapped anywhere at all. However,
+ * fs/buffer.c does this when it notices that somebody has cleaned
+ * out all the buffers on a page without actually doing it through
+ * the VM. Can you say "ext3 is horribly ugly"? Thought you could.
+ */
 void cancel_dirty_page(struct page *page, unsigned int account_size)
 {
-	/* If we're cancelling the page, it had better not be mapped any more */
-	if (page_mapped(page)) {
-		static unsigned int warncount;
-
-		WARN_ON(++warncount < 5);
-	}
-
 	if (TestClearPageDirty(page)) {
 		struct address_space *mapping = page->mapping;
 		if (mapping && mapping_cap_account_dirty(mapping)) {
@@ -422,7 +429,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		cond_resched();
 	}
-	WARN_ON_ONCE(ret);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
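
The acct_stack_growth() hunk above rejects stack expansion that would land in a hugetlb-only region. Below is a minimal userspace sketch of that new_start logic, not kernel code: the pared-down vma struct, the example addresses, and the is_hugepage_only_range() stub are invented stand-ins (the kernel calls the architecture's real is_hugepage_only_range()); only the VM_GROWSUP/new_start arithmetic mirrors the patch.

/* Userspace sketch of the stack-growth check; assumptions noted above. */
#include <stdio.h>

#define VM_GROWSUP 0x00000200UL		/* flag value as in the 2.6 headers */

struct vma {
	unsigned long vm_start;		/* lowest address of the mapping */
	unsigned long vm_end;		/* one past the highest address */
	unsigned long vm_flags;
};

/* Stub predicate: pretend everything at or above 3 GiB is hugetlb-only. */
static int is_hugepage_only_range(unsigned long addr, unsigned long len)
{
	return addr + len > 0xC0000000UL;
}

/* Mirrors the check the patch inserts before the overcommit test. */
static int stack_growth_ok(const struct vma *vma, unsigned long size)
{
	/* size is the total size the vma would have after growing */
	unsigned long new_start = (vma->vm_flags & VM_GROWSUP) ?
			vma->vm_start :		/* grows up: start stays put */
			vma->vm_end - size;	/* grows down: start moves down */

	return !is_hugepage_only_range(new_start, size);
}

int main(void)
{
	struct vma stack  = { 0xBF800000UL, 0xC0000000UL, 0 };	/* grows down */
	struct vma upward = { 0xBFF00000UL, 0xBFF10000UL, VM_GROWSUP };

	printf("grows-down stack to 8 MiB: %s\n",
	       stack_growth_ok(&stack, 8UL << 20) ? "ok" : "-EFAULT");
	printf("grows-up vma to 2 MiB: %s\n",
	       stack_growth_ok(&upward, 2UL << 20) ? "ok" : "-EFAULT");
	return 0;
}

The first call succeeds because the grown range stays below the stubbed hugetlb boundary; the second fails because a grows-up vma keeps its vm_start and the added length crosses it.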
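The page_writeback_init() hunk deletes the old highmem correction outright. As a sanity check on the reasoning in the new comment, this second sketch replays the deleted arithmetic in userspace; the page counts are invented (a hypothetical 4 GiB box with roughly 896 MiB of lowmem), the ratio defaults are assumed, and the rest is copied from the removed lines.

/* Replay of the correction the removed page_writeback_init() code applied. */
#include <stdio.h>

int main(void)
{
	long vm_total_pages = 1048576;		/* invented: 4 GiB in 4 KiB pages */
	long buffer_pages = 229376;		/* invented: ~896 MiB of lowmem */
	long dirty_background_ratio = 10;	/* assumed default, percent */
	long vm_dirty_ratio = 40;		/* assumed default, percent */

	long correction = (100 * 4 * buffer_pages) / vm_total_pages;

	if (correction < 100) {
		dirty_background_ratio = dirty_background_ratio * correction / 100;
		vm_dirty_ratio = vm_dirty_ratio * correction / 100;

		if (dirty_background_ratio <= 0)
			dirty_background_ratio = 1;
		if (vm_dirty_ratio <= 0)
			vm_dirty_ratio = 1;
	}

	/* Prints: correction=87 background=8 dirty=34 */
	printf("correction=%ld background=%ld dirty=%ld\n",
	       correction, dirty_background_ratio, vm_dirty_ratio);
	return 0;
}

On such a box the old code silently scaled both thresholds down; the patch drops this because the ratios are now applied to non-highmem memory in get_dirty_limits() itself, as the first page-writeback.c hunk shows.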