From 43a432b1559798d33970261f710030f787770231 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 9 Apr 2009 14:26:47 -0700 Subject: x86, CPA: Change idmap attribute before ioremap attribute setup Change the identity mapping with the requested attribute first, before we setup the virtual memory mapping with the new requested attribute. This makes sure that there is no window when identity map'ed attribute may disagree with ioremap range on the attribute type. This also avoids doing cpa on the ioremap'ed address twice (first in ioremap_page_range and then in ioremap_change_attr using vaddr), and should improve ioremap performance a bit. Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20090409212708.373330000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0dfa09d..329387e 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -280,15 +280,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { + + if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { free_memtype(phys_addr, phys_addr + size); free_vm_area(area); return NULL; } - if (ioremap_change_attr(vaddr, size, prot_val) < 0) { + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { free_memtype(phys_addr, phys_addr + size); - vunmap(area->addr); + free_vm_area(area); return NULL; } -- cgit v1.1 From a5593e0b329a14dea41ea173380dbf1533de2bd2 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 9 Apr 2009 14:26:48 -0700 Subject: x86, PAT: Change order of cpa and free in set_memory_wb To be free of aliasing due to races, set_memory_* interfaces should follow ordering of reserving, changing memtype to UC/WC, changing memtype back to WB followed by free. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha LKML-Reference: <20090409212708.512280000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d71e1b6..d487eaa 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1021,15 +1021,19 @@ int _set_memory_wb(unsigned long addr, int numpages) int set_memory_wb(unsigned long addr, int numpages) { + int ret = _set_memory_wb(addr, numpages); free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - - return _set_memory_wb(addr, numpages); + return ret; } EXPORT_SYMBOL(set_memory_wb); int set_memory_array_wb(unsigned long *addr, int addrinarray) { int i; + int ret; + + ret = change_page_attr_clear(addr, addrinarray, + __pgprot(_PAGE_CACHE_MASK), 1); for (i = 0; i < addrinarray; i++) { unsigned long start = __pa(addr[i]); @@ -1042,8 +1046,7 @@ int set_memory_array_wb(unsigned long *addr, int addrinarray) } free_memtype(start, end); } - return change_page_attr_clear(addr, addrinarray, - __pgprot(_PAGE_CACHE_MASK), 1); + return ret; } EXPORT_SYMBOL(set_memory_array_wb); -- cgit v1.1 From 9fa3ab390abfc8b49fc0dd7c845b0ad224ec429f Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 9 Apr 2009 14:26:49 -0700 Subject: x86, PAT: Handle faults cleanly in set_memory_ APIs Handle faults and do proper cleanups in set_memory_*() functions. In some cases, these functions were not doing proper free on failure paths. With the changes to tracking memtype of RAM pages in struct page instead of pat list, we do not need the changes in commits c5e147. This patch reverts that change. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha LKML-Reference: <20090409212708.653222000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 113 ++++++++++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 48 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d487eaa..985eef8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -945,52 +945,56 @@ int _set_memory_uc(unsigned long addr, int numpages) int set_memory_uc(unsigned long addr, int numpages) { + int ret; + /* * for now UC MINUS. see comments in ioremap_nocache() */ - if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, - _PAGE_CACHE_UC_MINUS, NULL)) - return -EINVAL; + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_UC_MINUS, NULL); + if (ret) + goto out_err; + + ret = _set_memory_uc(addr, numpages); + if (ret) + goto out_free; + + return 0; - return _set_memory_uc(addr, numpages); +out_free: + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); +out_err: + return ret; } EXPORT_SYMBOL(set_memory_uc); int set_memory_array_uc(unsigned long *addr, int addrinarray) { - unsigned long start; - unsigned long end; - int i; + int i, j; + int ret; + /* * for now UC MINUS. see comments in ioremap_nocache() */ for (i = 0; i < addrinarray; i++) { - start = __pa(addr[i]); - for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { - if (end != __pa(addr[i + 1])) - break; - i++; - } - if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) - goto out; + ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, + _PAGE_CACHE_UC_MINUS, NULL); + if (ret) + goto out_free; } - return change_page_attr_set(addr, addrinarray, + ret = change_page_attr_set(addr, addrinarray, __pgprot(_PAGE_CACHE_UC_MINUS), 1); -out: - for (i = 0; i < addrinarray; i++) { - unsigned long tmp = __pa(addr[i]); - - if (tmp == start) - break; - for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { - if (end != __pa(addr[i + 1])) - break; - i++; - } - free_memtype(tmp, end); - } - return -EINVAL; + if (ret) + goto out_free; + + return 0; + +out_free: + for (j = 0; j < i; j++) + free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE); + + return ret; } EXPORT_SYMBOL(set_memory_array_uc); @@ -1002,14 +1006,26 @@ int _set_memory_wc(unsigned long addr, int numpages) int set_memory_wc(unsigned long addr, int numpages) { + int ret; + if (!pat_enabled) return set_memory_uc(addr, numpages); - if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, - _PAGE_CACHE_WC, NULL)) - return -EINVAL; + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_WC, NULL); + if (ret) + goto out_err; - return _set_memory_wc(addr, numpages); + ret = _set_memory_wc(addr, numpages); + if (ret) + goto out_free; + + return 0; + +out_free: + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); +out_err: + return ret; } EXPORT_SYMBOL(set_memory_wc); @@ -1021,9 +1037,14 @@ int _set_memory_wb(unsigned long addr, int numpages) int set_memory_wb(unsigned long addr, int numpages) { - int ret = _set_memory_wb(addr, numpages); + int ret; + + ret = _set_memory_wb(addr, numpages); + if (ret) + return ret; + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - return ret; + return 0; } EXPORT_SYMBOL(set_memory_wb); @@ -1034,19 +1055,13 @@ int set_memory_array_wb(unsigned long *addr, int addrinarray) ret = change_page_attr_clear(addr, addrinarray, __pgprot(_PAGE_CACHE_MASK), 1); + if (ret) + return ret; - for (i = 0; i < addrinarray; i++) { - unsigned long start = __pa(addr[i]); - unsigned long end; + for (i = 0; i < addrinarray; i++) + free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE); - for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { - if (end != __pa(addr[i + 1])) - break; - i++; - } - free_memtype(start, end); - } - return ret; + return 0; } EXPORT_SYMBOL(set_memory_array_wb); @@ -1139,6 +1154,8 @@ int set_pages_array_wb(struct page **pages, int addrinarray) retval = cpa_clear_pages_array(pages, addrinarray, __pgprot(_PAGE_CACHE_MASK)); + if (retval) + return retval; for (i = 0; i < addrinarray; i++) { start = (unsigned long)page_address(pages[i]); @@ -1146,7 +1163,7 @@ int set_pages_array_wb(struct page **pages, int addrinarray) free_memtype(start, end); } - return retval; + return 0; } EXPORT_SYMBOL(set_pages_array_wb); -- cgit v1.1 From 3869c4aa18835c8c61b44bd0f3ace36e9d3b5bd0 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 9 Apr 2009 14:26:50 -0700 Subject: x86, PAT: Changing memtype to WC ensuring no WB alias As per SDM, there should not be any aliasing of a WC with any cacheable type across CPUs. That is if one CPU is changing the identity map memtype to _WC, no other CPU at the time of this change should not have a TLB for this page that carries a WB attribute. SDM suggests to make the page not present. But for that we will have to handle any page faults that can potentially happen due to these pages being not present. Other way to deal with this without having any WB mapping is to change the page first to UC and then to WC. This ensures that we meet the SDM requirement of no cacheable alais to WC page. This also has same or lower overhead than marking the page not present and making it present later. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha LKML-Reference: <20090409212708.797481000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 985eef8..797f9f1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1000,8 +1000,15 @@ EXPORT_SYMBOL(set_memory_array_uc); int _set_memory_wc(unsigned long addr, int numpages) { - return change_page_attr_set(&addr, numpages, + int ret; + ret = change_page_attr_set(&addr, numpages, + __pgprot(_PAGE_CACHE_UC_MINUS), 0); + + if (!ret) { + ret = change_page_attr_set(&addr, numpages, __pgprot(_PAGE_CACHE_WC), 0); + } + return ret; } int set_memory_wc(unsigned long addr, int numpages) -- cgit v1.1 From b6ff32d9aaeeeecf98f9a852d715569183585312 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 9 Apr 2009 14:26:51 -0700 Subject: x86, PAT: Consolidate code in pat_x_mtrr_type() and reserve_memtype() Fix pat_x_mtrr_type() to use UC_MINUS when the mtrr type return UC. This is to be consistent with ioremap() and ioremap_nocache() which uses UC_MINUS. Consolidate the code such that reserve_memtype() also uses pat_x_mtrr_type() when the caller doesn't specify any special attribute (non WB attribute). Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20090409212708.939936000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 3 ++- arch/x86/mm/pat.c | 35 +++++++++++++---------------------- 2 files changed, 15 insertions(+), 23 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 329387e..d4c4b2c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -375,7 +375,8 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, * - UC_MINUS for non-WB-able memory with no other conflicting mappings * - Inherit from confliting mappings otherwise */ - err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags); + err = reserve_memtype(phys_addr, phys_addr + size, + _PAGE_CACHE_WB, &flags); if (err < 0) return NULL; diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 640339e..8d3de95 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -182,10 +182,10 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) u8 mtrr_type; mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == MTRR_TYPE_UNCACHABLE) - return _PAGE_CACHE_UC; - if (mtrr_type == MTRR_TYPE_WRCOMB) - return _PAGE_CACHE_WC; + if (mtrr_type != MTRR_TYPE_WRBACK) + return _PAGE_CACHE_UC_MINUS; + + return _PAGE_CACHE_WB; } return req_type; @@ -352,23 +352,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return 0; } - if (req_type == -1) { - /* - * Call mtrr_lookup to get the type hint. This is an - * optimization for /dev/mem mmap'ers into WB memory (BIOS - * tools and ACPI tools). Use WB request for WB memory and use - * UC_MINUS otherwise. - */ - u8 mtrr_type = mtrr_type_lookup(start, end); - - if (mtrr_type == MTRR_TYPE_WRBACK) - actual_type = _PAGE_CACHE_WB; - else - actual_type = _PAGE_CACHE_UC_MINUS; - } else { - actual_type = pat_x_mtrr_type(start, end, - req_type & _PAGE_CACHE_MASK); - } + /* + * Call mtrr_lookup to get the type hint. This is an + * optimization for /dev/mem mmap'ers into WB memory (BIOS + * tools and ACPI tools). Use WB request for WB memory and use + * UC_MINUS otherwise. + */ + actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK); if (new_type) *new_type = actual_type; @@ -587,7 +577,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (flags != -1) { retval = reserve_memtype(offset, offset + size, flags, NULL); } else { - retval = reserve_memtype(offset, offset + size, -1, &flags); + retval = reserve_memtype(offset, offset + size, + _PAGE_CACHE_WB, &flags); } if (retval < 0) -- cgit v1.1 From 0c3c8a18361a636069f5a5d9d0d0f9c2124e6b94 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 9 Apr 2009 14:26:52 -0700 Subject: x86, PAT: Remove duplicate memtype reserve in devmem mmap /dev/mem mmap code was doing memtype reserve/free for a while now. Recently we added memtype tracking in remap_pfn_range, and /dev/mem mmap uses it indirectly. So, we don't need seperate tracking in /dev/mem code any more. That means another ~100 lines of code removed :-). Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20090409212709.085210000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 60 ++----------------------------------------------------- 1 file changed, 2 insertions(+), 58 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8d3de95..cc5e0e2 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -536,9 +536,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { - u64 offset = ((u64) pfn) << PAGE_SHIFT; - unsigned long flags = -1; - int retval; + unsigned long flags = _PAGE_CACHE_WB; if (!range_is_allowed(pfn, size)) return 0; @@ -566,65 +564,11 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, } #endif - /* - * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. - * - * Without O_SYNC, we want to get - * - WB for WB-able memory and no other conflicting mappings - * - UC_MINUS for non-WB-able memory with no other conflicting mappings - * - Inherit from confliting mappings otherwise - */ - if (flags != -1) { - retval = reserve_memtype(offset, offset + size, flags, NULL); - } else { - retval = reserve_memtype(offset, offset + size, - _PAGE_CACHE_WB, &flags); - } - - if (retval < 0) - return 0; - - if (((pfn < max_low_pfn_mapped) || - (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { - free_memtype(offset, offset + size); - printk(KERN_INFO - "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", - current->comm, current->pid, - cattr_name(flags), - offset, (unsigned long long)(offset + size)); - return 0; - } - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | flags); return 1; } -void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) -{ - unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); - u64 addr = (u64)pfn << PAGE_SHIFT; - unsigned long flags; - - reserve_memtype(addr, addr + size, want_flags, &flags); - if (flags != want_flags) { - printk(KERN_INFO - "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - addr, (unsigned long long)(addr + size), - cattr_name(flags)); - } -} - -void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) -{ - u64 addr = (u64)pfn << PAGE_SHIFT; - - free_memtype(addr, addr + size); -} - /* * Change the memory type for the physial address range in kernel identity * mapping space if that range is a part of identity map. @@ -662,8 +606,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, { int is_ram = 0; int ret; - unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); + unsigned long flags = want_flags; is_ram = pat_pagerange_is_ram(paddr, paddr + size); -- cgit v1.1 From 4b065046273afa01ec8e3de7da407e8d3599251d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 8 Apr 2009 15:37:16 -0700 Subject: x86, PAT: Remove page granularity tracking for vm_insert_pfn maps This change resolves the problem of too many single page entries in pat_memtype_list and "freeing invalid memtype" errors with i915, reported here: http://marc.info/?l=linux-kernel&m=123845244713183&w=2 Remove page level granularity track and untrack of vm_insert_pfn. memtype tracking at page granularity does not scale and cleaner approach would be for the driver to request a type for a bigger IO address range or PCI io memory range for that device, either at mmap time or driver init time and just use that type during vm_insert_pfn. This patch just removes the track/untrack of vm_insert_pfn. That means we will be in same state as 2.6.28, with respect to these APIs. Newer APIs for the drivers to request a memtype for a bigger region is coming soon. [ Impact: fix Xorg startup warnings and hangs ] Reported-by: Arkadiusz Miskiewicz Tested-by: Arkadiusz Miskiewicz Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Jesse Barnes LKML-Reference: <20090408223716.GC3493@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 98 +++++++++++-------------------------------------------- 1 file changed, 19 insertions(+), 79 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index cc5e0e2..41c8057 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -669,29 +669,28 @@ static void free_pfn_range(u64 paddr, unsigned long size) * * If the vma has a linear pfn mapping for the entire range, we get the prot * from pte and reserve the entire vma range with single reserve_pfn_range call. - * Otherwise, we reserve the entire vma range, my ging through the PTEs page - * by page to get physical address and protection. */ int track_pfn_vma_copy(struct vm_area_struct *vma) { - int retval = 0; - unsigned long i, j; resource_size_t paddr; unsigned long prot; - unsigned long vma_start = vma->vm_start; - unsigned long vma_end = vma->vm_end; - unsigned long vma_size = vma_end - vma_start; + unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (!pat_enabled) return 0; + /* + * For now, only handle remap_pfn_range() vmas where + * is_linear_pfn_mapping() == TRUE. Handling of + * vm_insert_pfn() is TBD. + */ if (is_linear_pfn_mapping(vma)) { /* * reserve the whole chunk covered by vma. We need the * starting address and protection from pte. */ - if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { + if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { WARN_ON_ONCE(1); return -EINVAL; } @@ -699,28 +698,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } - /* reserve entire vma page by page, using pfn and prot from pte */ - for (i = 0; i < vma_size; i += PAGE_SIZE) { - if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) - continue; - - pgprot = __pgprot(prot); - retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1); - if (retval) - goto cleanup_ret; - } return 0; - -cleanup_ret: - /* Reserve error: Cleanup partial reservation and return error */ - for (j = 0; j < i; j += PAGE_SIZE) { - if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) - continue; - - free_pfn_range(paddr, PAGE_SIZE); - } - - return retval; } /* @@ -730,50 +708,28 @@ cleanup_ret: * prot is passed in as a parameter for the new mapping. If the vma has a * linear pfn mapping for the entire range reserve the entire vma range with * single reserve_pfn_range call. - * Otherwise, we look t the pfn and size and reserve only the specified range - * page by page. - * - * Note that this function can be called with caller trying to map only a - * subrange/page inside the vma. */ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { - int retval = 0; - unsigned long i, j; - resource_size_t base_paddr; resource_size_t paddr; - unsigned long vma_start = vma->vm_start; - unsigned long vma_end = vma->vm_end; - unsigned long vma_size = vma_end - vma_start; + unsigned long vma_size = vma->vm_end - vma->vm_start; if (!pat_enabled) return 0; + /* + * For now, only handle remap_pfn_range() vmas where + * is_linear_pfn_mapping() == TRUE. Handling of + * vm_insert_pfn() is TBD. + */ if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; return reserve_pfn_range(paddr, vma_size, prot, 0); } - /* reserve page by page using pfn and size */ - base_paddr = (resource_size_t)pfn << PAGE_SHIFT; - for (i = 0; i < size; i += PAGE_SIZE) { - paddr = base_paddr + i; - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0); - if (retval) - goto cleanup_ret; - } return 0; - -cleanup_ret: - /* Reserve error: Cleanup partial reservation and return error */ - for (j = 0; j < i; j += PAGE_SIZE) { - paddr = base_paddr + j; - free_pfn_range(paddr, PAGE_SIZE); - } - - return retval; } /* @@ -784,39 +740,23 @@ cleanup_ret: void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, unsigned long size) { - unsigned long i; resource_size_t paddr; - unsigned long prot; - unsigned long vma_start = vma->vm_start; - unsigned long vma_end = vma->vm_end; - unsigned long vma_size = vma_end - vma_start; + unsigned long vma_size = vma->vm_end - vma->vm_start; if (!pat_enabled) return; + /* + * For now, only handle remap_pfn_range() vmas where + * is_linear_pfn_mapping() == TRUE. Handling of + * vm_insert_pfn() is TBD. + */ if (is_linear_pfn_mapping(vma)) { /* free the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; free_pfn_range(paddr, vma_size); return; } - - if (size != 0 && size != vma_size) { - /* free page by page, using pfn and size */ - paddr = (resource_size_t)pfn << PAGE_SHIFT; - for (i = 0; i < size; i += PAGE_SIZE) { - paddr = paddr + i; - free_pfn_range(paddr, PAGE_SIZE); - } - } else { - /* free entire vma, page by page, using the pfn from pte */ - for (i = 0; i < vma_size; i += PAGE_SIZE) { - if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) - continue; - - free_pfn_range(paddr, PAGE_SIZE); - } - } } pgprot_t pgprot_writecombine(pgprot_t prot) -- cgit v1.1 From dc098551918093901d8ac8936e9d1a1b891b56ed Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 17 Apr 2009 09:22:42 -0500 Subject: x86/uv: fix init of memory-less nodes Add support for nodes that have cpus but no memory. The current code was failing to add these nodes to the nodes_present_map. v2: Fixes case caught by David Rientjes - missed support for the x2apic SRAT table. [ Impact: fix potential boot crash on memory-less UV nodes. ] Reported-by: David Rientjes Signed-off-by: Jack Steiner LKML-Reference: <20090417142242.GA23743@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index c7d272b..33c5fa5 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -28,6 +28,7 @@ int acpi_numa __initdata; static struct acpi_table_slit *acpi_slit; static nodemask_t nodes_parsed __initdata; +static nodemask_t cpu_nodes_parsed __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES]; static int found_add_area __initdata; @@ -141,6 +142,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apic_id = pa->apic_id; apicid_to_node[apic_id] = node; + node_set(node, cpu_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", pxm, apic_id, node); @@ -174,6 +176,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) else apic_id = pa->apic_id; apicid_to_node[apic_id] = node; + node_set(node, cpu_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", pxm, apic_id, node); @@ -402,7 +405,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; } - node_possible_map = nodes_parsed; + /* Account for nodes with cpus and no memory */ + nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); /* Finally register nodes */ for_each_node_mask(i, node_possible_map) -- cgit v1.1 From a81b6314e0aa480b8ac6dd02779d44cd0bee0a34 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 17 Apr 2009 23:31:20 +0530 Subject: x86: mm/numa_32.c calculate_numa_remap_pages should use __init calculate_numa_remap_pages() is called only by __init initmem_init() further calculate_numa_remap_pages is calling: __init find_e820_area() and __init reserve_early() So calculate_numa_remap_pages() should be __init calculate_numa_remap_pages(). WARNING: arch/x86/built-in.o(.text+0x82ea3): Section mismatch in reference from the function calculate_numa_remap_pages() to the function .init.text:find_e820_area() The function calculate_numa_remap_pages() references the function __init find_e820_area(). This is often because calculate_numa_remap_pages lacks a __init annotation or the annotation of find_e820_area is wrong. WARNING: arch/x86/built-in.o(.text+0x82f5f): Section mismatch in reference from the function calculate_numa_remap_pages() to the function .init.text:reserve_early() The function calculate_numa_remap_pages() references the function __init reserve_early(). This is often because calculate_numa_remap_pages lacks a __init annotation or the annotation of reserve_early is wrong. [ Impact: save memory, address Section mismatch warning ] Signed-off-by: Jaswinder Singh Rajput Cc: Sam Ravnborg LKML-Reference: <1239991281.3153.4.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 3daefa0..d253006 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -257,7 +257,7 @@ void resume_map_numa_kva(pgd_t *pgd_base) } #endif -static unsigned long calculate_numa_remap_pages(void) +static __init unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; -- cgit v1.1 From 4c31e92b97b6d7e7b19ee5e54a22571ffdebb305 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 22 Apr 2009 14:19:27 -0700 Subject: x86: check boundary in setup_node_bootmem() Commit dc09855 ("x86/uv: fix init of memory-less nodes") causes a two sockets system (where node-1 doesn't have RAM installed) to crash. That commit makes node_possible include cpu nodes that do not have memory. So check boundary in setup_node_bootmem(). [ Impact: fix boot crash on RAM-less NUMA node system ] Signed-off-by: Yinghai Lu Cc: Jack Steiner LKML-Reference: <49EF89DF.9090404@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_64.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index d73aaa8..2d05a12 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -188,6 +188,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); int nid; + if (!end) + return; + start = roundup(start, ZONE_ALIGN); printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, -- cgit v1.1 From 33015c85995716d03f6293346cf05a1908b0fb9a Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:48 +0100 Subject: tracing: x86, mmiotrace: fix range test Matching on (addr == (p->addr + p->len)) causes problems when mappings are adjacent. [ Impact: fix mmiotrace confusion on adjacent iomaps ] Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-2-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4f115e0..50dc802 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -87,7 +87,7 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) { struct kmmio_probe *p; list_for_each_entry_rcu(p, &kmmio_probes, list) { - if (addr >= p->addr && addr <= (p->addr + p->len)) + if (addr >= p->addr && addr < (p->addr + p->len)) return p; } return NULL; -- cgit v1.1 From 7eccf7b227b6d3b1745b937ce35efc9c27f9b0e5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 5 May 2009 12:50:02 -0700 Subject: x86, srat: do not register nodes beyond e820 map The mem= option will truncate the memory map at a specified address so it's not possible to register nodes with memory beyond the e820 upper bound. unparse_node() is only called when then node had memory associated with it, although with the mem= option it is no longer addressable. [ Impact: fix boot hang on certain (large) systems ] Reported-by: "Zhang, Yanmin" Signed-off-by: David Rientjes Acked-by: Jack Steiner LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 33c5fa5..0176595 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -361,6 +361,7 @@ static void __init unparse_node(int node) { int i; node_clear(node, nodes_parsed); + node_clear(node, cpu_nodes_parsed); for (i = 0; i < MAX_LOCAL_APIC; i++) { if (apicid_to_node[i] == node) apicid_to_node[i] = NUMA_NO_NODE; -- cgit v1.1 From e0e5ea3268db428d19e1c5fa00e6f583861cbdbd Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 4 May 2009 09:08:26 +0530 Subject: x86: Fix a typo in a printk message [ Impact: printk message cleanup ] Signed-off-by: Nikanth Karthikesan LKML-Reference: <200905040908.27299.knikanth@suse.de> Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 16ae70f..29a0e37 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -216,7 +216,7 @@ int __init get_memcfg_from_srat(void) if (num_memory_chunks == 0) { printk(KERN_WARNING - "could not finy any ACPI SRAT memory areas.\n"); + "could not find any ACPI SRAT memory areas.\n"); goto out_fail; } -- cgit v1.1 From 498343967613183611ac37dccb2846496d954c06 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 May 2009 13:06:47 +0100 Subject: x86-64: finish cleanup_highmaps()'s job wrt. _brk_end With the introduction of the .brk section, special care must be taken that no unused page table entries remain if _brk_end and _end are separated by a 2M page boundary. cleanup_highmap() runs very early and hence cannot take care of that, hence potential entries needing to be removed past _brk_end must be cleared once the brk allocator has done its job. [ Impact: avoids undesirable TLB aliases ] Signed-off-by: Jan Beulich Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd3da1d..ae4f7b5 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -304,8 +305,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, #endif #ifdef CONFIG_X86_64 - if (!after_bootmem) + if (!after_bootmem && !start) { + pud_t *pud; + pmd_t *pmd; + mmu_cr4_features = read_cr4(); + + /* + * _brk_end cannot change anymore, but it and _end may be + * located on different 2M pages. cleanup_highmap(), however, + * can only consider _end when it runs, so destroy any + * mappings beyond _brk_end here. + */ + pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); + pmd = pmd_offset(pud, _brk_end - 1); + while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) + pmd_clear(pmd); + } #endif __flush_tlb_all(); -- cgit v1.1