diff options
author | Bjorn Helgaas <bjorn.helgaas@hp.com> | 2006-05-05 17:19:50 -0600 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2006-05-08 16:32:05 -0700 |
commit | 32e62c636a728cb39c0b3bd191286f2ca65d4028 (patch) | |
tree | 656454a01e720819103c172daae15b5f2fd85d68 /arch | |
parent | 6810b548b25114607e0814612d84125abccc0a4f (diff) | |
download | op-kernel-dev-32e62c636a728cb39c0b3bd191286f2ca65d4028.zip op-kernel-dev-32e62c636a728cb39c0b3bd191286f2ca65d4028.tar.gz |
[IA64] rework memory attribute aliasing
This closes a couple of holes in our attribute aliasing avoidance scheme:
- The current kernel fails mmaps of some /dev/mem MMIO regions because
  they don't appear in the EFI memory map. This keeps X from working
  on the Intel Tiger box.
- The current kernel allows uncacheable (UC) mmap of the 0-1MB region of
  /sys/.../legacy_mem even when the chipset doesn't support UC
  access. This causes a machine check abort (MCA) when starting X on
  HP rx7620 and rx8620 boxes in the default configuration.
There's more detail in the Documentation/ia64/aliasing.txt file that this
patch adds, but the general idea is that if a region might be covered by
a granule-sized kernel identity mapping, any access via /dev/mem or
mmap must use the same attribute as the identity mapping.
Otherwise, we fall back to using an attribute that is supported
according to the EFI memory map, or to using UC if the EFI memory
map doesn't mention the region.
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/ia64/kernel/efi.c | 156 | ||||
-rw-r--r-- | arch/ia64/mm/ioremap.c | 27 | ||||
-rw-r--r-- | arch/ia64/pci/pci.c | 17 |
3 files changed, 139 insertions, 61 deletions
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 12cfedc..c33d0ba 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -8,6 +8,8 @@ * Copyright (C) 1999-2003 Hewlett-Packard Co. * David Mosberger-Tang <davidm@hpl.hp.com> * Stephane Eranian <eranian@hpl.hp.com> + * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas <bjorn.helgaas@hp.com> * * All EFI Runtime Services are not implemented yet as EFI only * supports physical mode addressing on SoftSDV. This is to be fixed @@ -622,28 +624,20 @@ efi_get_iobase (void) return 0; } -static efi_memory_desc_t * -efi_memory_descriptor (unsigned long phys_addr) +static struct kern_memdesc * +kern_memory_descriptor (unsigned long phys_addr) { - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; + struct kern_memdesc *md; - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - - if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) + for (md = kern_memmap; md->start != ~0UL; md++) { + if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT)) return md; } return 0; } -static int -efi_memmap_has_mmio (void) +static efi_memory_desc_t * +efi_memory_descriptor (unsigned long phys_addr) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -656,8 +650,8 @@ efi_memmap_has_mmio (void) for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - if (md->type == EFI_MEMORY_MAPPED_IO) - return 1; + if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) + return md; } return 0; } @@ -683,71 +677,125 @@ efi_mem_attributes (unsigned long phys_addr) } EXPORT_SYMBOL(efi_mem_attributes); -/* - * Determines whether the memory at phys_addr supports the desired - * attribute (WB, UC, etc). 
If this returns 1, the caller can safely - * access size bytes at phys_addr with the specified attribute. - */ -int -efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr) +u64 +efi_mem_attribute (unsigned long phys_addr, unsigned long size) { unsigned long end = phys_addr + size; efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + u64 attr; + + if (!md) + return 0; + + /* + * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells + * the kernel that firmware needs this region mapped. + */ + attr = md->attribute & ~EFI_MEMORY_RUNTIME; + do { + unsigned long md_end = efi_md_end(md); + + if (end <= md_end) + return attr; + + md = efi_memory_descriptor(md_end); + if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr) + return 0; + } while (md); + return 0; +} + +u64 +kern_mem_attribute (unsigned long phys_addr, unsigned long size) +{ + unsigned long end = phys_addr + size; + struct kern_memdesc *md; + u64 attr; /* - * Some firmware doesn't report MMIO regions in the EFI memory - * map. The Intel BigSur (a.k.a. HP i2000) has this problem. - * On those platforms, we have to assume UC is valid everywhere. + * This is a hack for ioremap calls before we set up kern_memmap. + * Maybe we should do efi_memmap_init() earlier instead. 
*/ - if (!md || (md->attribute & attr) != attr) { - if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio()) - return 1; + if (!kern_memmap) { + attr = efi_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return EFI_MEMORY_WB; return 0; } + md = kern_memory_descriptor(phys_addr); + if (!md) + return 0; + + attr = md->attribute; do { - unsigned long md_end = efi_md_end(md); + unsigned long md_end = kmd_end(md); if (end <= md_end) - return 1; + return attr; - md = efi_memory_descriptor(md_end); - if (!md || (md->attribute & attr) != attr) + md = kern_memory_descriptor(md_end); + if (!md || md->attribute != attr) return 0; } while (md); return 0; } +EXPORT_SYMBOL(kern_mem_attribute); -/* - * For /dev/mem, we only allow read & write system calls to access - * write-back memory, because read & write don't allow the user to - * control access size. - */ int valid_phys_addr_range (unsigned long phys_addr, unsigned long size) { - return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB); + u64 attr; + + /* + * /dev/mem reads and writes use copy_to_user(), which implicitly + * uses a granule-sized kernel identity mapping. It's really + * only safe to do this for regions in kern_memmap. For more + * details, see Documentation/ia64/aliasing.txt. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + return 0; } -/* - * We allow mmap of anything in the EFI memory map that supports - * either write-back or uncacheable access. For uncacheable regions, - * the supported access sizes are system-dependent, and the user is - * responsible for using the correct size. - * - * Note that this doesn't currently allow access to hot-added memory, - * because that doesn't appear in the boot-time EFI memory map. 
- */ int valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size) { - if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB)) - return 1; + /* + * MMIO regions are often missing from the EFI memory map. + * We must allow mmap of them for programs like X, so we + * currently can't do any useful validation. + */ + return 1; +} - if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC)) - return 1; +pgprot_t +phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, + pgprot_t vma_prot) +{ + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; - return 0; + /* + * For /dev/mem mmap, we use user mappings, but if the region is + * in kern_memmap (and hence may be covered by a kernel mapping), + * we must use the same attribute as the kernel mapping. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + else if (attr & EFI_MEMORY_UC) + return pgprot_noncached(vma_prot); + + /* + * Some chipsets don't support UC access to memory. If + * WB is supported, we prefer that. 
+ */ + if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + + return pgprot_noncached(vma_prot); } int __init diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 643ccc6..07bd02b 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/efi.h> #include <asm/io.h> +#include <asm/meminit.h> static inline void __iomem * __ioremap (unsigned long offset, unsigned long size) @@ -21,16 +22,29 @@ __ioremap (unsigned long offset, unsigned long size) void __iomem * ioremap (unsigned long offset, unsigned long size) { - if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB)) - return phys_to_virt(offset); + u64 attr; + unsigned long gran_base, gran_size; - if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC)) + /* + * For things in kern_memmap, we must use the same attribute + * as the rest of the kernel. For more details, see + * Documentation/ia64/aliasing.txt. + */ + attr = kern_mem_attribute(offset, size); + if (attr & EFI_MEMORY_WB) + return phys_to_virt(offset); + else if (attr & EFI_MEMORY_UC) return __ioremap(offset, size); /* - * Someday this should check ACPI resources so we - * can do the right thing for hot-plugged regions. + * Some chipsets don't support UC access to memory. If + * WB is supported for the whole granule, we prefer that. 
*/ + gran_base = GRANULEROUNDDOWN(offset); + gran_size = GRANULEROUNDUP(offset + size) - gran_base; + if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) + return phys_to_virt(offset); + return __ioremap(offset, size); } EXPORT_SYMBOL(ioremap); @@ -38,6 +52,9 @@ EXPORT_SYMBOL(ioremap); void __iomem * ioremap_nocache (unsigned long offset, unsigned long size) { + if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB) + return 0; + return __ioremap(offset, size); } EXPORT_SYMBOL(ioremap_nocache); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index ab829a2..30d148f 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -645,18 +645,31 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus) int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) { + unsigned long size = vma->vm_end - vma->vm_start; + pgprot_t prot; char *addr; + /* + * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt + * for more details. + */ + if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size)) + return -EINVAL; + prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, + vma->vm_page_prot); + if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot))) + return -EINVAL; + addr = pci_get_legacy_mem(bus); if (IS_ERR(addr)) return PTR_ERR(addr); vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = prot; vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO); if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) + size, vma->vm_page_prot)) return -EAGAIN; return 0; |