diff options
Diffstat (limited to 'include')
119 files changed, 3882 insertions, 573 deletions
diff --git a/include/asm-alpha/bug.h b/include/asm-alpha/bug.h index 39a3e2a..695a5ee 100644 --- a/include/asm-alpha/bug.h +++ b/include/asm-alpha/bug.h @@ -1,14 +1,24 @@ #ifndef _ALPHA_BUG_H #define _ALPHA_BUG_H +#include <linux/linkage.h> + #ifdef CONFIG_BUG #include <asm/pal.h> /* ??? Would be nice to use .gprel32 here, but we can't be sure that the function loaded the GP, so this could fail in modules. */ -#define BUG() \ - __asm__ __volatile__("call_pal %0 # bugchk\n\t"".long %1\n\t.8byte %2" \ - : : "i" (PAL_bugchk), "i"(__LINE__), "i"(__FILE__)) +static inline void ATTRIB_NORET __BUG(const char *file, int line) +{ + __asm__ __volatile__( + "call_pal %0 # bugchk\n\t" + ".long %1\n\t.8byte %2" + : : "i" (PAL_bugchk), "i"(line), "i"(file)); + for ( ; ; ) + ; +} + +#define BUG() __BUG(__FILE__, __LINE__) #define HAVE_ARCH_BUG #endif diff --git a/include/asm-alpha/byteorder.h b/include/asm-alpha/byteorder.h index 7af2b8d..58e958f 100644 --- a/include/asm-alpha/byteorder.h +++ b/include/asm-alpha/byteorder.h @@ -7,7 +7,7 @@ #ifdef __GNUC__ -static __inline __attribute_const__ __u32 __arch__swab32(__u32 x) +static inline __attribute_const__ __u32 __arch__swab32(__u32 x) { /* * Unfortunately, we can't use the 6 instruction sequence diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index 99037b0..05ce5fb 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -268,6 +268,7 @@ extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } @@ -275,6 +276,7 @@ extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); ret extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } +extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/include/asm-arm/arch-sa1100/ide.h b/include/asm-arm/arch-sa1100/ide.h index 98b10bc..b14cbda 100644 --- a/include/asm-arm/arch-sa1100/ide.h +++ b/include/asm-arm/arch-sa1100/ide.h @@ -37,12 +37,12 @@ static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, memset(hw, 0, sizeof(*hw)); - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; + for (i = 0; i <= 7; i++) { + hw->io_ports_array[i] = reg; reg += regincr; } - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + hw->io_ports.ctl_addr = ctrl_port; if (irq) *irq = 0; diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index 5e01824..5571c13 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -260,6 +260,7 @@ extern struct page *empty_zero_page; #define pte_write(pte) (pte_val(pte) & L_PTE_WRITE) #define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_special(pte) (0) /* * The following only works if pte_present() is not true. @@ -280,6 +281,8 @@ PTE_BIT_FUNC(mkdirty, |= L_PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~L_PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h index 3ae7b54..c0e5e29 100644 --- a/include/asm-avr32/pgtable.h +++ b/include/asm-avr32/pgtable.h @@ -212,6 +212,10 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_special(pte_t pte) +{ + return 0; +} /* * The following only work if pte_present() is not true. @@ -252,6 +256,10 @@ static inline pte_t pte_mkyoung(pte_t pte) set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) diff --git a/include/asm-cris/arch-v10/ide.h b/include/asm-cris/arch-v10/ide.h index ea34e0d..5366e62 100644 --- a/include/asm-cris/arch-v10/ide.h +++ b/include/asm-cris/arch-v10/ide.h @@ -59,22 +59,19 @@ static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, u int i; /* fill in ports for ATA addresses 0 to 7 */ - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = data_port | + for (i = 0; i <= 7; i++) { + hw->io_ports_array[i] = data_port | IO_FIELD(R_ATA_CTRL_DATA, addr, i) | IO_STATE(R_ATA_CTRL_DATA, cs0, active); } /* the IDE control register is at ATA address 6, with CS1 active instead of CS0 */ - - hw->io_ports[IDE_CONTROL_OFFSET] = data_port | + hw->io_ports.ctl_addr = data_port | IO_FIELD(R_ATA_CTRL_DATA, addr, 6) | IO_STATE(R_ATA_CTRL_DATA, cs1, active); /* whats this for ? */ - - hw->io_ports[IDE_IRQ_OFFSET] = 0; + hw->io_ports.irq_addr = 0; } static inline void ide_init_default_hwifs(void) diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h index a260757..829e7a7 100644 --- a/include/asm-cris/pgtable.h +++ b/include/asm-cris/pgtable.h @@ -115,6 +115,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WR static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -162,6 +163,7 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Conversion functions: convert a page and protection to a page entry, @@ -229,7 +231,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) /* to find an entry in a page-table-directory */ -static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address) +static inline pgd_t * pgd_offset(const struct mm_struct *mm, unsigned long address) { return mm->pgd + pgd_index(address); } diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 4e21904..83c51aba 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -380,6 +380,7 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address) static inline int pte_dirty(pte_t pte) { return (pte).pte & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return (pte).pte & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return !((pte).pte & _PAGE_WP); } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { (pte).pte &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { (pte).pte &= ~_PAGE_ACCESSED; return pte; } @@ -387,6 +388,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte |= _PAGE_WP; return pte static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte &= ~_PAGE_WP; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index f29a502..ecf675a 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,7 +16,14 @@ #define ARCH_NR_GPIOS 256 #endif +static inline int gpio_is_valid(int number) +{ + /* only some non-negative numbers are valid */ + return ((unsigned)number) < ARCH_NR_GPIOS; +} + struct seq_file; +struct module; /** * struct gpio_chip - abstract a GPIO controller @@ -48,6 +55,7 @@ struct seq_file; */ struct gpio_chip { char *label; + struct module *owner; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -66,6 +74,7 @@ struct gpio_chip { extern const char *gpiochip_is_requested(struct gpio_chip *chip, unsigned offset); +extern int __init __must_check gpiochip_reserve(int start, int ngpio); /* add/remove chips */ extern int gpiochip_add(struct gpio_chip *chip); @@ -97,6 +106,12 @@ extern int __gpio_cansleep(unsigned gpio); #else +static inline int gpio_is_valid(int number) +{ + /* only non-negative numbers are valid */ + return number >= 0; +} + /* platforms that don't directly support access to GPIOs through I2C, SPI, * or other blocking infrastructure can use these wrappers. */ diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h index de2ed2c..2fe292c 100644 --- a/include/asm-ia64/gcc_intrin.h +++ b/include/asm-ia64/gcc_intrin.h @@ -21,6 +21,10 @@ #define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") + +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") + extern void ia64_bad_param_for_setreg (void); extern void ia64_bad_param_for_getreg (void); @@ -517,6 +521,14 @@ do { \ #define ia64_ptrd(addr, size) \ asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") +#define ia64_ttag(addr) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + + /* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ #define ia64_lfhint_none 0 diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h new file mode 100644 index 0000000..f28a970 --- /dev/null +++ b/include/asm-ia64/hugetlb.h @@ -0,0 +1,79 @@ +#ifndef _ASM_IA64_HUGETLB_H +#define _ASM_IA64_HUGETLB_H + +#include <asm/page.h> + + +void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling); + +int prepare_hugepage_range(unsigned long addr, unsigned long len); + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return (REGION_NUMBER(addr) == RGN_HPAGE || + REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE); +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_IA64_HUGETLB_H */ diff --git a/include/asm-ia64/kvm.h b/include/asm-ia64/kvm.h index 030d29b..eb2d355 100644 --- a/include/asm-ia64/kvm.h +++ b/include/asm-ia64/kvm.h @@ -1,6 +1,205 @@ -#ifndef __LINUX_KVM_IA64_H -#define __LINUX_KVM_IA64_H +#ifndef __ASM_IA64_KVM_H +#define __ASM_IA64_KVM_H -/* ia64 does not support KVM */ +/* + * asm-ia64/kvm.h: kvm structure definitions for ia64 + * + * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <asm/types.h> +#include <asm/fpu.h> + +#include <linux/ioctl.h> + +/* Architectural interrupt line count. */ +#define KVM_NR_INTERRUPTS 256 + +#define KVM_IOAPIC_NUM_PINS 24 + +struct kvm_ioapic_state { + __u64 base_address; + __u32 ioregsel; + __u32 id; + __u32 irr; + __u32 pad; + union { + __u64 bits; + struct { + __u8 vector; + __u8 delivery_mode:3; + __u8 dest_mode:1; + __u8 delivery_status:1; + __u8 polarity:1; + __u8 remote_irr:1; + __u8 trig_mode:1; + __u8 mask:1; + __u8 reserve:7; + __u8 reserved[4]; + __u8 dest_id; + } fields; + } redirtbl[KVM_IOAPIC_NUM_PINS]; +}; + +#define KVM_IRQCHIP_PIC_MASTER 0 +#define KVM_IRQCHIP_PIC_SLAVE 1 +#define KVM_IRQCHIP_IOAPIC 2 + +#define KVM_CONTEXT_SIZE 8*1024 + +union context { + /* 8K size */ + char dummy[KVM_CONTEXT_SIZE]; + struct { + unsigned long psr; + unsigned long pr; + unsigned long caller_unat; + unsigned long pad; + unsigned long gr[32]; + unsigned long ar[128]; + unsigned long br[8]; + unsigned long cr[128]; + unsigned long rr[8]; + unsigned long ibr[8]; + unsigned long dbr[8]; + unsigned long pkr[8]; + struct ia64_fpreg fr[128]; + }; +}; + +struct thash_data { + union { + struct { + unsigned long p : 1; /* 0 */ + unsigned long rv1 : 1; /* 1 */ + unsigned long ma : 3; /* 2-4 */ + unsigned long a : 1; /* 5 */ + unsigned long d : 1; /* 6 */ + unsigned long pl : 2; /* 7-8 */ + unsigned long ar : 3; /* 9-11 */ + unsigned long ppn : 38; /* 12-49 */ + unsigned long rv2 : 2; /* 50-51 */ + unsigned long ed : 1; /* 52 */ + unsigned long ig1 : 11; /* 53-63 */ + }; + struct { + unsigned long __rv1 : 53; /* 0-52 */ + unsigned long contiguous : 1; /*53 */ + unsigned long tc : 1; /* 54 TR or TC */ + unsigned long cl : 1; + /* 55 I side or D side cache line */ + unsigned long len : 4; /* 56-59 */ + unsigned long io : 1; /* 60 entry is for io or not */ + unsigned long nomap : 1; + /* 61 entry cann't be inserted into machine TLB.*/ + unsigned long checked : 1; + /* 62 for VTLB/VHPT sanity check */ + unsigned long invalid : 1; + /* 63 invalid entry */ + }; + unsigned long page_flags; + }; /* same for VHPT and TLB */ + + union { + struct { + unsigned long rv3 : 2; + unsigned long ps : 6; + unsigned long key : 24; + unsigned long rv4 : 32; + }; + unsigned long itir; + }; + union { + struct { + unsigned long ig2 : 12; + unsigned long vpn : 49; + unsigned long vrn : 3; + }; + unsigned long ifa; + unsigned long vadr; + struct { + unsigned long tag : 63; + unsigned long ti : 1; + }; + unsigned long etag; + }; + union { + struct thash_data *next; + unsigned long rid; + unsigned long gpaddr; + }; +}; + +#define NITRS 8 +#define NDTRS 8 + +struct saved_vpd { + unsigned long vhpi; + unsigned long vgr[16]; + unsigned long vbgr[16]; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long vpsr; + unsigned long vpr; + unsigned long vcr[128]; +}; + +struct kvm_regs { + char *saved_guest; + char *saved_stack; + struct saved_vpd vpd; + /*Arch-regs*/ + int mp_state; + unsigned long vmm_rr; + /* TR and TC. */ + struct thash_data itrs[NITRS]; + struct thash_data dtrs[NDTRS]; + /* Bit is set if there is a tr/tc for the region. */ + unsigned char itr_regions; + unsigned char dtr_regions; + unsigned char tc_regions; + + char irq_check; + unsigned long saved_itc; + unsigned long itc_check; + unsigned long timer_check; + unsigned long timer_pending; + unsigned long last_itc; + + unsigned long vrr[8]; + unsigned long ibr[8]; + unsigned long dbr[8]; + unsigned long insvc[4]; /* Interrupt in service. */ + unsigned long xtp; + + unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ + unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ + unsigned long metaphysical_saved_rr0; /* from kvm_arch */ + unsigned long metaphysical_saved_rr4; /* from kvm_arch */ + unsigned long fp_psr; /*used for lazy float register */ + unsigned long saved_gp; + /*for phycial emulation */ +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; #endif diff --git a/include/asm-ia64/kvm_host.h b/include/asm-ia64/kvm_host.h new file mode 100644 index 0000000..c082c20 --- /dev/null +++ b/include/asm-ia64/kvm_host.h @@ -0,0 +1,524 @@ +/* + * kvm_host.h: used for kvm module, and hold ia64-specific sections. + * + * Copyright (C) 2007, Intel Corporation. + * + * Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#ifndef __ASM_KVM_HOST_H +#define __ASM_KVM_HOST_H + + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/kvm_types.h> + +#include <asm/pal.h> +#include <asm/sal.h> + +#define KVM_MAX_VCPUS 4 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + + +/* define exit reasons from vmm to kvm*/ +#define EXIT_REASON_VM_PANIC 0 +#define EXIT_REASON_MMIO_INSTRUCTION 1 +#define EXIT_REASON_PAL_CALL 2 +#define EXIT_REASON_SAL_CALL 3 +#define EXIT_REASON_SWITCH_RR6 4 +#define EXIT_REASON_VM_DESTROY 5 +#define EXIT_REASON_EXTERNAL_INTERRUPT 6 +#define EXIT_REASON_IPI 7 +#define EXIT_REASON_PTC_G 8 + +/*Define vmm address space and vm data space.*/ +#define KVM_VMM_SIZE (16UL<<20) +#define KVM_VMM_SHIFT 24 +#define KVM_VMM_BASE 0xD000000000000000UL +#define VMM_SIZE (8UL<<20) + +/* + * Define vm_buffer, used by PAL Services, base address. + * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M + */ +#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) +#define KVM_VM_BUFFER_SIZE (8UL<<20) + +/*Define Virtual machine data layout.*/ +#define KVM_VM_DATA_SHIFT 24 +#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) +#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) + + +#define KVM_P2M_BASE KVM_VM_DATA_BASE +#define KVM_P2M_OFS 0 +#define KVM_P2M_SIZE (8UL << 20) + +#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) +#define KVM_VHPT_OFS KVM_P2M_SIZE +#define KVM_VHPT_BLOCK_SIZE (2UL << 20) +#define VHPT_SHIFT 18 +#define VHPT_SIZE (1UL << VHPT_SHIFT) +#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) + +#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) +#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) +#define KVM_VTLB_BLOCK_SIZE (1UL<<20) +#define VTLB_SHIFT 17 +#define VTLB_SIZE (1UL<<VTLB_SHIFT) +#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) + +#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) +#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) +#define KVM_VPD_BLOCK_SIZE (2UL<<20) +#define VPD_SHIFT 16 +#define VPD_SIZE (1UL<<VPD_SHIFT) + +#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) +#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) +#define KVM_VCPU_BLOCK_SIZE (2UL<<20) +#define VCPU_SHIFT 18 +#define VCPU_SIZE (1UL<<VCPU_SHIFT) +#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE + +#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) +#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) +#define KVM_VM_BLOCK_SIZE (1UL<<19) + +#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) +#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) +#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) + +/* Get vpd, vhpt, tlb, vcpu, base*/ +#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) +#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) +#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) +#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) + +/*IO section definitions*/ +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +/*Guest Physical address layout.*/ +#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ +#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ +#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ +#define GPFN_PIB (3UL << 60) /* PIB base */ +#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ +#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ +#define GPFN_GFW (6UL << 60) /* Guest Firmware */ +#define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */ + +#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ +#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ +#define INVALID_MFN (~0UL) +#define MEM_G (1UL << 30) +#define MEM_M (1UL << 20) +#define MMIO_START (3 * MEM_G) +#define MMIO_SIZE (512 * MEM_M) +#define VGA_IO_START 0xA0000UL +#define VGA_IO_SIZE 0x20000 +#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) +#define LEGACY_IO_SIZE (64 * MEM_M) +#define IO_SAPIC_START 0xfec00000UL +#define IO_SAPIC_SIZE 0x100000 +#define PIB_START 0xfee00000UL +#define PIB_SIZE 0x200000 +#define GFW_START (4 * MEM_G - 16 * MEM_M) +#define GFW_SIZE (16 * MEM_M) + +/*Deliver mode, defined for ioapic.c*/ +#define dest_Fixed IOSAPIC_FIXED +#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY + +#define NMI_VECTOR 2 +#define ExtINT_VECTOR 0 +#define NULL_VECTOR (-1) +#define IA64_SPURIOUS_INT_VECTOR 0x0f + +#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24) + +/* + *Delivery mode + */ +#define SAPIC_DELIV_SHIFT 8 +#define SAPIC_FIXED 0x0 +#define SAPIC_LOWEST_PRIORITY 0x1 +#define SAPIC_PMI 0x2 +#define SAPIC_NMI 0x4 +#define SAPIC_INIT 0x5 +#define SAPIC_EXTINT 0x7 + +/* + * vcpu->requests bit members for arch + */ +#define KVM_REQ_PTC_G 32 +#define KVM_REQ_RESUME 33 + +#define KVM_PAGES_PER_HPAGE 1 + +struct kvm; +struct kvm_vcpu; +struct kvm_guest_debug{ +}; + +struct kvm_mmio_req { + uint64_t addr; /* physical address */ + uint64_t size; /* size in bytes */ + uint64_t data; /* data (or paddr of data) */ + uint8_t state:4; + uint8_t dir:1; /* 1=read, 0=write */ +}; + +/*Pal data struct */ +struct kvm_pal_call{ + /*In area*/ + uint64_t gr28; + uint64_t gr29; + uint64_t gr30; + uint64_t gr31; + /*Out area*/ + struct ia64_pal_retval ret; +}; + +/* Sal data structure */ +struct kvm_sal_call{ + /*In area*/ + uint64_t in0; + uint64_t in1; + uint64_t in2; + uint64_t in3; + uint64_t in4; + uint64_t in5; + uint64_t in6; + uint64_t in7; + struct sal_ret_values ret; +}; + +/*Guest change rr6*/ +struct kvm_switch_rr6 { + uint64_t old_rr; + uint64_t new_rr; +}; + +union ia64_ipi_a{ + unsigned long val; + struct { + unsigned long rv : 3; + unsigned long ir : 1; + unsigned long eid : 8; + unsigned long id : 8; + unsigned long ib_base : 44; + }; +}; + +union ia64_ipi_d { + unsigned long val; + struct { + unsigned long vector : 8; + unsigned long dm : 3; + unsigned long ig : 53; + }; +}; + +/*ipi check exit data*/ +struct kvm_ipi_data{ + union ia64_ipi_a addr; + union ia64_ipi_d data; +}; + +/*global purge data*/ +struct kvm_ptc_g { + unsigned long vaddr; + unsigned long rr; + unsigned long ps; + struct kvm_vcpu *vcpu; +}; + +/*Exit control data */ +struct exit_ctl_data{ + uint32_t exit_reason; + uint32_t vm_status; + union { + struct kvm_mmio_req ioreq; + struct kvm_pal_call pal_data; + struct kvm_sal_call sal_data; + struct kvm_switch_rr6 rr_data; + struct kvm_ipi_data ipi_data; + struct kvm_ptc_g ptc_g_data; + } u; +}; + +union pte_flags { + unsigned long val; + struct { + unsigned long p : 1; /*0 */ + unsigned long : 1; /* 1 */ + unsigned long ma : 3; /* 2-4 */ + unsigned long a : 1; /* 5 */ + unsigned long d : 1; /* 6 */ + unsigned long pl : 2; /* 7-8 */ + unsigned long ar : 3; /* 9-11 */ + unsigned long ppn : 38; /* 12-49 */ + unsigned long : 2; /* 50-51 */ + unsigned long ed : 1; /* 52 */ + }; +}; + +union ia64_pta { + unsigned long val; + struct { + unsigned long ve : 1; + unsigned long reserved0 : 1; + unsigned long size : 6; + unsigned long vf : 1; + unsigned long reserved1 : 6; + unsigned long base : 49; + }; +}; + +struct thash_cb { + /* THASH base information */ + struct thash_data *hash; /* hash table pointer */ + union ia64_pta pta; + int num; +}; + +struct kvm_vcpu_stat { +}; + +struct kvm_vcpu_arch { + int launched; + int last_exit; + int last_run_cpu; + int vmm_tr_slot; + int vm_tr_slot; + +#define KVM_MP_STATE_RUNNABLE 0 +#define KVM_MP_STATE_UNINITIALIZED 1 +#define KVM_MP_STATE_INIT_RECEIVED 2 +#define KVM_MP_STATE_HALTED 3 + int mp_state; + +#define MAX_PTC_G_NUM 3 + int ptc_g_count; + struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM]; + + /*halt timer to wake up sleepy vcpus*/ + struct hrtimer hlt_timer; + long ht_active; + + struct kvm_lapic *apic; /* kernel irqchip context */ + struct vpd *vpd; + + /* Exit data for vmm_transition*/ + struct exit_ctl_data exit_data; + + cpumask_t cache_coherent_map; + + unsigned long vmm_rr; + unsigned long host_rr6; + unsigned long psbits[8]; + unsigned long cr_iipa; + unsigned long cr_isr; + unsigned long vsa_base; + unsigned long dirty_log_lock_pa; + unsigned long __gp; + /* TR and TC. */ + struct thash_data itrs[NITRS]; + struct thash_data dtrs[NDTRS]; + /* Bit is set if there is a tr/tc for the region. */ + unsigned char itr_regions; + unsigned char dtr_regions; + unsigned char tc_regions; + /* purge all */ + unsigned long ptce_base; + unsigned long ptce_count[2]; + unsigned long ptce_stride[2]; + /* itc/itm */ + unsigned long last_itc; + long itc_offset; + unsigned long itc_check; + unsigned long timer_check; + unsigned long timer_pending; + + unsigned long vrr[8]; + unsigned long ibr[8]; + unsigned long dbr[8]; + unsigned long insvc[4]; /* Interrupt in service. */ + unsigned long xtp; + + unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ + unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ + unsigned long metaphysical_saved_rr0; /* from kvm_arch */ + unsigned long metaphysical_saved_rr4; /* from kvm_arch */ + unsigned long fp_psr; /*used for lazy float register */ + unsigned long saved_gp; + /*for phycial emulation */ + int mode_flags; + struct thash_cb vtlb; + struct thash_cb vhpt; + char irq_check; + char irq_new_pending; + + unsigned long opcode; + unsigned long cause; + union context host; + union context guest; +}; + +struct kvm_vm_stat { + u64 remote_tlb_flush; +}; + +struct kvm_sal_data { + unsigned long boot_ip; + unsigned long boot_gp; +}; + +struct kvm_arch { + unsigned long vm_base; + unsigned long metaphysical_rr0; + unsigned long metaphysical_rr4; + unsigned long vmm_init_rr; + unsigned long vhpt_base; + unsigned long vtlb_base; + unsigned long vpd_base; + spinlock_t dirty_log_lock; + struct kvm_ioapic *vioapic; + struct kvm_vm_stat stat; + struct kvm_sal_data rdv_sal_data; +}; + +union cpuid3_t { + u64 value; + struct { + u64 number : 8; + u64 revision : 8; + u64 model : 8; + u64 family : 8; + u64 archrev : 8; + u64 rv : 24; + }; +}; + +struct kvm_pt_regs { + /* The following registers are saved by SAVE_MIN: */ + unsigned long b6; /* scratch */ + unsigned long b7; /* scratch */ + + unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ + unsigned long ar_ssd; /* reserved for future use (scratch) */ + + unsigned long r8; /* scratch (return value register 0) */ + unsigned long r9; /* scratch (return value register 1) */ + unsigned long r10; /* scratch (return value register 2) */ + unsigned long r11; /* scratch (return value register 3) */ + + unsigned long cr_ipsr; /* interrupted task's psr */ + unsigned long cr_iip; /* interrupted task's instruction pointer */ + unsigned long cr_ifs; /* interrupted task's function state */ + + unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ + unsigned long ar_pfs; /* prev function state */ + unsigned long ar_rsc; /* RSE configuration */ + /* The following two are valid only if cr_ipsr.cpl > 0: */ + unsigned long ar_rnat; /* RSE NaT */ + unsigned long ar_bspstore; /* RSE bspstore */ + + unsigned long pr; /* 64 predicate registers (1 bit each) */ + unsigned long b0; /* return pointer (bp) */ + unsigned long loadrs; /* size of dirty partition << 16 */ + + unsigned long r1; /* the gp pointer */ + unsigned long r12; /* interrupted task's memory stack pointer */ + unsigned long r13; /* thread pointer */ + + unsigned long ar_fpsr; /* floating point status (preserved) */ + unsigned long r15; /* scratch */ + + /* The remaining registers are NOT saved for system calls. */ + unsigned long r14; /* scratch */ + unsigned long r2; /* scratch */ + unsigned long r3; /* scratch */ + unsigned long r16; /* scratch */ + unsigned long r17; /* scratch */ + unsigned long r18; /* scratch */ + unsigned long r19; /* scratch */ + unsigned long r20; /* scratch */ + unsigned long r21; /* scratch */ + unsigned long r22; /* scratch */ + unsigned long r23; /* scratch */ + unsigned long r24; /* scratch */ + unsigned long r25; /* scratch */ + unsigned long r26; /* scratch */ + unsigned long r27; /* scratch */ + unsigned long r28; /* scratch */ + unsigned long r29; /* scratch */ + unsigned long r30; /* scratch */ + unsigned long r31; /* scratch */ + unsigned long ar_ccv; /* compare/exchange value (scratch) */ + + /* + * Floating point registers that the kernel considers scratch: + */ + struct ia64_fpreg f6; /* scratch */ + struct ia64_fpreg f7; /* scratch */ + struct ia64_fpreg f8; /* scratch */ + struct ia64_fpreg f9; /* scratch */ + struct ia64_fpreg f10; /* scratch */ + struct ia64_fpreg f11; /* scratch */ + + unsigned long r4; /* preserved */ + unsigned long r5; /* preserved */ + unsigned long r6; /* preserved */ + unsigned long r7; /* preserved */ + unsigned long eml_unat; /* used for emulating instruction */ + unsigned long pad0; /* alignment pad */ +}; + +static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) +{ + return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; +} + +typedef int kvm_vmm_entry(void); +typedef void kvm_tramp_entry(union context *host, union context *guest); + +struct kvm_vmm_info{ + struct module *module; + kvm_vmm_entry *vmm_entry; + kvm_tramp_entry *tramp_entry; + unsigned long vmm_ivt; +}; + +int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); +int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); +void kvm_sal_emul(struct kvm_vcpu *vcpu); + +#endif diff --git a/include/asm-ia64/kvm_para.h b/include/asm-ia64/kvm_para.h new file mode 100644 index 0000000..9f9796b --- /dev/null +++ b/include/asm-ia64/kvm_para.h @@ -0,0 +1,29 @@ +#ifndef __IA64_KVM_PARA_H +#define __IA64_KVM_PARA_H + +/* + * asm-ia64/kvm_para.h + * + * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +#endif diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 4999a6c..36f3932 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -54,9 +54,6 @@ # define HPAGE_MASK (~(HPAGE_SIZE - 1)) # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -# define ARCH_HAS_HUGEPAGE_ONLY_RANGE -# define ARCH_HAS_PREPARE_HUGEPAGE_RANGE -# define ARCH_HAS_HUGETLB_FREE_PGD_RANGE #endif /* CONFIG_HUGETLB_PAGE */ #ifdef __ASSEMBLY__ @@ -153,9 +150,6 @@ typedef union ia64_va { # define htlbpage_to_page(x) (((unsigned long) REGION_NUMBER(x) << 61) \ | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT))) # define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -# define is_hugepage_only_range(mm, addr, len) \ - (REGION_NUMBER(addr) == RGN_HPAGE || \ - REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE) extern unsigned int hpage_shift; #endif diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index ed70862..7a9bff4 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -302,6 +302,8 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) #define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0) +#define pte_special(pte) 0 + /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the * access rights: @@ -313,6 +315,7 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) +#define pte_mkspecial(pte) (pte) /* * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 741f7ec..6aff126 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -119,6 +119,69 @@ struct ia64_psr { __u64 reserved4 : 19; }; +union ia64_isr { + __u64 val; + struct { + __u64 code : 16; + __u64 vector : 8; + __u64 reserved1 : 8; + __u64 x : 1; + __u64 w : 1; + __u64 r : 1; + __u64 na : 1; + __u64 sp : 1; + __u64 rs : 1; + __u64 ir : 1; + __u64 ni : 1; + __u64 so : 1; + __u64 ei : 2; + __u64 ed : 1; + __u64 reserved2 : 20; + }; +}; + +union ia64_lid { + __u64 val; + struct { + __u64 rv : 16; + __u64 eid : 8; + __u64 id : 8; + __u64 ig : 32; + }; +}; + +union ia64_tpr { + __u64 val; + struct { + __u64 ig0 : 4; + __u64 mic : 4; + __u64 rsv : 8; + __u64 mmi : 1; + __u64 ig1 : 47; + }; +}; + +union ia64_itir { + __u64 val; + struct { + __u64 rv3 : 2; /* 0-1 */ + __u64 ps : 6; /* 2-7 */ + __u64 key : 24; /* 8-31 */ + __u64 rv4 : 32; /* 32-63 */ + }; +}; + +union ia64_rr { + __u64 val; + struct { + __u64 ve : 1; /* enable hw walker */ + __u64 reserved0: 1; /* reserved */ + __u64 ps : 6; /* log page size */ + __u64 rid : 24; /* region id */ + __u64 reserved1: 32; /* reserved */ + }; +}; + /* * CPU type, hardware bug flags, and per-CPU state. Frequently used * state comes earlier: diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h index 8650538..e6359c5 100644 --- a/include/asm-m32r/pgtable.h +++ b/include/asm-m32r/pgtable.h @@ -214,6 +214,11 @@ static inline int pte_file(pte_t pte) return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) +{ + return 0; +} + static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; @@ -250,6 +255,11 @@ static inline pte_t pte_mkwrite(pte_t pte) return pte; } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h index 13135d4..8e9a8a7 100644 --- a/include/asm-m68k/motorola_pgtable.h +++ b/include/asm-m68k/motorola_pgtable.h @@ -168,6 +168,7 @@ static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -185,6 +186,7 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/include/asm-m68k/sun3_pgtable.h b/include/asm-m68k/sun3_pgtable.h index b766fc2..f847ec7 100644 --- a/include/asm-m68k/sun3_pgtable.h +++ b/include/asm-m68k/sun3_pgtable.h @@ -169,6 +169,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEA static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; } @@ -181,6 +182,7 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index 17a7703..782221e 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -285,6 +285,8 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } #endif +static inline int pte_special(pte_t pte) { return 0; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Macro to make mark a page protection value as "uncacheable". Note diff --git a/include/asm-mips/vr41xx/siu.h b/include/asm-mips/vr41xx/siu.h index 98cdb40..da9f6e3 100644 --- a/include/asm-mips/vr41xx/siu.h +++ b/include/asm-mips/vr41xx/siu.h @@ -1,7 +1,7 @@ /* * Include file for NEC VR4100 series Serial Interface Unit. * - * Copyright (C) 2005 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> + * Copyright (C) 2005-2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,4 +49,10 @@ typedef enum { extern void vr41xx_select_irda_module(irda_module_t module, irda_speed_t speed); +#ifdef CONFIG_SERIAL_VR41XX_CONSOLE +extern void vr41xx_siu_early_setup(struct uart_port *port); +#else +static inline void vr41xx_siu_early_setup(struct uart_port *port) {} +#endif + #endif /* __NEC_VR41XX_SIU_H */ diff --git a/include/asm-mips/vr41xx/vr41xx.h b/include/asm-mips/vr41xx/vr41xx.h index 88b492f..22be649 100644 --- a/include/asm-mips/vr41xx/vr41xx.h +++ b/include/asm-mips/vr41xx/vr41xx.h @@ -7,7 +7,7 @@ * Copyright (C) 2001, 2002 Paul Mundt * Copyright (C) 2002 MontaVista Software, Inc. * Copyright (C) 2002 TimeSys Corp. - * Copyright (C) 2003-2005 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> + * Copyright (C) 2003-2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -143,4 +143,10 @@ extern void vr41xx_disable_csiint(uint16_t mask); extern void vr41xx_enable_bcuint(void); extern void vr41xx_disable_bcuint(void); +#ifdef CONFIG_SERIAL_VR41XX_CONSOLE +extern void vr41xx_siu_setup(void); +#else +static inline void vr41xx_siu_setup(void) {} +#endif + #endif /* __NEC_VR41XX_H */ diff --git a/include/asm-mn10300/pgtable.h b/include/asm-mn10300/pgtable.h index 375c494..6dc30fc 100644 --- a/include/asm-mn10300/pgtable.h +++ b/include/asm-mn10300/pgtable.h @@ -224,6 +224,7 @@ static inline int pte_read(pte_t pte) { return pte_val(pte) & __PAGE_PROT_USER; static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & __PAGE_PROT_WRITE; } +static inline int pte_special(pte_t pte){ return 0; } /* * The following only works if pte_present() is not true. @@ -265,6 +266,8 @@ static inline pte_t pte_mkwrite(pte_t pte) return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + #define pte_ERROR(e) \ printk(KERN_ERR "%s:%d: bad pte %08lx.\n", \ __FILE__, __LINE__, pte_val(e)) diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h index dc86adb..470a4b8 100644 --- a/include/asm-parisc/pgtable.h +++ b/include/asm-parisc/pgtable.h @@ -323,6 +323,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -330,6 +331,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Conversion functions: convert a page and protection to a page entry, diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h new file mode 100644 index 0000000..649c6c3 --- /dev/null +++ b/include/asm-powerpc/hugetlb.h @@ -0,0 +1,79 @@ +#ifndef _ASM_POWERPC_HUGETLB_H +#define _ASM_POWERPC_HUGETLB_H + +#include <asm/page.h> + + +int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, + unsigned long len); + +void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling); + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/include/asm-powerpc/kvm.h b/include/asm-powerpc/kvm.h index d1b530f..f993e41 100644 --- a/include/asm-powerpc/kvm.h +++ b/include/asm-powerpc/kvm.h @@ -1,6 +1,55 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + #ifndef __LINUX_KVM_POWERPC_H #define __LINUX_KVM_POWERPC_H -/* powerpc does not support KVM */ +#include <asm/types.h> + +struct kvm_regs { + __u64 pc; + __u64 cr; + __u64 ctr; + __u64 lr; + __u64 xer; + __u64 msr; + __u64 srr0; + __u64 srr1; + __u64 pid; + + __u64 sprg0; + __u64 sprg1; + __u64 sprg2; + __u64 sprg3; + __u64 sprg4; + __u64 sprg5; + __u64 sprg6; + __u64 sprg7; + + __u64 gpr[32]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { + __u64 fpr[32]; +}; -#endif +#endif /* __LINUX_KVM_POWERPC_H */ diff --git a/include/asm-powerpc/kvm_asm.h b/include/asm-powerpc/kvm_asm.h new file mode 100644 index 0000000..2197764 --- /dev/null +++ b/include/asm-powerpc/kvm_asm.h @@ -0,0 +1,55 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __POWERPC_KVM_ASM_H__ +#define __POWERPC_KVM_ASM_H__ + +/* IVPR must be 64KiB-aligned. */ +#define VCPU_SIZE_ORDER 4 +#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) +#define VCPU_TLB_PGSZ PPC44x_TLB_64K +#define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG) + +#define BOOKE_INTERRUPT_CRITICAL 0 +#define BOOKE_INTERRUPT_MACHINE_CHECK 1 +#define BOOKE_INTERRUPT_DATA_STORAGE 2 +#define BOOKE_INTERRUPT_INST_STORAGE 3 +#define BOOKE_INTERRUPT_EXTERNAL 4 +#define BOOKE_INTERRUPT_ALIGNMENT 5 +#define BOOKE_INTERRUPT_PROGRAM 6 +#define BOOKE_INTERRUPT_FP_UNAVAIL 7 +#define BOOKE_INTERRUPT_SYSCALL 8 +#define BOOKE_INTERRUPT_AP_UNAVAIL 9 +#define BOOKE_INTERRUPT_DECREMENTER 10 +#define BOOKE_INTERRUPT_FIT 11 +#define BOOKE_INTERRUPT_WATCHDOG 12 +#define BOOKE_INTERRUPT_DTLB_MISS 13 +#define BOOKE_INTERRUPT_ITLB_MISS 14 +#define BOOKE_INTERRUPT_DEBUG 15 +#define BOOKE_MAX_INTERRUPT 15 + +#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ +#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ + +#define RESUME_GUEST 0 +#define RESUME_GUEST_NV RESUME_FLAG_NV +#define RESUME_HOST RESUME_FLAG_HOST +#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV) + +#endif /* __POWERPC_KVM_ASM_H__ */ diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h new file mode 100644 index 0000000..04ffbb8 --- /dev/null +++ b/include/asm-powerpc/kvm_host.h @@ -0,0 +1,152 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __POWERPC_KVM_HOST_H__ +#define __POWERPC_KVM_HOST_H__ + +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/kvm_types.h> +#include <asm/kvm_asm.h> + +#define KVM_MAX_VCPUS 1 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + +/* We don't currently support large pages. */ +#define KVM_PAGES_PER_HPAGE (1<<31) + +struct kvm; +struct kvm_run; +struct kvm_vcpu; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 sum_exits; + u32 mmio_exits; + u32 dcr_exits; + u32 signal_exits; + u32 light_exits; + /* Account for special types of light exits: */ + u32 itlb_real_miss_exits; + u32 itlb_virt_miss_exits; + u32 dtlb_real_miss_exits; + u32 dtlb_virt_miss_exits; + u32 syscall_exits; + u32 isi_exits; + u32 dsi_exits; + u32 emulated_inst_exits; + u32 dec_exits; + u32 ext_intr_exits; +}; + +struct tlbe { + u32 tid; /* Only the low 8 bits are used. */ + u32 word0; + u32 word1; + u32 word2; +}; + +struct kvm_arch { +}; + +struct kvm_vcpu_arch { + /* Unmodified copy of the guest's TLB. */ + struct tlbe guest_tlb[PPC44x_TLB_SIZE]; + /* TLB that's actually used when the guest is running. */ + struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; + /* Pages which are referenced in the shadow TLB. */ + struct page *shadow_pages[PPC44x_TLB_SIZE]; + /* Copy of the host's TLB. */ + struct tlbe host_tlb[PPC44x_TLB_SIZE]; + + u32 host_stack; + u32 host_pid; + + u64 fpr[32]; + u32 gpr[32]; + + u32 pc; + u32 cr; + u32 ctr; + u32 lr; + u32 xer; + + u32 msr; + u32 mmucr; + u32 sprg0; + u32 sprg1; + u32 sprg2; + u32 sprg3; + u32 sprg4; + u32 sprg5; + u32 sprg6; + u32 sprg7; + u32 srr0; + u32 srr1; + u32 csrr0; + u32 csrr1; + u32 dsrr0; + u32 dsrr1; + u32 dear; + u32 esr; + u32 dec; + u32 decar; + u32 tbl; + u32 tbu; + u32 tcr; + u32 tsr; + u32 ivor[16]; + u32 ivpr; + u32 pir; + u32 pid; + u32 pvr; + u32 ccr0; + u32 ccr1; + u32 dbcr0; + u32 dbcr1; + + u32 last_inst; + u32 fault_dear; + u32 fault_esr; + gpa_t paddr_accessed; + + u8 io_gpr; /* GPR used as IO source/target */ + u8 mmio_is_bigendian; + u8 dcr_needed; + u8 dcr_is_write; + + u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ + + struct timer_list dec_timer; + unsigned long pending_exceptions; +}; + +struct kvm_guest_debug { + int enabled; + unsigned long bp[4]; + int singlestep; +}; + +#endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/include/asm-powerpc/kvm_para.h b/include/asm-powerpc/kvm_para.h new file mode 100644 index 0000000..2d48f6a --- /dev/null +++ b/include/asm-powerpc/kvm_para.h @@ -0,0 +1,37 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __POWERPC_KVM_PARA_H__ +#define __POWERPC_KVM_PARA_H__ + +#ifdef __KERNEL__ + +static inline int kvm_para_available(void) +{ + return 0; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +#endif /* __KERNEL__ */ + +#endif /* __POWERPC_KVM_PARA_H__ */ diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h new file mode 100644 index 0000000..7ac8203 --- /dev/null +++ b/include/asm-powerpc/kvm_ppc.h @@ -0,0 +1,88 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __POWERPC_KVM_PPC_H__ +#define __POWERPC_KVM_PPC_H__ + +/* This file exists just so we can dereference kvm_vcpu, avoiding nested header + * dependencies. */ + +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/kvm_types.h> +#include <linux/kvm_host.h> + +struct kvm_tlb { + struct tlbe guest_tlb[PPC44x_TLB_SIZE]; + struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; +}; + +enum emulation_result { + EMULATE_DONE, /* no further processing */ + EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ + EMULATE_DO_DCR, /* kvm_run filled with DCR request */ + EMULATE_FAIL, /* can't emulate this instruction */ +}; + +extern const unsigned char exception_priority[]; +extern const unsigned char priority_exception[]; + +extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern char kvmppc_handlers_start[]; +extern unsigned long kvmppc_handler_len; + +extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); +extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, + int is_bigendian); +extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 val, unsigned int bytes, int is_bigendian); + +extern int kvmppc_emulate_instruction(struct kvm_run *run, + struct kvm_vcpu *vcpu); + +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, + u64 asid, u32 flags); +extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid); +extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); + +extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); + +static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) +{ + unsigned int priority = exception_priority[exception]; + set_bit(priority, &vcpu->arch.pending_exceptions); +} + +static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) +{ + unsigned int priority = exception_priority[exception]; + clear_bit(priority, &vcpu->arch.pending_exceptions); +} + +static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.msr = new_msr; +} + +#endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/include/asm-powerpc/mmu-44x.h b/include/asm-powerpc/mmu-44x.h index c8b02d9..a825524 100644 --- a/include/asm-powerpc/mmu-44x.h +++ b/include/asm-powerpc/mmu-44x.h @@ -53,6 +53,8 @@ #ifndef __ASSEMBLY__ +extern unsigned int tlb_44x_hwater; + typedef struct { unsigned long id; unsigned long vdso_base; diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h index 67834ea..25af4fc 100644 --- a/include/asm-powerpc/page_64.h +++ b/include/asm-powerpc/page_64.h @@ -128,11 +128,6 @@ extern void slice_init_context(struct mm_struct *mm, unsigned int psize); extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); #define slice_mm_new_context(mm) ((mm)->context.id == 0) -#define ARCH_HAS_HUGEPAGE_ONLY_RANGE -extern int is_hugepage_only_range(struct mm_struct *m, - unsigned long addr, - unsigned long len); - #endif /* __ASSEMBLY__ */ #else #define slice_init() @@ -146,8 +141,6 @@ do { \ #ifdef CONFIG_HUGETLB_PAGE -#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE -#define ARCH_HAS_SETCLEAR_HUGE_PTE #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif /* !CONFIG_HUGETLB_PAGE */ diff --git a/include/asm-powerpc/pgtable-ppc32.h b/include/asm-powerpc/pgtable-ppc32.h index daea769..7c97b5a 100644 --- a/include/asm-powerpc/pgtable-ppc32.h +++ b/include/asm-powerpc/pgtable-ppc32.h @@ -504,6 +504,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -521,6 +522,8 @@ static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { + return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h index dd4c26d..27f1869 100644 --- a/include/asm-powerpc/pgtable-ppc64.h +++ b/include/asm-powerpc/pgtable-ppc64.h @@ -239,6 +239,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW;} static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;} +static inline int pte_special(pte_t pte) { return 0; } static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -257,6 +258,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { + return pte; } /* Atomic PTE updates */ static inline unsigned long pte_update(struct mm_struct *mm, diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index 70435d3..55f9d38 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -483,6 +483,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -500,6 +501,8 @@ static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { + return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index e92b429..13c9805 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -7,6 +7,7 @@ header-y += tape390.h header-y += ucontext.h header-y += vtoc.h header-y += zcrypt.h +header-y += kvm.h unifdef-y += cmb.h unifdef-y += debug.h diff --git a/include/asm-s390/kvm.h b/include/asm-s390/kvm.h index 573f2a3..d74002f 100644 --- a/include/asm-s390/kvm.h +++ b/include/asm-s390/kvm.h @@ -1,6 +1,45 @@ #ifndef __LINUX_KVM_S390_H #define __LINUX_KVM_S390_H -/* s390 does not support KVM */ +/* + * asm-s390/kvm.h - KVM s390 specific structures and definitions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ +#include <asm/types.h> + +/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ +struct kvm_pic_state { + /* no PIC for s390 */ +}; + +struct kvm_ioapic_state { + /* no IOAPIC for s390 */ +}; + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* general purpose regs for s390 */ + __u64 gprs[16]; +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + __u32 acrs[16]; + __u64 crs[16]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u32 fpc; + __u64 fprs[16]; +}; #endif diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h new file mode 100644 index 0000000..f8204a4 --- /dev/null +++ b/include/asm-s390/kvm_host.h @@ -0,0 +1,234 @@ +/* + * asm-s390/kvm_host.h - definition for kernel virtual machines on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + + +#ifndef ASM_KVM_HOST_H +#define ASM_KVM_HOST_H +#include <linux/kvm_host.h> +#include <asm/debug.h> + +#define KVM_MAX_VCPUS 64 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + +struct kvm_guest_debug { +}; + +struct sca_entry { + atomic_t scn; + __u64 reserved; + __u64 sda; + __u64 reserved2[2]; +} __attribute__((packed)); + + +struct sca_block { + __u64 ipte_control; + __u64 reserved[5]; + __u64 mcn; + __u64 reserved2; + struct sca_entry cpu[64]; +} __attribute__((packed)); + +#define KVM_PAGES_PER_HPAGE 256 + +#define CPUSTAT_HOST 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 prefix; /* 0x0004 */ + __u8 reserved8[32]; /* 0x0008 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u8 reserved40[4]; /* 0x0040 */ +#define LCTL_CR0 0x8000 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ + __u32 ictl; /* 0x0048 */ + __u32 eca; /* 0x004c */ + __u8 icptcode; /* 0x0050 */ + __u8 reserved51; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54[2]; /* 0x0054 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ + __u8 reserved60; /* 0x0060 */ + __u8 ecb; /* 0x0061 */ + __u8 reserved62[2]; /* 0x0062 */ + __u32 scaol; /* 0x0064 */ + __u8 reserved68[4]; /* 0x0068 */ + __u32 todpr; /* 0x006c */ + __u8 reserved70[16]; /* 0x0070 */ + __u64 gmsor; /* 0x0080 */ + __u64 gmslm; /* 0x0088 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[30]; /* 0x00b0 */ + __u16 iprcc; /* 0x00ce */ + __u8 reservedd0[48]; /* 0x00d0 */ + __u64 gcr[16]; /* 0x0100 */ + __u64 gbea; /* 0x0180 */ + __u8 reserved188[120]; /* 0x0188 */ +} __attribute__((packed)); + +struct kvm_vcpu_stat { + u32 exit_userspace; + u32 exit_external_request; + u32 exit_external_interrupt; + u32 exit_stop_request; + u32 exit_validity; + u32 exit_instruction; + u32 instruction_lctl; + u32 instruction_lctg; + u32 exit_program_interruption; + u32 exit_instr_and_program; + u32 deliver_emergency_signal; + u32 deliver_service_signal; + u32 deliver_virtio_interrupt; + u32 deliver_stop_signal; + u32 deliver_prefix_signal; + u32 deliver_restart_signal; + u32 deliver_program_int; + u32 exit_wait_state; + u32 instruction_stidp; + u32 instruction_spx; + u32 instruction_stpx; + u32 instruction_stap; + u32 instruction_storage_key; + u32 instruction_stsch; + u32 instruction_chsc; + u32 instruction_stsi; + u32 instruction_stfl; + u32 instruction_sigp_sense; + u32 instruction_sigp_emergency; + u32 instruction_sigp_stop; + u32 instruction_sigp_arch; + u32 instruction_sigp_prefix; + u32 instruction_sigp_restart; + u32 diagnose_44; +}; + +struct io_info { + __u16 subchannel_id; /* 0x0b8 */ + __u16 subchannel_nr; /* 0x0ba */ + __u32 io_int_parm; /* 0x0bc */ + __u32 io_int_word; /* 0x0c0 */ +}; + +struct ext_info { + __u32 ext_params; + __u64 ext_params2; +}; + +#define PGM_OPERATION 0x01 +#define PGM_PRIVILEGED_OPERATION 0x02 +#define PGM_EXECUTE 0x03 +#define PGM_PROTECTION 0x04 +#define PGM_ADDRESSING 0x05 +#define PGM_SPECIFICATION 0x06 +#define PGM_DATA 0x07 + +struct pgm_info { + __u16 code; +}; + +struct prefix_info { + __u32 address; +}; + +struct interrupt_info { + struct list_head list; + u64 type; + union { + struct io_info io; + struct ext_info ext; + struct pgm_info pgm; + struct prefix_info prefix; + }; +}; + +/* for local_interrupt.action_flags */ +#define ACTION_STORE_ON_STOP 1 +#define ACTION_STOP_ON_STOP 2 + +struct local_interrupt { + spinlock_t lock; + struct list_head list; + atomic_t active; + struct float_interrupt *float_int; + int timer_due; /* event indicator for waitqueue below */ + wait_queue_head_t wq; + atomic_t *cpuflags; + unsigned int action_bits; +}; + +struct float_interrupt { + spinlock_t lock; + struct list_head list; + atomic_t active; + int next_rr_cpu; + unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)]; + struct local_interrupt *local_int[64]; +}; + + +struct kvm_vcpu_arch { + struct sie_block *sie_block; + unsigned long guest_gprs[16]; + s390_fp_regs host_fpregs; + unsigned int host_acrs[NUM_ACRS]; + s390_fp_regs guest_fpregs; + unsigned int guest_acrs[NUM_ACRS]; + struct local_interrupt local_int; + struct timer_list ckc_timer; + union { + cpuid_t cpu_id; + u64 stidp_data; + }; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_arch{ + unsigned long guest_origin; + unsigned long guest_memsize; + struct sca_block *sca; + debug_info_t *dbf; + struct float_interrupt float_int; +}; + +extern int sie64a(struct sie_block *, __u64 *); +#endif diff --git a/include/asm-s390/kvm_para.h b/include/asm-s390/kvm_para.h new file mode 100644 index 0000000..2c50379 --- /dev/null +++ b/include/asm-s390/kvm_para.h @@ -0,0 +1,150 @@ +/* + * asm-s390/kvm_para.h - definition for paravirtual devices on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __S390_KVM_PARA_H +#define __S390_KVM_PARA_H + +/* + * Hypercalls for KVM on s390. The calling convention is similar to the + * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1 + * as hypercall number and R7 as parameter 6. The return value is + * written to R2. We use the diagnose instruction as hypercall. To avoid + * conflicts with existing diagnoses for LPAR and z/VM, we do not use + * the instruction encoded number, but specify the number in R1 and + * use 0x500 as KVM hypercall + * + * Copyright IBM Corp. 2007,2008 + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +static inline long kvm_hypercall0(unsigned long nr) +{ + register unsigned long __nr asm("1") = nr; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr): "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, + unsigned long p2) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) + : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3) : "memory", "cc"); + return __rc; +} + + +static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register unsigned long __p5 asm("6") = p5; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, + unsigned long p6) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register unsigned long __p5 asm("6") = p5; + register unsigned long __p6 asm("7") = p6; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6) + : "memory", "cc"); + return __rc; +} + +/* kvm on s390 is always paravirtualization enabled */ +static inline int kvm_para_available(void) +{ + return 1; +} + +/* No feature bits are currently assigned for kvm on s390 */ +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +#endif /* __S390_KVM_PARA_H */ diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h new file mode 100644 index 0000000..5c871a9 --- /dev/null +++ b/include/asm-s390/kvm_virtio.h @@ -0,0 +1,53 @@ +/* + * kvm_virtio.h - definition for virtio for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __KVM_S390_VIRTIO_H +#define __KVM_S390_VIRTIO_H + +#include <linux/types.h> + +struct kvm_device_desc { + /* The device type: console, network, disk etc. Type 0 terminates. */ + __u8 type; + /* The number of virtqueues (first in config array) */ + __u8 num_vq; + /* + * The number of bytes of feature bits. Multiply by 2: one for host + * features and one for guest acknowledgements. + */ + __u8 feature_len; + /* The number of bytes of the config array after virtqueues. */ + __u8 config_len; + /* A status byte, written by the Guest. */ + __u8 status; + __u8 config[0]; +}; + +/* + * This is how we expect the device configuration field for a virtqueue + * to be laid out in config space. + */ +struct kvm_vqconfig { + /* The token returned with an interrupt. Set by the guest */ + __u64 token; + /* The address of the virtio ring */ + __u64 address; + /* The number of entries in the virtio_ring */ + __u16 num; + +}; + +#define KVM_S390_VIRTIO_NOTIFY 0 +#define KVM_S390_VIRTIO_RESET 1 +#define KVM_S390_VIRTIO_SET_STATUS 2 + +#endif diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h index 5de3efb..0bc51d5 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h @@ -381,27 +381,32 @@ struct _lowcore /* whether the kernel died with panic() or not */ __u32 panic_magic; /* 0xe00 */ - __u8 pad13[0x1200-0xe04]; /* 0xe04 */ + __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ + + /* 64 bit extparam used for pfault, diag 250 etc */ + __u64 ext_params2; /* 0x11B8 */ + + __u8 pad14[0x1200-0x11C0]; /* 0x11C0 */ /* System info area */ __u64 floating_pt_save_area[16]; /* 0x1200 */ __u64 gpregs_save_area[16]; /* 0x1280 */ __u32 st_status_fixed_logout[4]; /* 0x1300 */ - __u8 pad14[0x1318-0x1310]; /* 0x1310 */ + __u8 pad15[0x1318-0x1310]; /* 0x1310 */ __u32 prefixreg_save_area; /* 0x1318 */ __u32 fpt_creg_save_area; /* 0x131c */ - __u8 pad15[0x1324-0x1320]; /* 0x1320 */ + __u8 pad16[0x1324-0x1320]; /* 0x1320 */ __u32 tod_progreg_save_area; /* 0x1324 */ __u32 cpu_timer_save_area[2]; /* 0x1328 */ __u32 clock_comp_save_area[2]; /* 0x1330 */ - __u8 pad16[0x1340-0x1338]; /* 0x1338 */ + __u8 pad17[0x1340-0x1338]; /* 0x1338 */ __u32 access_regs_save_area[16]; /* 0x1340 */ __u64 cregs_save_area[16]; /* 0x1380 */ /* align to the top of the prefix area */ - __u8 pad17[0x2000-0x1400]; /* 0x1400 */ + __u8 pad18[0x2000-0x1400]; /* 0x1400 */ #endif /* !__s390x__ */ } __attribute__((packed)); /* End structure*/ diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h index 1698e29..5dd5e7b 100644 --- a/include/asm-s390/mmu.h +++ b/include/asm-s390/mmu.h @@ -7,6 +7,7 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; int noexec; + int pgstes; } mm_context_t; #endif diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index b5a34c6..4c2fbf4 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -20,7 +20,13 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - mm->context.noexec = s390_noexec; + if (current->mm->context.pgstes) { + mm->context.noexec = 0; + mm->context.pgstes = 1; + } else { + mm->context.noexec = s390_noexec; + mm->context.pgstes = 0; + } mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 65154dc..f8347ce 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -30,6 +30,7 @@ */ #ifndef __ASSEMBLY__ #include <linux/mm_types.h> +#include <asm/bitops.h> #include <asm/bug.h> #include <asm/processor.h> @@ -219,6 +220,8 @@ extern char empty_zero_page[PAGE_SIZE]; /* Software bits in the page table entry */ #define _PAGE_SWT 0x001 /* SW pte type bit t */ #define _PAGE_SWX 0x002 /* SW pte type bit x */ +#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ +#define __HAVE_ARCH_PTE_SPECIAL /* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 @@ -258,6 +261,13 @@ extern char empty_zero_page[PAGE_SIZE]; * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. */ +/* Page status table bits for virtualization */ +#define RCP_PCL_BIT 55 +#define RCP_HR_BIT 54 +#define RCP_HC_BIT 53 +#define RCP_GR_BIT 50 +#define RCP_GC_BIT 49 + #ifndef __s390x__ /* Bits in the segment table address-space-control-element */ @@ -510,9 +520,56 @@ static inline int pte_file(pte_t pte) return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; } +static inline int pte_special(pte_t pte) +{ + return (pte_val(pte) & _PAGE_SPECIAL); +} + #define __HAVE_ARCH_PTE_SAME #define pte_same(a,b) (pte_val(a) == pte_val(b)) +static inline void rcp_lock(pte_t *ptep) +{ +#ifdef CONFIG_PGSTE + unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + preempt_disable(); + while (test_and_set_bit(RCP_PCL_BIT, pgste)) + ; +#endif +} + +static inline void rcp_unlock(pte_t *ptep) +{ +#ifdef CONFIG_PGSTE + unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + clear_bit(RCP_PCL_BIT, pgste); + preempt_enable(); +#endif +} + +/* forward declaration for SetPageUptodate in page-flags.h*/ +static inline void page_clear_dirty(struct page *page); +#include <linux/page-flags.h> + +static inline void ptep_rcp_copy(pte_t *ptep) +{ +#ifdef CONFIG_PGSTE + struct page *page = virt_to_page(pte_val(*ptep)); + unsigned int skey; + unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + + skey = page_get_storage_key(page_to_phys(page)); + if (skey & _PAGE_CHANGED) + set_bit_simple(RCP_GC_BIT, pgste); + if (skey & _PAGE_REFERENCED) + set_bit_simple(RCP_GR_BIT, pgste); + if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) + SetPageDirty(page); + if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) + SetPageReferenced(page); +#endif +} + /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. @@ -599,6 +656,8 @@ static inline void pmd_clear(pmd_t *pmd) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + if (mm->context.pgstes) + ptep_rcp_copy(ptep); pte_val(*ptep) = _PAGE_TYPE_EMPTY; if (mm->context.noexec) pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; @@ -663,10 +722,34 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { +#ifdef CONFIG_PGSTE + unsigned long physpage; + int young; + unsigned long *pgste; + + if (!vma->vm_mm->context.pgstes) + return 0; + physpage = pte_val(*ptep) & PAGE_MASK; + pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + + young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); + rcp_lock(ptep); + if (young) + set_bit_simple(RCP_GR_BIT, pgste); + young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); + rcp_unlock(ptep); + return young; +#endif return 0; } @@ -674,7 +757,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, static inline int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - /* No need to flush TLB; bits are in storage key */ + /* No need to flush TLB + * On s390 reference bits are in storage key and never in TLB + * With virtualization we handle the reference bit, without we + * we can simply return */ +#ifdef CONFIG_PGSTE + return ptep_test_and_clear_young(vma, address, ptep); +#endif return 0; } @@ -693,15 +782,25 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); } - pte_val(*ptep) = _PAGE_TYPE_EMPTY; } static inline void ptep_invalidate(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + if (mm->context.pgstes) { + rcp_lock(ptep); + __ptep_ipte(address, ptep); + ptep_rcp_copy(ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + rcp_unlock(ptep); + return; + } __ptep_ipte(address, ptep); - if (mm->context.noexec) + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + if (mm->context.noexec) { __ptep_ipte(address, ptep + PTRS_PER_PTE); + pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; + } } /* @@ -966,6 +1065,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern int add_shared_memory(unsigned long start, unsigned long size); extern int remove_shared_memory(unsigned long start, unsigned long size); +extern int s390_enable_sie(void); /* * No page table caches to initialise diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index a76a6b8..aaf4b51 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -62,6 +62,7 @@ extern unsigned long machine_flags; #define MACHINE_IS_VM (machine_flags & 1) #define MACHINE_IS_P390 (machine_flags & 4) #define MACHINE_HAS_MVPG (machine_flags & 16) +#define MACHINE_IS_KVM (machine_flags & 64) #define MACHINE_HAS_IDTE (machine_flags & 128) #define MACHINE_HAS_DIAG9C (machine_flags & 256) diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h new file mode 100644 index 0000000..02402303 --- /dev/null +++ b/include/asm-sh/hugetlb.h @@ -0,0 +1,91 @@ +#ifndef _ASM_SH_HUGETLB_H +#define _ASM_SH_HUGETLB_H + +#include <asm/page.h> + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_SH_HUGETLB_H */ diff --git a/include/asm-sh/pgtable_32.h b/include/asm-sh/pgtable_32.h index 3e3557c..cbc731d 100644 --- a/include/asm-sh/pgtable_32.h +++ b/include/asm-sh/pgtable_32.h @@ -326,6 +326,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) #define pte_dirty(pte) ((pte).pte_low & _PAGE_DIRTY) #define pte_young(pte) ((pte).pte_low & _PAGE_ACCESSED) #define pte_file(pte) ((pte).pte_low & _PAGE_FILE) +#define pte_special(pte) (0) #ifdef CONFIG_X2TLB #define pte_write(pte) ((pte).pte_high & _PAGE_EXT_USER_WRITE) @@ -356,6 +357,8 @@ PTE_BIT_FUNC(low, mkdirty, |= _PAGE_DIRTY); PTE_BIT_FUNC(low, mkold, &= ~_PAGE_ACCESSED); PTE_BIT_FUNC(low, mkyoung, |= _PAGE_ACCESSED); +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + /* * Macro and implementation to make a page protection as uncachable. */ diff --git a/include/asm-sh/pgtable_64.h b/include/asm-sh/pgtable_64.h index f9dd9d3..c78990c 100644 --- a/include/asm-sh/pgtable_64.h +++ b/include/asm-sh/pgtable_64.h @@ -254,10 +254,11 @@ extern void __handle_bad_pmd_kernel(pmd_t * pmd); /* * The following have defined behavior only work if pte_present() is true. */ -static inline int pte_dirty(pte_t pte){ return pte_val(pte) & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte){ return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_write(pte_t pte){ return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_special(pte_t pte){ return 0; } static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_WRITE)); return pte; } static inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; } @@ -266,6 +267,7 @@ static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index 2cc235b..d84af6d 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -219,6 +219,11 @@ static inline int pte_file(pte_t pte) return pte_val(pte) & BTFIXUP_HALF(pte_filei); } +static inline int pte_special(pte_t pte) +{ + return 0; +} + /* */ BTFIXUPDEF_HALF(pte_wrprotecti) @@ -251,6 +256,8 @@ BTFIXUPDEF_CALL_CONST(pte_t, pte_mkyoung, pte_t) #define pte_mkdirty(pte) BTFIXUP_CALL(pte_mkdirty)(pte) #define pte_mkyoung(pte) BTFIXUP_CALL(pte_mkyoung)(pte) +#define pte_mkspecial(pte) (pte) + #define pfn_pte(pfn, prot) mk_pte(pfn_to_page(pfn), prot) BTFIXUPDEF_CALL(unsigned long, pte_pfn, pte_t) diff --git a/include/asm-sparc64/hugetlb.h b/include/asm-sparc64/hugetlb.h new file mode 100644 index 0000000..412af58 --- /dev/null +++ b/include/asm-sparc64/hugetlb.h @@ -0,0 +1,84 @@ +#ifndef _ASM_SPARC64_HUGETLB_H +#define _ASM_SPARC64_HUGETLB_H + +#include <asm/page.h> + + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + +void hugetlb_prefault_arch_hook(struct mm_struct *mm); + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_SPARC64_HUGETLB_H */ diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index e93a482..618117d 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -39,8 +39,6 @@ #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define ARCH_HAS_SETCLEAR_HUGE_PTE -#define ARCH_HAS_HUGETLB_PREFAULT_HOOK #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 549e452..0e200e7 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -506,6 +506,11 @@ static inline pte_t pte_mkyoung(pte_t pte) return __pte(pte_val(pte) | mask); } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} + static inline unsigned long pte_young(pte_t pte) { unsigned long mask; @@ -608,6 +613,11 @@ static inline unsigned long pte_present(pte_t pte) return val; } +static inline int pte_special(pte_t pte) +{ + return 0; +} + #define pmd_set(pmdp, ptep) \ (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) #define pud_set(pudp, pmdp) \ diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index 4102b44..02db81b 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -173,6 +173,11 @@ static inline int pte_newprot(pte_t pte) return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); } +static inline int pte_special(pte_t pte) +{ + return 0; +} + /* * ================================= * Flags setting section. @@ -241,6 +246,11 @@ static inline pte_t pte_mknewpage(pte_t pte) return(pte); } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return(pte); +} + static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index 9870cc1..7154dc4 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -30,7 +30,13 @@ extern int geode_get_dev_base(unsigned int dev); /* MSRS */ -#define GX_GLCP_SYS_RSTPLL 0x4C000014 +#define MSR_GLIU_P2D_RO0 0x10000029 + +#define MSR_LX_GLD_MSR_CONFIG 0x48002001 +#define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data + * sheet has the wrong value */ +#define MSR_GLCP_SYS_RSTPLL 0x4C000014 +#define MSR_GLCP_DOTPLL 0x4C000015 #define MSR_LBAR_SMB 0x5140000B #define MSR_LBAR_GPIO 0x5140000C @@ -45,8 +51,14 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_PIC_ZSEL_LOW 0x51400022 #define MSR_PIC_ZSEL_HIGH 0x51400023 -#define MFGPT_IRQ_MSR 0x51400028 -#define MFGPT_NR_MSR 0x51400029 +#define MSR_MFGPT_IRQ 0x51400028 +#define MSR_MFGPT_NR 0x51400029 +#define MSR_MFGPT_SETUP 0x5140002B + +#define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ + +#define MSR_GX_GLD_MSR_CONFIG 0xC0002001 +#define MSR_GX_MSR_PADSEL 0xC0002011 /* Resource Sizes */ @@ -93,6 +105,15 @@ extern int geode_get_dev_base(unsigned int dev); #define PM_AWKD 0x50 #define PM_SSC 0x54 +/* VSA2 magic values */ + +#define VSA_VRC_INDEX 0xAC1C +#define VSA_VRC_DATA 0xAC1E +#define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */ +#define VSA_VR_SIGNATURE 0x0003 +#define VSA_VR_MEM_SIZE 0x0200 +#define VSA_SIG 0x4132 /* signature is ascii 'VSA2' */ + /* GPIO */ #define GPIO_OUTPUT_VAL 0x00 @@ -164,6 +185,17 @@ static inline int is_geode(void) return (is_geode_gx() || is_geode_lx()); } +/* + * The VSA has virtual registers that we can query for a signature. + */ +static inline int geode_has_vsa2(void) +{ + outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); + outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); + + return (inw(VSA_VRC_DATA) == VSA_SIG); +} + /* MFGPTs */ #define MFGPT_MAX_TIMERS 8 diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h new file mode 100644 index 0000000..14171a4 --- /dev/null +++ b/include/asm-x86/hugetlb.h @@ -0,0 +1,91 @@ +#ifndef _ASM_X86_HUGETLB_H +#define _ASM_X86_HUGETLB_H + +#include <asm/page.h> + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_X86_HUGETLB_H */ diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 7a71120..80eefef2 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h @@ -188,4 +188,45 @@ struct kvm_cpuid2 { struct kvm_cpuid_entry2 entries[0]; }; +/* for KVM_GET_PIT and KVM_SET_PIT */ +struct kvm_pit_channel_state { + __u32 count; /* can be 65536 */ + __u16 latched_count; + __u8 count_latched; + __u8 status_latched; + __u8 status; + __u8 read_state; + __u8 write_state; + __u8 write_latch; + __u8 rw_mode; + __u8 mode; + __u8 bcd; + __u8 gate; + __s64 count_load_time; +}; + +struct kvm_pit_state { + struct kvm_pit_channel_state channels[3]; +}; + +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) + #endif diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 68ee390..9d963cd 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -20,6 +20,13 @@ #include <asm/desc.h> +#define KVM_MAX_VCPUS 16 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + +#define KVM_PIO_PAGE_OFFSET 1 + #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ @@ -39,6 +46,13 @@ #define INVALID_PAGE (~(hpa_t)0) #define UNMAPPED_GVA (~(gpa_t)0) +/* shadow tables are PAE even on non-PAE hosts */ +#define KVM_HPAGE_SHIFT 21 +#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT) +#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1)) + +#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE) + #define DE_VECTOR 0 #define UD_VECTOR 6 #define NM_VECTOR 7 @@ -48,6 +62,7 @@ #define SS_VECTOR 12 #define GP_VECTOR 13 #define PF_VECTOR 14 +#define MC_VECTOR 18 #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 @@ -58,7 +73,8 @@ #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 -#define KVM_NUM_MMU_PAGES 1024 +#define KVM_MMU_HASH_SHIFT 10 +#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 @@ -106,6 +122,12 @@ enum { #define KVM_NR_MEM_OBJS 40 +struct kvm_guest_debug { + int enabled; + unsigned long bp[4]; + int singlestep; +}; + /* * We don't want allocation failures within the mmu code, so we preallocate * enough memory for a single page fault in a cache. @@ -140,6 +162,7 @@ union kvm_mmu_page_role { unsigned pad_for_nice_hex_output:6; unsigned metaphysical:1; unsigned access:3; + unsigned invalid:1; }; }; @@ -204,11 +227,6 @@ struct kvm_vcpu_arch { u64 shadow_efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ -#define VCPU_MP_STATE_RUNNABLE 0 -#define VCPU_MP_STATE_UNINITIALIZED 1 -#define VCPU_MP_STATE_INIT_RECEIVED 2 -#define VCPU_MP_STATE_SIPI_RECEIVED 3 -#define VCPU_MP_STATE_HALTED 4 int mp_state; int sipi_vector; u64 ia32_misc_enable_msr; @@ -226,8 +244,9 @@ struct kvm_vcpu_arch { u64 *last_pte_updated; struct { - gfn_t gfn; /* presumed gfn during guest pte update */ - struct page *page; /* page corresponding to that gfn */ + gfn_t gfn; /* presumed gfn during guest pte update */ + pfn_t pfn; /* pfn corresponding to that gfn */ + int largepage; } update_pte; struct i387_fxsave_struct host_fx_image; @@ -261,6 +280,11 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + + gpa_t time; + struct kvm_vcpu_time_info hv_clock; + unsigned int time_offset; + struct page *time_page; }; struct kvm_mem_alias { @@ -283,10 +307,13 @@ struct kvm_arch{ struct list_head active_mmu_pages; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; + struct kvm_pit *vpit; int round_robin_prev_vcpu; unsigned int tss_addr; struct page *apic_access_page; + + gpa_t wall_clock; }; struct kvm_vm_stat { @@ -298,6 +325,7 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 remote_tlb_flush; + u32 lpages; }; struct kvm_vcpu_stat { @@ -320,6 +348,7 @@ struct kvm_vcpu_stat { u32 fpu_reload; u32 insn_emulation; u32 insn_emulation_fail; + u32 hypercalls; }; struct descriptor_table { @@ -355,6 +384,7 @@ struct kvm_x86_ops { u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); void (*get_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); + int (*get_cpl)(struct kvm_vcpu *vcpu); void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); @@ -410,6 +440,15 @@ void kvm_mmu_zap_all(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); + +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes); +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, + gpa_t addr, unsigned long *ret); + +extern bool tdp_enabled; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ @@ -429,6 +468,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, unsigned long *rflags); +void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); @@ -448,12 +488,14 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value); -void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); -void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); -void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); -unsigned long get_cr8(struct kvm_vcpu *vcpu); -void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); + +void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); @@ -491,6 +533,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); +void kvm_enable_tdp(void); + int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); @@ -600,6 +644,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" #define MSR_IA32_TIME_STAMP_COUNTER 0x010 @@ -610,4 +655,30 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) #define RMODE_TSS_SIZE \ (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) +enum { + TASK_SWITCH_CALL = 0, + TASK_SWITCH_IRET = 1, + TASK_SWITCH_JMP = 2, + TASK_SWITCH_GATE = 3, +}; + +#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 5, d1, d2, d3, d4, d5) +#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 4, d1, d2, d3, d4, 0) +#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 3, d1, d2, d3, 0, 0) +#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 2, d1, d2, 0, 0, 0) +#define KVMTRACE_1D(evt, vcpu, d1, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 1, d1, 0, 0, 0, 0) +#define KVMTRACE_0D(evt, vcpu, name) \ + trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ + vcpu, 0, 0, 0, 0, 0, 0) + #endif diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index c6f3fd8..5098459 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -10,10 +10,65 @@ * paravirtualization, the appropriate feature bit should be checked. */ #define KVM_CPUID_FEATURES 0x40000001 +#define KVM_FEATURE_CLOCKSOURCE 0 +#define KVM_FEATURE_NOP_IO_DELAY 1 +#define KVM_FEATURE_MMU_OP 2 + +#define MSR_KVM_WALL_CLOCK 0x11 +#define MSR_KVM_SYSTEM_TIME 0x12 + +#define KVM_MAX_MMU_OP_BATCH 32 + +/* Operations for KVM_HC_MMU_OP */ +#define KVM_MMU_OP_WRITE_PTE 1 +#define KVM_MMU_OP_FLUSH_TLB 2 +#define KVM_MMU_OP_RELEASE_PT 3 + +/* Payload for KVM_HC_MMU_OP */ +struct kvm_mmu_op_header { + __u32 op; + __u32 pad; +}; + +struct kvm_mmu_op_write_pte { + struct kvm_mmu_op_header header; + __u64 pte_phys; + __u64 pte_val; +}; + +struct kvm_mmu_op_flush_tlb { + struct kvm_mmu_op_header header; +}; + +struct kvm_mmu_op_release_pt { + struct kvm_mmu_op_header header; + __u64 pt_phys; +}; #ifdef __KERNEL__ #include <asm/processor.h> +/* xen binary-compatible interface. See xen headers for details */ +struct kvm_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct kvm_wall_clock { + uint32_t wc_version; + uint32_t wc_sec; + uint32_t wc_nsec; +} __attribute__((__packed__)); + + +extern void kvmclock_init(void); + + /* This instruction is vmcall. On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. */ diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index a496d63..801b31f 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -195,6 +195,11 @@ static inline int pte_exec(pte_t pte) return !(pte_val(pte) & _PAGE_NX); } +static inline int pte_special(pte_t pte) +{ + return 0; +} + static inline int pmd_large(pmd_t pte) { return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == @@ -256,6 +261,11 @@ static inline pte_t pte_clrglobal(pte_t pte) return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} + extern pteval_t __supported_pte_mask; static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 117343b..2e7974e 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -722,6 +722,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index 6b5233b..e63741f 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h @@ -15,5 +15,7 @@ struct machine_ops { extern struct machine_ops machine_ops; void machine_real_restart(unsigned char *code, int length); +void native_machine_crash_shutdown(struct pt_regs *regs); +void native_machine_shutdown(void); #endif /* _ASM_REBOOT_H */ diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h index c8b024a..8014d96 100644 --- a/include/asm-xtensa/pgtable.h +++ b/include/asm-xtensa/pgtable.h @@ -210,6 +210,8 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } + static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~(_PAGE_WRITABLE | _PAGE_HW_WRITE); return pte; } static inline pte_t pte_mkclean(pte_t pte) @@ -222,6 +224,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITABLE; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) + { return pte; } /* * Conversion functions: convert a page and protection to a page entry, diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 1dbe074..43b406d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -46,6 +46,8 @@ * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) + * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap + * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf @@ -121,6 +123,10 @@ extern void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, int bits); extern int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); +extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, + const unsigned long *relmap, int bits); +extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, + int sz, int bits); extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4e4e340..6a5dbdc 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -101,6 +101,8 @@ extern void reserve_bootmem_node(pg_data_t *pgdat, extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); +extern void *alloc_bootmem_section(unsigned long size, + unsigned long section_nr); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ diff --git a/include/linux/bsg.h b/include/linux/bsg.h index e8406c5..cf0303a6 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -56,19 +56,25 @@ struct sg_io_v4 { #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device { struct device *class_dev; - struct device *dev; + struct device *parent; int minor; struct request_queue *queue; + struct kref ref; + void (*release)(struct device *); }; -extern int bsg_register_queue(struct request_queue *, struct device *, const char *); +extern int bsg_register_queue(struct request_queue *q, + struct device *parent, const char *name, + void (*release)(struct device *)); extern void bsg_unregister_queue(struct request_queue *); #else -static inline int bsg_register_queue(struct request_queue * rq, struct device *dev, const char *name) +static inline int bsg_register_queue(struct request_queue *q, + struct device *parent, const char *name, + void (*release)(struct device *)) { return 0; } -static inline void bsg_unregister_queue(struct request_queue *rq) +static inline void bsg_unregister_queue(struct request_queue *q) { } #endif diff --git a/include/linux/cache.h b/include/linux/cache.h index 4552504..97e2488 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -60,4 +60,8 @@ #endif #endif +#ifndef CONFIG_ARCH_HAS_CACHE_LINE_SIZE +#define cache_line_size() L1_CACHE_BYTES +#endif + #endif /* __LINUX_CACHE_H */ diff --git a/include/linux/capability.h b/include/linux/capability.h index 7d50ff6..eaab759 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -155,6 +155,7 @@ typedef struct kernel_cap_struct { * Add any capability from current's capability bounding set * to the current process' inheritable set * Allow taking bits out of capability bounding set + * Allow modification of the securebits for a process */ #define CAP_SETPCAP 8 @@ -490,8 +491,6 @@ extern const kernel_cap_t __cap_init_eff_set; int capable(int cap); int __capable(struct task_struct *t, int cap); -extern long cap_prctl_drop(unsigned long cap); - #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 259c805..9650806 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -14,6 +14,8 @@ * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. + * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. + * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. * * The available cpumask operations are: * @@ -53,7 +55,9 @@ * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing * int cpulist_parse(buf, map) Parse ascii string as cpulist * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) - * int cpus_remap(dst, src, old, new) *dst = map(old, new)(src) + * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src) + * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap + * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_cpu_mask(cpu, mask) for-loop cpu over mask * @@ -330,6 +334,22 @@ static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } +#define cpus_onto(dst, orig, relmap) \ + __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS) +static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp, + const cpumask_t *relmapp, int nbits) +{ + bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); +} + +#define cpus_fold(dst, orig, sz) \ + __cpus_fold(&(dst), &(orig), sz, NR_CPUS) +static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, + int sz, int nbits) +{ + bitmap_fold(dstp->bits, origp->bits, sz, nbits); +} + #if NR_CPUS > 1 #define for_each_cpu_mask(cpu, mask) \ for ((cpu) = first_cpu(mask); \ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 726761e..0385783 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -26,7 +26,7 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); -int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); +int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask); extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask); @@ -103,7 +103,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) static inline void cpuset_init_current_mems_allowed(void) {} static inline void cpuset_update_task_memory_state(void) {} -static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) +static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { return 1; } diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 325acdf..2a063b6 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -90,6 +90,7 @@ static inline int dmi_check_system(const struct dmi_system_id *list) { return 0; static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } +static inline void dmi_scan_machine(void) { return; } static inline int dmi_get_year(int year) { return 0; } static inline int dmi_name_in_vendors(const char *s) { return 0; } #define dmi_available 0 diff --git a/include/linux/fb.h b/include/linux/fb.h index 58c57a3..72295b0 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -791,6 +791,17 @@ struct fb_tile_ops { */ #define FBINFO_MISC_ALWAYS_SETPAR 0x40000 +/* + * Host and GPU endianness differ. + */ +#define FBINFO_FOREIGN_ENDIAN 0x100000 +/* + * Big endian math. This is the same flags as above, but with different + * meaning, it is set by the fb subsystem depending FOREIGN_ENDIAN flag + * and host endianness. Drivers should not use this flag. + */ +#define FBINFO_BE_MATH 0x100000 + struct fb_info { int node; int flags; @@ -899,15 +910,11 @@ struct fb_info { #endif -#if defined (__BIG_ENDIAN) -#define FB_LEFT_POS(bpp) (32 - bpp) -#define FB_SHIFT_HIGH(val, bits) ((val) >> (bits)) -#define FB_SHIFT_LOW(val, bits) ((val) << (bits)) -#else -#define FB_LEFT_POS(bpp) (0) -#define FB_SHIFT_HIGH(val, bits) ((val) << (bits)) -#define FB_SHIFT_LOW(val, bits) ((val) >> (bits)) -#endif +#define FB_LEFT_POS(p, bpp) (fb_be_math(p) ? (32 - (bpp)) : 0) +#define FB_SHIFT_HIGH(p, val, bits) (fb_be_math(p) ? (val) >> (bits) : \ + (val) << (bits)) +#define FB_SHIFT_LOW(p, val, bits) (fb_be_math(p) ? (val) << (bits) : \ + (val) >> (bits)) /* * `Generic' versions of the frame buffer device operations @@ -970,6 +977,25 @@ extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, struct dentry *dentry, int datasync); +static inline bool fb_be_math(struct fb_info *info) +{ +#ifdef CONFIG_FB_FOREIGN_ENDIAN +#if defined(CONFIG_FB_BOTH_ENDIAN) + return info->flags & FBINFO_BE_MATH; +#elif defined(CONFIG_FB_BIG_ENDIAN) + return true; +#elif defined(CONFIG_FB_LITTLE_ENDIAN) + return false; +#endif /* CONFIG_FB_BOTH_ENDIAN */ +#else +#ifdef __BIG_ENDIAN + return true; +#else + return false; +#endif /* __BIG_ENDIAN */ +#endif /* CONFIG_FB_FOREIGN_ENDIAN */ +} + /* drivers/video/fbsysfs.c */ extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); diff --git a/include/linux/fs.h b/include/linux/fs.h index d6d7c52..2c92574 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -474,8 +474,8 @@ struct address_space_operations { int (*releasepage) (struct page *, gfp_t); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); - struct page* (*get_xip_page)(struct address_space *, sector_t, - int); + int (*get_xip_mem)(struct address_space *, pgoff_t, int, + void **, unsigned long *); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct address_space *, struct page *, struct page *); @@ -1178,7 +1178,8 @@ struct block_device_operations { int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); long (*compat_ioctl) (struct file *, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, unsigned long *); + int (*direct_access) (struct block_device *, sector_t, + void **, unsigned long *); int (*media_changed) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 164be9d..c37653b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -119,35 +119,22 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) static inline enum zone_type gfp_zone(gfp_t flags) { - int base = 0; - -#ifdef CONFIG_NUMA - if (flags & __GFP_THISNODE) - base = MAX_NR_ZONES; -#endif - #ifdef CONFIG_ZONE_DMA if (flags & __GFP_DMA) - return base + ZONE_DMA; + return ZONE_DMA; #endif #ifdef CONFIG_ZONE_DMA32 if (flags & __GFP_DMA32) - return base + ZONE_DMA32; + return ZONE_DMA32; #endif if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) == (__GFP_HIGHMEM | __GFP_MOVABLE)) - return base + ZONE_MOVABLE; + return ZONE_MOVABLE; #ifdef CONFIG_HIGHMEM if (flags & __GFP_HIGHMEM) - return base + ZONE_HIGHMEM; + return ZONE_HIGHMEM; #endif - return base + ZONE_NORMAL; -} - -static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags) -{ - BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); - return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags; + return ZONE_NORMAL; } /* @@ -157,13 +144,27 @@ static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags) * virtual kernel addresses to the allocated page(s). */ +static inline int gfp_zonelist(gfp_t flags) +{ + if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) + return 1; + + return 0; +} + /* * We get the zone list from the current node and the gfp_mask. * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. + * There are two zonelists per node, one for all zones with memory and + * one containing just zones from the node the zonelist belongs to. * * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets * optimized to &contig_page_data at compile-time. */ +static inline struct zonelist *node_zonelist(int nid, gfp_t flags) +{ + return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); +} #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } @@ -174,6 +175,10 @@ static inline void arch_alloc_page(struct page *page, int order) { } extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *); +extern struct page * +__alloc_pages_nodemask(gfp_t, unsigned int, + struct zonelist *, nodemask_t *nodemask); + static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { @@ -184,8 +189,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, if (nid < 0) nid = numa_node_id(); - return __alloc_pages(gfp_mask, order, - NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask)); + return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } #ifdef CONFIG_NUMA diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index addca4c..a79e80b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -8,6 +8,7 @@ #include <linux/mempolicy.h> #include <linux/shm.h> #include <asm/tlbflush.h> +#include <asm/hugetlb.h> struct ctl_table; @@ -51,51 +52,6 @@ int pmd_huge(pmd_t pmd); void hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); -#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE -#define is_hugepage_only_range(mm, addr, len) 0 -#endif - -#ifndef ARCH_HAS_HUGETLB_FREE_PGD_RANGE -#define hugetlb_free_pgd_range free_pgd_range -#else -void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, - unsigned long end, unsigned long floor, - unsigned long ceiling); -#endif - -#ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE -/* - * If the arch doesn't supply something else, assume that hugepage - * size aligned regions are ok without further preparation. - */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - return 0; -} -#else -int prepare_hugepage_range(unsigned long addr, unsigned long len); -#endif - -#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE -#define set_huge_pte_at(mm, addr, ptep, pte) set_pte_at(mm, addr, ptep, pte) -#define huge_ptep_get_and_clear(mm, addr, ptep) ptep_get_and_clear(mm, addr, ptep) -#else -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); -#endif - -#ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK -#define hugetlb_prefault_arch_hook(mm) do { } while (0) -#else -void hugetlb_prefault_arch_hook(struct mm_struct *mm); -#endif - #else /* !CONFIG_HUGETLB_PAGE */ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) diff --git a/include/linux/i2o.h b/include/linux/i2o.h index e92170d..f65e58a 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -613,14 +613,9 @@ struct i2o_sys_tbl { extern struct list_head i2o_controllers; /* Message functions */ -static inline struct i2o_message *i2o_msg_get(struct i2o_controller *); extern struct i2o_message *i2o_msg_get_wait(struct i2o_controller *, int); -static inline void i2o_msg_post(struct i2o_controller *, struct i2o_message *); -static inline int i2o_msg_post_wait(struct i2o_controller *, - struct i2o_message *, unsigned long); extern int i2o_msg_post_wait_mem(struct i2o_controller *, struct i2o_message *, unsigned long, struct i2o_dma *); -static inline void i2o_flush_reply(struct i2o_controller *, u32); /* IOP functions */ extern int i2o_status_get(struct i2o_controller *); diff --git a/include/linux/ide.h b/include/linux/ide.h index f0af504..32fd77b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -48,13 +48,6 @@ typedef unsigned char byte; /* used everywhere */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ /* - * Tune flags - */ -#define IDE_TUNE_NOAUTO 2 -#define IDE_TUNE_AUTO 1 -#define IDE_TUNE_DEFAULT 0 - -/* * state flags */ @@ -68,23 +61,30 @@ typedef unsigned char byte; /* used everywhere */ */ #define IDE_NR_PORTS (10) -#define IDE_DATA_OFFSET (0) -#define IDE_ERROR_OFFSET (1) -#define IDE_NSECTOR_OFFSET (2) -#define IDE_SECTOR_OFFSET (3) -#define IDE_LCYL_OFFSET (4) -#define IDE_HCYL_OFFSET (5) -#define IDE_SELECT_OFFSET (6) -#define IDE_STATUS_OFFSET (7) -#define IDE_CONTROL_OFFSET (8) -#define IDE_IRQ_OFFSET (9) - -#define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET -#define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET -#define IDE_ALTSTATUS_OFFSET IDE_CONTROL_OFFSET -#define IDE_IREASON_OFFSET IDE_NSECTOR_OFFSET -#define IDE_BCOUNTL_OFFSET IDE_LCYL_OFFSET -#define IDE_BCOUNTH_OFFSET IDE_HCYL_OFFSET +struct ide_io_ports { + unsigned long data_addr; + + union { + unsigned long error_addr; /* read: error */ + unsigned long feature_addr; /* write: feature */ + }; + + unsigned long nsect_addr; + unsigned long lbal_addr; + unsigned long lbam_addr; + unsigned long lbah_addr; + + unsigned long device_addr; + + union { + unsigned long status_addr; /*  read: status  */ + unsigned long command_addr; /* write: command */ + }; + + unsigned long ctl_addr; + + unsigned long irq_addr; +}; #define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good)) #define BAD_R_STAT (BUSY_STAT | ERR_STAT) @@ -163,7 +163,11 @@ typedef u8 hwif_chipset_t; * Structure to hold all information about the location of this port */ typedef struct hw_regs_s { - unsigned long io_ports[IDE_NR_PORTS]; /* task file registers */ + union { + struct ide_io_ports io_ports; + unsigned long io_ports_array[IDE_NR_PORTS]; + }; + int irq; /* our irq number */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ hwif_chipset_t chipset; @@ -179,10 +183,10 @@ static inline void ide_std_init_ports(hw_regs_t *hw, { unsigned int i; - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) - hw->io_ports[i] = io_addr++; + for (i = 0; i <= 7; i++) + hw->io_ports_array[i] = io_addr++; - hw->io_ports[IDE_CONTROL_OFFSET] = ctl_addr; + hw->io_ports.ctl_addr = ctl_addr; } #include <asm/ide.h> @@ -328,7 +332,6 @@ typedef struct ide_drive_s { unsigned atapi_overlap : 1; /* ATAPI overlap (not supported) */ unsigned doorlocking : 1; /* for removable only: door lock/unlock works */ unsigned nodma : 1; /* disallow DMA */ - unsigned autotune : 2; /* 0=default, 1=autotune, 2=noautotune */ unsigned remap_0_to_1 : 1; /* 0=noremap, 1=remap 0->1 (for EZDrive) */ unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ unsigned vdma : 1; /* 1=doing PIO over DMA 0=doing normal DMA */ @@ -432,8 +435,8 @@ typedef struct hwif_s { char name[6]; /* name of interface, eg. "ide0" */ - /* task file registers for pata and sata */ - unsigned long io_ports[IDE_NR_PORTS]; + struct ide_io_ports io_ports; + unsigned long sata_scr[SATA_NR_PORTS]; ide_drive_t drives[MAX_DRIVES]; /* drive info */ @@ -520,7 +523,6 @@ typedef struct hwif_s { unsigned present : 1; /* this interface exists */ unsigned serialized : 1; /* serialized all channel operation */ unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ - unsigned reset : 1; /* reset after probe */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ unsigned mmio : 1; /* host uses MMIO */ @@ -703,10 +705,6 @@ void ide_add_generic_settings(ide_drive_t *); read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; -#ifdef CONFIG_BLK_DEV_IDEPCI -void ide_pci_create_host_proc(const char *, get_info_t *); -#endif - /* * Standard exit stuff: */ @@ -807,8 +805,14 @@ int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsig #ifndef _IDE_C extern ide_hwif_t ide_hwifs[]; /* master data repository */ #endif +extern int ide_noacpi; +extern int ide_acpigtf; +extern int ide_acpionboot; extern int noautodma; +extern int ide_vlb_clk; +extern int ide_pci_clk; + ide_hwif_t *ide_find_port_slot(const struct ide_port_info *); static inline ide_hwif_t *ide_find_port(void) @@ -1068,8 +1072,6 @@ enum { IDE_HFLAG_NO_DMA = (1 << 14), /* check if host is PCI IDE device before allowing DMA */ IDE_HFLAG_NO_AUTODMA = (1 << 15), - /* don't autotune PIO */ - IDE_HFLAG_NO_AUTOTUNE = (1 << 16), /* host is CS5510/CS5520 */ IDE_HFLAG_CS5520 = IDE_HFLAG_VDMA, /* no LBA48 */ @@ -1215,13 +1217,15 @@ static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif void ide_remove_port_from_hwgroup(ide_hwif_t *); -void ide_unregister(unsigned int); +void ide_unregister(ide_hwif_t *); void ide_register_region(struct gendisk *); void ide_unregister_region(struct gendisk *); void ide_undecoded_slave(ide_drive_t *); +void ide_port_apply_params(ide_hwif_t *); + int ide_device_add_all(u8 *idx, const struct ide_port_info *); int ide_device_add(u8 idx[4], const struct ide_port_info *); int ide_legacy_device_add(const struct ide_port_info *, unsigned long); @@ -1333,29 +1337,28 @@ static inline void ide_set_irq(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; - hwif->OUTB(drive->ctl | (on ? 0 : 2), - hwif->io_ports[IDE_CONTROL_OFFSET]); + hwif->OUTB(drive->ctl | (on ? 0 : 2), hwif->io_ports.ctl_addr); } static inline u8 ide_read_status(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - return hwif->INB(hwif->io_ports[IDE_STATUS_OFFSET]); + return hwif->INB(hwif->io_ports.status_addr); } static inline u8 ide_read_altstatus(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - return hwif->INB(hwif->io_ports[IDE_CONTROL_OFFSET]); + return hwif->INB(hwif->io_ports.ctl_addr); } static inline u8 ide_read_error(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - return hwif->INB(hwif->io_ports[IDE_ERROR_OFFSET]); + return hwif->INB(hwif->io_ports.error_addr); } /* @@ -1368,7 +1371,7 @@ static inline void ide_atapi_discard_data(ide_drive_t *drive, unsigned bcount) /* FIXME: use ->atapi_input_bytes */ while (bcount--) - (void)hwif->INB(hwif->io_ports[IDE_DATA_OFFSET]); + (void)hwif->INB(hwif->io_ports.data_addr); } static inline void ide_atapi_write_zeros(ide_drive_t *drive, unsigned bcount) @@ -1377,7 +1380,7 @@ static inline void ide_atapi_write_zeros(ide_drive_t *drive, unsigned bcount) /* FIXME: use ->atapi_output_bytes */ while (bcount--) - hwif->OUTB(0, hwif->io_ports[IDE_DATA_OFFSET]); + hwif->OUTB(0, hwif->io_ports.data_addr); } #endif /* _IDE_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 37a6f5b..bf6b8a6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -9,6 +9,7 @@ #include <linux/ipc.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> +#include <linux/securebits.h> #include <net/net_namespace.h> #define INIT_FDTABLE \ @@ -172,7 +173,7 @@ extern struct group_info init_groups; .cap_inheritable = CAP_INIT_INH_SET, \ .cap_permitted = CAP_FULL_SET, \ .cap_bset = CAP_INIT_BSET, \ - .keep_capabilities = 0, \ + .securebits = SECUREBITS_DEFAULT, \ .user = INIT_USER, \ .comm = "swapper", \ .thread = INIT_THREAD, \ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 0f28486..1036631 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -173,6 +173,13 @@ struct kretprobe_blackpoint { const char *name; void *addr; }; + +struct kprobe_blackpoint { + const char *name; + unsigned long start_addr; + unsigned long range; +}; + extern struct kretprobe_blackpoint kretprobe_blacklist[]; static inline void kretprobe_assert(struct kretprobe_instance *ri, @@ -227,15 +234,21 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); +int register_kprobes(struct kprobe **kps, int num); +void unregister_kprobes(struct kprobe **kps, int num); int setjmp_pre_handler(struct kprobe *, struct pt_regs *); int longjmp_break_handler(struct kprobe *, struct pt_regs *); int register_jprobe(struct jprobe *p); void unregister_jprobe(struct jprobe *p); +int register_jprobes(struct jprobe **jps, int num); +void unregister_jprobes(struct jprobe **jps, int num); void jprobe_return(void); unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); +int register_kretprobes(struct kretprobe **rps, int num); +void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); @@ -254,16 +267,30 @@ static inline int register_kprobe(struct kprobe *p) { return -ENOSYS; } +static inline int register_kprobes(struct kprobe **kps, int num) +{ + return -ENOSYS; +} static inline void unregister_kprobe(struct kprobe *p) { } +static inline void unregister_kprobes(struct kprobe **kps, int num) +{ +} static inline int register_jprobe(struct jprobe *p) { return -ENOSYS; } +static inline int register_jprobes(struct jprobe **jps, int num) +{ + return -ENOSYS; +} static inline void unregister_jprobe(struct jprobe *p) { } +static inline void unregister_jprobes(struct jprobe **jps, int num) +{ +} static inline void jprobe_return(void) { } @@ -271,9 +298,16 @@ static inline int register_kretprobe(struct kretprobe *rp) { return -ENOSYS; } +static inline int register_kretprobes(struct kretprobe **rps, int num) +{ + return -ENOSYS; +} static inline void unregister_kretprobe(struct kretprobe *rp) { } +static inline void unregister_kretprobes(struct kretprobe **rps, int num) +{ +} static inline void kprobe_flush_task(struct task_struct *tk) { } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c1ec04f..a281afe 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -8,11 +8,18 @@ */ #include <asm/types.h> +#include <linux/compiler.h> #include <linux/ioctl.h> #include <asm/kvm.h> #define KVM_API_VERSION 12 +/* for KVM_TRACE_ENABLE */ +struct kvm_user_trace_setup { + __u32 buf_size; /* sub_buffer size of each per-cpu */ + __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ +}; + /* for KVM_CREATE_MEMORY_REGION */ struct kvm_memory_region { __u32 slot; @@ -73,6 +80,9 @@ struct kvm_irqchip { #define KVM_EXIT_INTR 10 #define KVM_EXIT_SET_TPR 11 #define KVM_EXIT_TPR_ACCESS 12 +#define KVM_EXIT_S390_SIEIC 13 +#define KVM_EXIT_S390_RESET 14 +#define KVM_EXIT_DCR 15 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -137,6 +147,27 @@ struct kvm_run { __u32 is_write; __u32 pad; } tpr_access; + /* KVM_EXIT_S390_SIEIC */ + struct { + __u8 icptcode; + __u64 mask; /* psw upper half */ + __u64 addr; /* psw lower half */ + __u16 ipa; + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_DCR */ + struct { + __u32 dcrn; + __u32 data; + __u8 is_write; + } dcr; /* Fix the size of the union. */ char padding[256]; }; @@ -204,6 +235,74 @@ struct kvm_vapic_addr { __u64 vapic_addr; }; +/* for KVM_SET_MPSTATE */ + +#define KVM_MP_STATE_RUNNABLE 0 +#define KVM_MP_STATE_UNINITIALIZED 1 +#define KVM_MP_STATE_INIT_RECEIVED 2 +#define KVM_MP_STATE_HALTED 3 +#define KVM_MP_STATE_SIPI_RECEIVED 4 + +struct kvm_mp_state { + __u32 mp_state; +}; + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +#define KVM_TRC_SHIFT 16 +/* + * kvm trace categories + */ +#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) +#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */ + +/* + * kvm trace action + */ +#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) +#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) +#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) + +#define KVM_TRC_HEAD_SIZE 12 +#define KVM_TRC_CYCLE_SIZE 8 +#define KVM_TRC_EXTRA_MAX 7 + +/* This structure represents a single trace buffer record. */ +struct kvm_trace_rec { + __u32 event:28; + __u32 extra_u32:3; + __u32 cycle_in:1; + __u32 pid; + __u32 vcpu_id; + union { + struct { + __u32 cycle_lo, cycle_hi; + __u32 extra_u32[KVM_TRC_EXTRA_MAX]; + } cycle; + struct { + __u32 extra_u32[KVM_TRC_EXTRA_MAX]; + } nocycle; + } u; +}; + #define KVMIO 0xAE /* @@ -212,6 +311,8 @@ struct kvm_vapic_addr { #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) + +#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) /* * Check if a kvm extension is available. Argument is extension number, * return is 1 (yes) or 0 (no, sorry). @@ -222,7 +323,12 @@ struct kvm_vapic_addr { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) - +/* + * ioctls for kvm trace + */ +#define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) +#define KVM_TRACE_PAUSE _IO(KVMIO, 0x07) +#define KVM_TRACE_DISABLE _IO(KVMIO, 0x08) /* * Extension capability list. */ @@ -233,6 +339,13 @@ struct kvm_vapic_addr { #define KVM_CAP_SET_TSS_ADDR 4 #define KVM_CAP_VAPIC 6 #define KVM_CAP_EXT_CPUID 7 +#define KVM_CAP_CLOCKSOURCE 8 +#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ +#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ +#define KVM_CAP_PIT 11 +#define KVM_CAP_NOP_IO_DELAY 12 +#define KVM_CAP_PV_MMU 13 +#define KVM_CAP_MP_STATE 14 /* * ioctls for VM fds @@ -255,6 +368,9 @@ struct kvm_vapic_addr { #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_CREATE_PIT _IO(KVMIO, 0x64) +#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) +#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) /* * ioctls for vcpu fds @@ -281,5 +397,17 @@ struct kvm_vapic_addr { #define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) /* Available with KVM_CAP_VAPIC */ #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) +/* valid for virtual machine (for floating interrupt)_and_ vcpu */ +#define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt) +/* store status for s390 */ +#define KVM_S390_STORE_STATUS_NOADDR (-1ul) +#define KVM_S390_STORE_STATUS_PREFIXED (-2ul) +#define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long) +/* initial ipl psw for s390 */ +#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) +/* initial reset for s390 */ +#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) +#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) +#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 928b0d59..3989789 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -15,6 +15,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/preempt.h> +#include <linux/marker.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -24,29 +25,18 @@ #include <asm/kvm_host.h> -#define KVM_MAX_VCPUS 4 -#define KVM_MEMORY_SLOTS 8 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 - -#define KVM_PIO_PAGE_OFFSET 1 - /* * vcpu->requests bit members */ #define KVM_REQ_TLB_FLUSH 0 #define KVM_REQ_MIGRATE_TIMER 1 #define KVM_REQ_REPORT_TPR_ACCESS 2 +#define KVM_REQ_MMU_RELOAD 3 +#define KVM_REQ_TRIPLE_FAULT 4 struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -struct kvm_guest_debug { - int enabled; - unsigned long bp[4]; - int singlestep; -}; - /* * It would be nice to use something smarter than a linear search, TBD... * Thankfully we dont expect many devices to register (famous last words :), @@ -67,7 +57,9 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_vcpu { struct kvm *kvm; +#ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; +#endif int vcpu_id; struct mutex mutex; int cpu; @@ -100,6 +92,10 @@ struct kvm_memory_slot { unsigned long flags; unsigned long *rmap; unsigned long *dirty_bitmap; + struct { + unsigned long rmap_pde; + int write_count; + } *lpage_info; unsigned long userspace_addr; int user_alloc; }; @@ -114,11 +110,11 @@ struct kvm { KVM_PRIVATE_MEM_SLOTS]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; struct list_head vm_list; - struct file *filp; struct kvm_io_bus mmio_bus; struct kvm_io_bus pio_bus; struct kvm_vm_stat stat; struct kvm_arch arch; + atomic_t users_count; }; /* The guest did something we don't support. */ @@ -145,14 +141,19 @@ int kvm_init(void *opaque, unsigned int vcpu_size, struct module *module); void kvm_exit(void); +void kvm_get_kvm(struct kvm *kvm); +void kvm_put_kvm(struct kvm *kvm); + #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); extern struct page *bad_page; +extern pfn_t bad_pfn; int is_error_page(struct page *page); +int is_error_pfn(pfn_t pfn); int kvm_is_error_hva(unsigned long addr); int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, @@ -166,8 +167,19 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int user_alloc); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); +void kvm_set_page_dirty(struct page *page); +void kvm_set_page_accessed(struct page *page); + +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); +void kvm_release_pfn_dirty(pfn_t); +void kvm_release_pfn_clean(pfn_t pfn); +void kvm_set_pfn_dirty(pfn_t pfn); +void kvm_set_pfn_accessed(pfn_t pfn); +void kvm_get_pfn(pfn_t pfn); + int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, @@ -188,6 +200,7 @@ void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_reload_remote_mmus(struct kvm *kvm); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -223,6 +236,10 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state); +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); @@ -255,6 +272,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); static inline void kvm_guest_enter(void) @@ -296,5 +314,18 @@ struct kvm_stats_debugfs_item { struct dentry *dentry; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; +extern struct dentry *kvm_debugfs_dir; + +#ifdef CONFIG_KVM_TRACE +int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); +void kvm_trace_cleanup(void); +#else +static inline +int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} +#define kvm_trace_cleanup() ((void)0) +#endif #endif diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 5497aac..3ddce03 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -11,8 +11,11 @@ /* Return values for hypercalls */ #define KVM_ENOSYS 1000 +#define KVM_EFAULT EFAULT +#define KVM_E2BIG E2BIG -#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HC_MMU_OP 2 /* * hypercalls use architecture specific @@ -20,6 +23,12 @@ #include <asm/kvm_para.h> #ifdef __KERNEL__ +#ifdef CONFIG_KVM_GUEST +void __init kvm_guest_init(void); +#else +#define kvm_guest_init() do { } while (0) +#endif + static inline int kvm_para_has_feature(unsigned int feature) { if (kvm_arch_para_features() & (1UL << feature)) diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 1c4e46d..9b6f395 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -38,6 +38,8 @@ typedef unsigned long hva_t; typedef u64 hpa_t; typedef unsigned long hfn_t; +typedef hfn_t pfn_t; + struct kvm_pio_request { unsigned long count; int cur_count; diff --git a/include/linux/list.h b/include/linux/list.h index dac16f9..b4a939b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -319,6 +319,15 @@ static inline int list_empty_careful(const struct list_head *head) return (next == head) && (next == head->prev); } +/** + * list_is_singular - tests whether a list has just one entry. + * @head: the list to test. + */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + static inline void __list_splice(struct list_head *list, struct list_head *head) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8fee7a4..73e3586 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -8,8 +8,18 @@ struct page; struct zone; struct pglist_data; +struct mem_section; #ifdef CONFIG_MEMORY_HOTPLUG + +/* + * Magic number for free bootmem. + * The normal smallest mapcount is -1. Here is smaller value than it. + */ +#define SECTION_INFO 0xfffffffe +#define MIX_INFO 0xfffffffd +#define NODE_INFO 0xfffffffc + /* * pgdat resizing functions */ @@ -64,9 +74,11 @@ extern int offline_pages(unsigned long, unsigned long, unsigned long); /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); +extern int __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages); /* - * Walk thorugh all memory which is registered as resource. + * Walk through all memory which is registered as resource. * arg is (start_pfn, nr_pages, private_arg_pointer) */ extern int walk_memory_resource(unsigned long start_pfn, @@ -142,6 +154,18 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_NUMA */ #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) +{ +} +static inline void put_page_bootmem(struct page *page) +{ +} +#else +extern void register_page_bootmem_info_node(struct pglist_data *pgdat); +extern void put_page_bootmem(struct page *page); +#endif + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off @@ -169,6 +193,10 @@ static inline int mhp_notimplemented(const char *func) return -ENOSYS; } +static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) +{ +} + #endif /* ! CONFIG_MEMORY_HOTPLUG */ extern int add_memory(int nid, u64 start, u64 size); @@ -176,5 +204,8 @@ extern int arch_add_memory(int nid, u64 start, u64 size); extern int remove_memory(u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); +extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms); +extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, + unsigned long pnum); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 59c4865..3a39570 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -8,15 +8,32 @@ * Copyright 2003,2004 Andi Kleen SuSE Labs */ +/* + * Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are + * passed by the user to either set_mempolicy() or mbind() in an 'int' actual. + * The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags. + */ + /* Policies */ -#define MPOL_DEFAULT 0 -#define MPOL_PREFERRED 1 -#define MPOL_BIND 2 -#define MPOL_INTERLEAVE 3 +enum { + MPOL_DEFAULT, + MPOL_PREFERRED, + MPOL_BIND, + MPOL_INTERLEAVE, + MPOL_MAX, /* always last member of enum */ +}; -#define MPOL_MAX MPOL_INTERLEAVE +/* Flags for set_mempolicy */ +#define MPOL_F_STATIC_NODES (1 << 15) +#define MPOL_F_RELATIVE_NODES (1 << 14) -/* Flags for get_mem_policy */ +/* + * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to + * either set_mempolicy() or mbind(). + */ +#define MPOL_MODE_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES) + +/* Flags for get_mempolicy */ #define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */ #define MPOL_F_ADDR (1<<1) /* look up vma using address */ #define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */ @@ -27,6 +44,14 @@ #define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ #define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ +/* + * Internal flags that share the struct mempolicy flags word with + * "mode flags". These flags are allocated from bit 0 up, as they + * are never OR'ed into the mode in mempolicy API arguments. + */ +#define MPOL_F_SHARED (1 << 0) /* identify shared policies */ +#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ + #ifdef __KERNEL__ #include <linux/mmzone.h> @@ -35,7 +60,6 @@ #include <linux/spinlock.h> #include <linux/nodemask.h> -struct vm_area_struct; struct mm_struct; #ifdef CONFIG_NUMA @@ -54,22 +78,27 @@ struct mm_struct; * mmap_sem. * * Freeing policy: - * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd. - * All other policies don't have any external state. mpol_free() handles this. + * Mempolicy objects are reference counted. A mempolicy will be freed when + * mpol_put() decrements the reference count to zero. * - * Copying policy objects: - * For MPOL_BIND the zonelist must be always duplicated. mpol_clone() does this. + * Duplicating policy objects: + * mpol_dup() allocates a new mempolicy and copies the specified mempolicy + * to the new storage. The reference count of the new object is initialized + * to 1, representing the caller of mpol_dup(). */ struct mempolicy { atomic_t refcnt; - short policy; /* See MPOL_* above */ + unsigned short mode; /* See MPOL_* above */ + unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ union { - struct zonelist *zonelist; /* bind */ short preferred_node; /* preferred */ - nodemask_t nodes; /* interleave */ + nodemask_t nodes; /* interleave/bind */ /* undefined for default */ } v; - nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */ + union { + nodemask_t cpuset_mems_allowed; /* relative to these nodes */ + nodemask_t user_nodemask; /* nodemask passed by user */ + } w; }; /* @@ -77,18 +106,43 @@ struct mempolicy { * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. */ -extern void __mpol_free(struct mempolicy *pol); -static inline void mpol_free(struct mempolicy *pol) +extern void __mpol_put(struct mempolicy *pol); +static inline void mpol_put(struct mempolicy *pol) { if (pol) - __mpol_free(pol); + __mpol_put(pol); } -extern struct mempolicy *__mpol_copy(struct mempolicy *pol); -static inline struct mempolicy *mpol_copy(struct mempolicy *pol) +/* + * Does mempolicy pol need explicit unref after use? + * Currently only needed for shared policies. + */ +static inline int mpol_needs_cond_ref(struct mempolicy *pol) +{ + return (pol && (pol->flags & MPOL_F_SHARED)); +} + +static inline void mpol_cond_put(struct mempolicy *pol) +{ + if (mpol_needs_cond_ref(pol)) + __mpol_put(pol); +} + +extern struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol, + struct mempolicy *frompol); +static inline struct mempolicy *mpol_cond_copy(struct mempolicy *tompol, + struct mempolicy *frompol) +{ + if (!frompol) + return frompol; + return __mpol_cond_copy(tompol, frompol); +} + +extern struct mempolicy *__mpol_dup(struct mempolicy *pol); +static inline struct mempolicy *mpol_dup(struct mempolicy *pol) { if (pol) - pol = __mpol_copy(pol); + pol = __mpol_dup(pol); return pol; } @@ -108,11 +162,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b) return 1; return __mpol_equal(a, b); } -#define vma_mpol_equal(a,b) mpol_equal(vma_policy(a), vma_policy(b)) - -/* Could later add inheritance of the process policy here. */ - -#define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL) /* * Tree of shared policies for a shared memory region. @@ -133,8 +182,7 @@ struct shared_policy { spinlock_t lock; }; -void mpol_shared_policy_init(struct shared_policy *info, int policy, - nodemask_t *nodes); +void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, struct mempolicy *new); @@ -149,9 +197,9 @@ extern void mpol_rebind_task(struct task_struct *tsk, extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern void mpol_fix_fork_child_flag(struct task_struct *p); -extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol); + unsigned long addr, gfp_t gfp_flags, + struct mempolicy **mpol, nodemask_t **nodemask); extern unsigned slab_node(struct mempolicy *policy); extern enum zone_type policy_zone; @@ -165,6 +213,13 @@ static inline void check_highest_zone(enum zone_type k) int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); + +#ifdef CONFIG_TMPFS +extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); + +extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, + int no_context); +#endif #else struct mempolicy {}; @@ -173,19 +228,26 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b) { return 1; } -#define vma_mpol_equal(a,b) 1 -#define mpol_set_vma_default(vma) do {} while(0) +static inline void mpol_put(struct mempolicy *p) +{ +} + +static inline void mpol_cond_put(struct mempolicy *pol) +{ +} -static inline void mpol_free(struct mempolicy *p) +static inline struct mempolicy *mpol_cond_copy(struct mempolicy *to, + struct mempolicy *from) { + return from; } static inline void mpol_get(struct mempolicy *pol) { } -static inline struct mempolicy *mpol_copy(struct mempolicy *old) +static inline struct mempolicy *mpol_dup(struct mempolicy *old) { return NULL; } @@ -199,8 +261,8 @@ static inline int mpol_set_shared_policy(struct shared_policy *info, return -EINVAL; } -static inline void mpol_shared_policy_init(struct shared_policy *info, - int policy, nodemask_t *nodes) +static inline void mpol_shared_policy_init(struct shared_policy *sp, + struct mempolicy *mpol) { } @@ -239,9 +301,12 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) } static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) + unsigned long addr, gfp_t gfp_flags, + struct mempolicy **mpol, nodemask_t **nodemask) { - return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); + *mpol = NULL; + *nodemask = NULL; + return node_zonelist(0, gfp_flags); } static inline int do_migrate_pages(struct mm_struct *mm, @@ -254,6 +319,21 @@ static inline int do_migrate_pages(struct mm_struct *mm, static inline void check_highest_zone(int k) { } + +#ifdef CONFIG_TMPFS +static inline int mpol_parse_str(char *str, struct mempolicy **mpol, + int no_context) +{ + return 1; /* error */ +} + +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, + int no_context) +{ + return 0; +} +#endif + #endif /* CONFIG_NUMA */ #endif /* __KERNEL__ */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ff7df1a..9fa1a80 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,38 @@ struct mlx4_mtt { int page_shift; }; +enum { + MLX4_DB_PER_PAGE = PAGE_SIZE / 4 +}; + +struct mlx4_db_pgdir { + struct list_head list; + DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE); + DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2); + unsigned long *bits[2]; + __be32 *db_page; + dma_addr_t db_dma; +}; + +struct mlx4_ib_user_db_page; + +struct mlx4_db { + __be32 *db; + union { + struct mlx4_db_pgdir *pgdir; + struct mlx4_ib_user_db_page *user_page; + } u; + dma_addr_t dma; + int index; + int order; +}; + +struct mlx4_hwq_resources { + struct mlx4_db db; + struct mlx4_mtt mtt; + struct mlx4_buf buf; +}; + struct mlx4_mr { struct mlx4_mtt mtt; u64 iova; @@ -341,6 +373,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); + +int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, + int size, int max_direct); +void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, + int size); + int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index a5e43fe..7f128b2 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -296,6 +296,10 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, struct mlx4_qp_context *context); +int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, + struct mlx4_qp_context *context, + struct mlx4_qp *qp, enum mlx4_qp_state *qp_state); + static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); diff --git a/include/linux/mm.h b/include/linux/mm.h index 286d315..8b7f4a5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -107,6 +107,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ +#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -164,8 +165,6 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); - struct page *(*nopage)(struct vm_area_struct *area, - unsigned long address, int *type); unsigned long (*nopfn)(struct vm_area_struct *area, unsigned long address); @@ -173,7 +172,25 @@ struct vm_operations_struct { * writable, if an error is returned it will cause a SIGBUS */ int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); #ifdef CONFIG_NUMA + /* + * set_policy() op must add a reference to any non-NULL @new mempolicy + * to hold the policy upon return. Caller should pass NULL @new to + * remove a policy and fall back to surrounding context--i.e. do not + * install a MPOL_DEFAULT policy, nor the task or system default + * mempolicy. + */ int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); + + /* + * get_policy() op must add reference [mpol_get()] to any policy at + * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure + * in mm/mempolicy.c will do this automatically. + * get_policy() must NOT add a ref if the policy at (vma,addr) is not + * marked as MPOL_SHARED. vma policies are protected by the mmap_sem. + * If no [shared/vma] mempolicy exists at the addr, get_policy() op + * must return NULL--i.e., do not "fallback" to task or system default + * policy. + */ struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr); int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, @@ -397,11 +414,11 @@ static inline void set_compound_order(struct page *page, unsigned long order) * we have run out of space and have to fall back to an * alternate (slower) way of determining the node. * - * No sparsemem: | NODE | ZONE | ... | FLAGS | - * with space for node: | SECTION | NODE | ZONE | ... | FLAGS | - * no space for node: | SECTION | ZONE | ... | FLAGS | + * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | + * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | + * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | */ -#ifdef CONFIG_SPARSEMEM +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTIONS_WIDTH SECTIONS_SHIFT #else #define SECTIONS_WIDTH 0 @@ -409,9 +426,12 @@ static inline void set_compound_order(struct page *page, unsigned long order) #define ZONES_WIDTH ZONES_SHIFT -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT #else +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#error "Vmemmap: No space for nodes field in page flags" +#endif #define NODES_WIDTH 0 #endif @@ -454,8 +474,8 @@ static inline void set_compound_order(struct page *page, unsigned long order) #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) -#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED -#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED +#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS +#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS #endif #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) @@ -504,10 +524,12 @@ static inline struct zone *page_zone(struct page *page) return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; } +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } +#endif static inline void set_page_zone(struct page *page, enum zone_type zone) { @@ -602,9 +624,12 @@ static inline struct address_space *page_mapping(struct page *page) struct address_space *mapping = page->mapping; VM_BUG_ON(PageSlab(page)); +#ifdef CONFIG_SWAP if (unlikely(PageSwapCache(page))) mapping = &swapper_space; - else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) + else +#endif + if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) mapping = NULL; return mapping; } @@ -649,12 +674,6 @@ static inline int page_mapped(struct page *page) } /* - * Error return values for the *_nopage functions - */ -#define NOPAGE_SIGBUS (NULL) -#define NOPAGE_OOM ((struct page *) (-1)) - -/* * Error return values for the *_nopfn functions */ #define NOPFN_SIGBUS ((unsigned long) -1) @@ -720,7 +739,9 @@ struct zap_details { unsigned long truncate_count; /* Compare vm_truncate_count */ }; -struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t); +struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); + unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); unsigned long unmap_vmas(struct mmu_gather **tlb, @@ -1149,6 +1170,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn); struct page *follow_page(struct vm_area_struct *, unsigned long address, unsigned int foll_flags); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index af190ce..29adaa78 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -172,6 +172,7 @@ struct mm_struct { atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ int map_count; /* number of VMAs */ + int core_waiters; struct rw_semaphore mmap_sem; spinlock_t page_table_lock; /* Protects page tables and some counters */ @@ -216,11 +217,10 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ /* coredumping support */ - int core_waiters; struct completion *core_startup_done, core_done; /* aio bits */ - rwlock_t ioctx_list_lock; + rwlock_t ioctx_list_lock; /* aio lock */ struct kioctx *ioctx_list; #ifdef CONFIG_CGROUP_MEM_RES_CTLR struct mem_cgroup *mem_cgroup; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9f274a6..aad9800 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#ifndef __GENERATING_BOUNDS_H #include <linux/spinlock.h> #include <linux/list.h> @@ -15,6 +16,7 @@ #include <linux/seqlock.h> #include <linux/nodemask.h> #include <linux/pageblock-flags.h> +#include <linux/bounds.h> #include <asm/atomic.h> #include <asm/page.h> @@ -129,6 +131,8 @@ struct per_cpu_pageset { #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) #endif +#endif /* !__GENERATING_BOUNDS.H */ + enum zone_type { #ifdef CONFIG_ZONE_DMA /* @@ -177,9 +181,11 @@ enum zone_type { ZONE_HIGHMEM, #endif ZONE_MOVABLE, - MAX_NR_ZONES + __MAX_NR_ZONES }; +#ifndef __GENERATING_BOUNDS_H + /* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the @@ -188,28 +194,15 @@ enum zone_type { * match the requested limits. See gfp_zone() in include/linux/gfp.h */ -/* - * Count the active zones. Note that the use of defined(X) outside - * #if and family is not necessarily defined so ensure we cannot use - * it later. Use __ZONE_COUNT to work out how many shift bits we need. - */ -#define __ZONE_COUNT ( \ - defined(CONFIG_ZONE_DMA) \ - + defined(CONFIG_ZONE_DMA32) \ - + 1 \ - + defined(CONFIG_HIGHMEM) \ - + 1 \ -) -#if __ZONE_COUNT < 2 +#if MAX_NR_ZONES < 2 #define ZONES_SHIFT 0 -#elif __ZONE_COUNT <= 2 +#elif MAX_NR_ZONES <= 2 #define ZONES_SHIFT 1 -#elif __ZONE_COUNT <= 4 +#elif MAX_NR_ZONES <= 4 #define ZONES_SHIFT 2 #else #error ZONES_SHIFT -- too many zones configured adjust calculation #endif -#undef __ZONE_COUNT struct zone { /* Fields commonly accessed by the page allocator */ @@ -393,10 +386,10 @@ static inline int zone_is_oom_locked(const struct zone *zone) * The NUMA zonelists are doubled becausse we need zonelists that restrict the * allocations to a single node for GFP_THISNODE. * - * [0 .. MAX_NR_ZONES -1] : Zonelists with fallback - * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE) + * [0] : Zonelist with fallback + * [1] : No fallback (GFP_THISNODE) */ -#define MAX_ZONELISTS (2 * MAX_NR_ZONES) +#define MAX_ZONELISTS 2 /* @@ -464,11 +457,20 @@ struct zonelist_cache { unsigned long last_full_zap; /* when last zap'd (jiffies) */ }; #else -#define MAX_ZONELISTS MAX_NR_ZONES +#define MAX_ZONELISTS 1 struct zonelist_cache; #endif /* + * This struct contains information about a zone in a zonelist. It is stored + * here to avoid dereferences into large structures and lookups of tables + */ +struct zoneref { + struct zone *zone; /* Pointer to actual zone */ + int zone_idx; /* zone_idx(zoneref->zone) */ +}; + +/* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing @@ -476,34 +478,23 @@ struct zonelist_cache; * * If zlcache_ptr is not NULL, then it is just the address of zlcache, * as explained above. If zlcache_ptr is NULL, there is no zlcache. + * * + * To speed the reading of the zonelist, the zonerefs contain the zone index + * of the entry being read. Helper functions to access information given + * a struct zoneref are + * + * zonelist_zone() - Return the struct zone * for an entry in _zonerefs + * zonelist_zone_idx() - Return the index of the zone for an entry + * zonelist_node_idx() - Return the index of the node for an entry */ - struct zonelist { struct zonelist_cache *zlcache_ptr; // NULL or &zlcache - struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited + struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; #ifdef CONFIG_NUMA struct zonelist_cache zlcache; // optional ... #endif }; -#ifdef CONFIG_NUMA -/* - * Only custom zonelists like MPOL_BIND need to be filtered as part of - * policies. As described in the comment for struct zonelist_cache, these - * zonelists will not have a zlcache so zlcache_ptr will not be set. Use - * that to determine if the zonelists needs to be filtered or not. - */ -static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) -{ - return !zonelist->zlcache_ptr; -} -#else -static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) -{ - return 0; -} -#endif /* CONFIG_NUMA */ - #ifdef CONFIG_ARCH_POPULATES_NODE_MAP struct node_active_region { unsigned long start_pfn; @@ -637,9 +628,10 @@ static inline int is_normal_idx(enum zone_type idx) static inline int is_highmem(struct zone *zone) { #ifdef CONFIG_HIGHMEM - int zone_idx = zone - zone->zone_pgdat->node_zones; - return zone_idx == ZONE_HIGHMEM || - (zone_idx == ZONE_MOVABLE && zone_movable_is_highmem()); + int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones; + return zone_off == ZONE_HIGHMEM * sizeof(*zone) || + (zone_off == ZONE_MOVABLE * sizeof(*zone) && + zone_movable_is_highmem()); #else return 0; #endif @@ -730,32 +722,103 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) -#ifdef CONFIG_SPARSEMEM -#include <asm/sparsemem.h> -#endif +static inline struct zone *zonelist_zone(struct zoneref *zoneref) +{ + return zoneref->zone; +} -#if BITS_PER_LONG == 32 -/* - * with 32 bit page->flags field, we reserve 9 bits for node/zone info. - * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes. +static inline int zonelist_zone_idx(struct zoneref *zoneref) +{ + return zoneref->zone_idx; +} + +static inline int zonelist_node_idx(struct zoneref *zoneref) +{ +#ifdef CONFIG_NUMA + /* zone_to_nid not available in this context */ + return zoneref->zone->node; +#else + return 0; +#endif /* CONFIG_NUMA */ +} + +/** + * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point + * @z - The cursor used as a starting point for the search + * @highest_zoneidx - The zone index of the highest zone to return + * @nodes - An optional nodemask to filter the zonelist with + * @zone - The first suitable zone found is returned via this parameter + * + * This function returns the next zone at or below a given zone index that is + * within the allowed nodemask using a cursor as the starting point for the + * search. The zoneref returned is a cursor that is used as the next starting + * point for future calls to next_zones_zonelist(). */ -#define FLAGS_RESERVED 9 +struct zoneref *next_zones_zonelist(struct zoneref *z, + enum zone_type highest_zoneidx, + nodemask_t *nodes, + struct zone **zone); -#elif BITS_PER_LONG == 64 -/* - * with 64 bit flags field, there's plenty of room. +/** + * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist + * @zonelist - The zonelist to search for a suitable zone + * @highest_zoneidx - The zone index of the highest zone to return + * @nodes - An optional nodemask to filter the zonelist with + * @zone - The first suitable zone found is returned via this parameter + * + * This function returns the first zone at or below a given zone index that is + * within the allowed nodemask. The zoneref returned is a cursor that can be + * used to iterate the zonelist with next_zones_zonelist. The cursor should + * not be used by the caller as it does not match the value of the zone + * returned. */ -#define FLAGS_RESERVED 32 +static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, + enum zone_type highest_zoneidx, + nodemask_t *nodes, + struct zone **zone) +{ + return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes, + zone); +} -#else +/** + * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask + * @zone - The current zone in the iterator + * @z - The current pointer within zonelist->zones being iterated + * @zlist - The zonelist being iterated + * @highidx - The zone index of the highest zone to return + * @nodemask - Nodemask allowed by the allocator + * + * This iterator iterates though all zones at or below a given zone index and + * within a given nodemask + */ +#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ + for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ + zone; \ + z = next_zones_zonelist(z, highidx, nodemask, &zone)) \ -#error BITS_PER_LONG not defined +/** + * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index + * @zone - The current zone in the iterator + * @z - The current pointer within zonelist->zones being iterated + * @zlist - The zonelist being iterated + * @highidx - The zone index of the highest zone to return + * + * This iterator iterates though all zones at or below a given zone index. + */ +#define for_each_zone_zonelist(zone, z, zlist, highidx) \ + for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) +#ifdef CONFIG_SPARSEMEM +#include <asm/sparsemem.h> #endif #if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ !defined(CONFIG_ARCH_POPULATES_NODE_MAP) -#define early_pfn_to_nid(nid) (0UL) +static inline unsigned long early_pfn_to_nid(unsigned long pfn) +{ + return 0; +} #endif #ifdef CONFIG_FLATMEM @@ -833,6 +896,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } extern int __section_nr(struct mem_section* ms); +extern unsigned long usemap_size(void); /* * We use the lower bits of the mem_map pointer to store @@ -938,6 +1002,7 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #define pfn_valid_within(pfn) (1) #endif +#endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index f950921..b03b274 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -58,7 +58,11 @@ #define MSDOS_DOTDOT ".. " /* "..", padded to MSDOS_NAME chars */ /* media of boot sector */ -#define FAT_VALID_MEDIA(x) ((0xF8 <= (x) && (x) <= 0xFF) || (x) == 0xF0) +static inline int fat_valid_media(u8 media) +{ + return 0xf8 <= media || media == 0xf0; +} + #define FAT_FIRST_ENT(s, x) ((MSDOS_SB(s)->fat_bits == 32 ? 0x0FFFFF00 : \ MSDOS_SB(s)->fat_bits == 16 ? 0xFF00 : 0xF00) | (x)) @@ -195,6 +199,7 @@ struct fat_mount_options { char *iocharset; /* Charset used for filename input/display */ unsigned short shortname; /* flags for shortname display/create rule */ unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ unsigned quiet:1, /* set = fake successful chmods and chowns */ showexec:1, /* set = only set x bit for com/exe/bat */ sys_immutable:1, /* set = system files are immutable */ @@ -232,6 +237,7 @@ struct msdos_sb_info { struct mutex fat_lock; unsigned int prev_free; /* previously allocated cluster number */ unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? */ struct fat_mount_options options; struct nls_table *nls_disk; /* Codepage used on disk */ struct nls_table *nls_io; /* Charset used for input and display */ @@ -401,7 +407,7 @@ extern int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; -extern int fat_notify_change(struct dentry * dentry, struct iattr * attr); +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 88766e4..9f2d763 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -204,6 +204,7 @@ void ncp_update_inode2(struct inode *, struct ncp_entry_info *); /* linux/fs/ncpfs/dir.c */ extern const struct inode_operations ncp_dir_inode_operations; extern const struct file_operations ncp_dir_operations; +extern struct dentry_operations ncp_root_dentry_operations; int ncp_conn_logged_in(struct super_block *); int ncp_date_dos2unix(__le16 time, __le16 date); void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date); @@ -223,6 +224,12 @@ int ncp_disconnect(struct ncp_server *server); void ncp_lock_server(struct ncp_server *server); void ncp_unlock_server(struct ncp_server *server); +/* linux/fs/ncpfs/symlink.c */ +#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) +extern const struct address_space_operations ncp_symlink_aops; +int ncp_symlink(struct inode*, struct dentry*, const char*); +#endif + /* linux/fs/ncpfs/file.c */ extern const struct inode_operations ncp_file_inode_operations; extern const struct file_operations ncp_file_operations; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 905e18f..848025c 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -14,6 +14,8 @@ * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. * For details of nodes_remap(), see bitmap_remap in lib/bitmap.c. + * For details of nodes_onto(), see bitmap_onto in lib/bitmap.c. + * For details of nodes_fold(), see bitmap_fold in lib/bitmap.c. * * The available nodemask operations are: * @@ -55,7 +57,9 @@ * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) - * int nodes_remap(dst, src, old, new) *dst = map(old, new)(dst) + * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) + * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap + * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * @@ -326,6 +330,22 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } +#define nodes_onto(dst, orig, relmap) \ + __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) +static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, + const nodemask_t *relmapp, int nbits) +{ + bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); +} + +#define nodes_fold(dst, orig, sz) \ + __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) +static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, + int sz, int nbits) +{ + bitmap_fold(dstp->bits, origp->bits, sz, nbits); +} + #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index f4df400..20dfed5 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -247,6 +247,7 @@ extern struct blocking_notifier_head reboot_notifier_list; #define VT_DEALLOCATE 0x0002 /* Console will be deallocated */ #define VT_WRITE 0x0003 /* A char got output */ #define VT_UPDATE 0x0004 /* A bigger update occurred */ +#define VT_PREWRITE 0x0005 /* A char is about to be written to the console */ #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/oom.h b/include/linux/oom.h index 3852436..a7979ba 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -23,8 +23,8 @@ enum oom_constraint { CONSTRAINT_MEMORY_POLICY, }; -extern int try_set_zone_oom(struct zonelist *zonelist); -extern void clear_zonelist_oom(struct zonelist *zonelist); +extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags); +extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); extern int register_oom_notifier(struct notifier_block *nb); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5b30f1..590cff3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -6,7 +6,10 @@ #define PAGE_FLAGS_H #include <linux/types.h> +#ifndef __GENERATING_BOUNDS_H #include <linux/mm_types.h> +#include <linux/bounds.h> +#endif /* !__GENERATING_BOUNDS_H */ /* * Various page->flags bits: @@ -59,77 +62,138 @@ * extends from the high bits downwards. * * | FIELD | ... | FLAGS | - * N-1 ^ 0 - * (N-FLAGS_RESERVED) + * N-1 ^ 0 + * (NR_PAGEFLAGS) * - * The fields area is reserved for fields mapping zone, node and SPARSEMEM - * section. The boundry between these two areas is defined by - * FLAGS_RESERVED which defines the width of the fields section - * (see linux/mmzone.h). New flags must _not_ overlap with this area. + * The fields area is reserved for fields mapping zone, node (for NUMA) and + * SPARSEMEM section (for variants of SPARSEMEM that require section ids like + * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP). */ -#define PG_locked 0 /* Page is locked. Don't touch. */ -#define PG_error 1 -#define PG_referenced 2 -#define PG_uptodate 3 +enum pageflags { + PG_locked, /* Page is locked. Don't touch. */ + PG_error, + PG_referenced, + PG_uptodate, + PG_dirty, + PG_lru, + PG_active, + PG_slab, + PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ + PG_arch_1, + PG_reserved, + PG_private, /* If pagecache, has fs-private data */ + PG_writeback, /* Page is under writeback */ +#ifdef CONFIG_PAGEFLAGS_EXTENDED + PG_head, /* A head page */ + PG_tail, /* A tail page */ +#else + PG_compound, /* A compound page */ +#endif + PG_swapcache, /* Swap page: swp_entry_t in private */ + PG_mappedtodisk, /* Has blocks allocated on-disk */ + PG_reclaim, /* To be reclaimed asap */ + PG_buddy, /* Page is free, on buddy lists */ +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR + PG_uncached, /* Page has been mapped as uncached */ +#endif + __NR_PAGEFLAGS +}; + +#ifndef __GENERATING_BOUNDS_H + +/* + * Macros to create function definitions for page flags + */ +#define TESTPAGEFLAG(uname, lname) \ +static inline int Page##uname(struct page *page) \ + { return test_bit(PG_##lname, &page->flags); } -#define PG_dirty 4 -#define PG_lru 5 -#define PG_active 6 -#define PG_slab 7 /* slab debug (Suparna wants this) */ +#define SETPAGEFLAG(uname, lname) \ +static inline void SetPage##uname(struct page *page) \ + { set_bit(PG_##lname, &page->flags); } -#define PG_owner_priv_1 8 /* Owner use. If pagecache, fs may use*/ -#define PG_arch_1 9 -#define PG_reserved 10 -#define PG_private 11 /* If pagecache, has fs-private data */ +#define CLEARPAGEFLAG(uname, lname) \ +static inline void ClearPage##uname(struct page *page) \ + { clear_bit(PG_##lname, &page->flags); } -#define PG_writeback 12 /* Page is under writeback */ -#define PG_compound 14 /* Part of a compound page */ -#define PG_swapcache 15 /* Swap page: swp_entry_t in private */ +#define __SETPAGEFLAG(uname, lname) \ +static inline void __SetPage##uname(struct page *page) \ + { __set_bit(PG_##lname, &page->flags); } -#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ -#define PG_reclaim 17 /* To be reclaimed asap */ -#define PG_buddy 19 /* Page is free, on buddy lists */ +#define __CLEARPAGEFLAG(uname, lname) \ +static inline void __ClearPage##uname(struct page *page) \ + { __clear_bit(PG_##lname, &page->flags); } + +#define TESTSETFLAG(uname, lname) \ +static inline int TestSetPage##uname(struct page *page) \ + { return test_and_set_bit(PG_##lname, &page->flags); } + +#define TESTCLEARFLAG(uname, lname) \ +static inline int TestClearPage##uname(struct page *page) \ + { return test_and_clear_bit(PG_##lname, &page->flags); } -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ -#define PG_readahead PG_reclaim /* Reminder to do async read-ahead */ -/* PG_owner_priv_1 users should have descriptive aliases */ -#define PG_checked PG_owner_priv_1 /* Used by some filesystems */ -#define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ +#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ + SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) + +#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ + __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) + +#define PAGEFLAG_FALSE(uname) \ +static inline int Page##uname(struct page *page) \ + { return 0; } + +#define TESTSCFLAG(uname, lname) \ + TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) + +struct page; /* forward declaration */ + +PAGEFLAG(Locked, locked) TESTSCFLAG(Locked, locked) +PAGEFLAG(Error, error) +PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) +PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) +PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) +PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) +__PAGEFLAG(Slab, slab) +PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ +PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) +PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) + __SETPAGEFLAG(Private, private) -#if (BITS_PER_LONG > 32) /* - * 64-bit-only flags build down from bit 31 - * - * 32 bit -------------------------------| FIELDS | FLAGS | - * 64 bit | FIELDS | ?????? FLAGS | - * 63 32 0 + * Only test-and-set exist for PG_writeback. The unconditional operators are + * risky: they bypass page accounting. */ -#define PG_uncached 31 /* Page has been mapped as uncached */ -#endif +TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) +__PAGEFLAG(Buddy, buddy) +PAGEFLAG(MappedToDisk, mappedtodisk) +/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) +PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ + +#ifdef CONFIG_HIGHMEM /* - * Manipulation of page state flags + * Must use a macro here due to header dependency issues. page_zone() is not + * available at this point. */ -#define PageLocked(page) \ - test_bit(PG_locked, &(page)->flags) -#define SetPageLocked(page) \ - set_bit(PG_locked, &(page)->flags) -#define TestSetPageLocked(page) \ - test_and_set_bit(PG_locked, &(page)->flags) -#define ClearPageLocked(page) \ - clear_bit(PG_locked, &(page)->flags) -#define TestClearPageLocked(page) \ - test_and_clear_bit(PG_locked, &(page)->flags) - -#define PageError(page) test_bit(PG_error, &(page)->flags) -#define SetPageError(page) set_bit(PG_error, &(page)->flags) -#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) - -#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) -#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) -#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) -#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) +#define PageHighMem(__p) is_highmem(page_zone(__p)) +#else +PAGEFLAG_FALSE(HighMem) +#endif + +#ifdef CONFIG_SWAP +PAGEFLAG(SwapCache, swapcache) +#else +PAGEFLAG_FALSE(SwapCache) +#endif + +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +PAGEFLAG(Uncached, uncached) +#else +PAGEFLAG_FALSE(Uncached) +#endif static inline int PageUptodate(struct page *page) { @@ -177,97 +241,59 @@ static inline void SetPageUptodate(struct page *page) #endif } -#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) - -#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) -#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) -#define TestSetPageDirty(page) test_and_set_bit(PG_dirty, &(page)->flags) -#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) -#define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) -#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) - -#define PageLRU(page) test_bit(PG_lru, &(page)->flags) -#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) -#define ClearPageLRU(page) clear_bit(PG_lru, &(page)->flags) -#define __ClearPageLRU(page) __clear_bit(PG_lru, &(page)->flags) - -#define PageActive(page) test_bit(PG_active, &(page)->flags) -#define SetPageActive(page) set_bit(PG_active, &(page)->flags) -#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) -#define __ClearPageActive(page) __clear_bit(PG_active, &(page)->flags) - -#define PageSlab(page) test_bit(PG_slab, &(page)->flags) -#define __SetPageSlab(page) __set_bit(PG_slab, &(page)->flags) -#define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags) - -#ifdef CONFIG_HIGHMEM -#define PageHighMem(page) is_highmem(page_zone(page)) -#else -#define PageHighMem(page) 0 /* needed to optimize away at compile time */ -#endif +CLEARPAGEFLAG(Uptodate, uptodate) -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) - -#define PagePinned(page) test_bit(PG_pinned, &(page)->flags) -#define SetPagePinned(page) set_bit(PG_pinned, &(page)->flags) -#define ClearPagePinned(page) clear_bit(PG_pinned, &(page)->flags) +extern void cancel_dirty_page(struct page *page, unsigned int account_size); -#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) -#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -#define __ClearPageReserved(page) __clear_bit(PG_reserved, &(page)->flags) +int test_clear_page_writeback(struct page *page); +int test_set_page_writeback(struct page *page); -#define SetPagePrivate(page) set_bit(PG_private, &(page)->flags) -#define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags) -#define PagePrivate(page) test_bit(PG_private, &(page)->flags) -#define __SetPagePrivate(page) __set_bit(PG_private, &(page)->flags) -#define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags) +static inline void set_page_writeback(struct page *page) +{ + test_set_page_writeback(page); +} +#ifdef CONFIG_PAGEFLAGS_EXTENDED /* - * Only test-and-set exist for PG_writeback. The unconditional operators are - * risky: they bypass page accounting. + * System with lots of page flags available. This allows separate + * flags for PageHead() and PageTail() checks of compound pages so that bit + * tests can be used in performance sensitive paths. PageCompound is + * generally not used in hot code paths. */ -#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) -#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback, \ - &(page)->flags) -#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback, \ - &(page)->flags) +__PAGEFLAG(Head, head) +__PAGEFLAG(Tail, tail) -#define PageBuddy(page) test_bit(PG_buddy, &(page)->flags) -#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags) -#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags) - -#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) -#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) -#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) - -#define PageReadahead(page) test_bit(PG_readahead, &(page)->flags) -#define SetPageReadahead(page) set_bit(PG_readahead, &(page)->flags) -#define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags) - -#define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags) -#define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags) -#define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) -#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) +static inline int PageCompound(struct page *page) +{ + return page->flags & ((1L << PG_head) | (1L << PG_tail)); -#define PageCompound(page) test_bit(PG_compound, &(page)->flags) -#define __SetPageCompound(page) __set_bit(PG_compound, &(page)->flags) -#define __ClearPageCompound(page) __clear_bit(PG_compound, &(page)->flags) +} +#else +/* + * Reduce page flag use as much as possible by overlapping + * compound page flags with the flags used for page cache pages. Possible + * because PageCompound is always set for compound pages and not for + * pages on the LRU and/or pagecache. + */ +TESTPAGEFLAG(Compound, compound) +__PAGEFLAG(Head, compound) /* * PG_reclaim is used in combination with PG_compound to mark the - * head and tail of a compound page + * head and tail of a compound page. This saves one page flag + * but makes it impossible to use compound pages for the page cache. + * The PG_reclaim bit would have to be used for reclaim or readahead + * if compound pages enter the page cache. * * PG_compound & PG_reclaim => Tail page * PG_compound & ~PG_reclaim => Head page */ - #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) -#define PageTail(page) (((page)->flags & PG_head_tail_mask) \ - == PG_head_tail_mask) +static inline int PageTail(struct page *page) +{ + return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); +} static inline void __SetPageTail(struct page *page) { @@ -279,33 +305,6 @@ static inline void __ClearPageTail(struct page *page) page->flags &= ~PG_head_tail_mask; } -#define PageHead(page) (((page)->flags & PG_head_tail_mask) \ - == (1L << PG_compound)) -#define __SetPageHead(page) __SetPageCompound(page) -#define __ClearPageHead(page) __ClearPageCompound(page) - -#ifdef CONFIG_SWAP -#define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) -#define SetPageSwapCache(page) set_bit(PG_swapcache, &(page)->flags) -#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags) -#else -#define PageSwapCache(page) 0 -#endif - -#define PageUncached(page) test_bit(PG_uncached, &(page)->flags) -#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) -#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags) - -struct page; /* forward declaration */ - -extern void cancel_dirty_page(struct page *page, unsigned int account_size); - -int test_clear_page_writeback(struct page *page); -int test_set_page_writeback(struct page *page); - -static inline void set_page_writeback(struct page *page) -{ - test_set_page_writeback(page); -} - +#endif /* !PAGEFLAGS_EXTENDED */ +#endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 5c80b19..5ad7919 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -16,7 +16,8 @@ # define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */ # define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */ -/* Get/set whether or not to drop capabilities on setuid() away from uid 0 */ +/* Get/set whether or not to drop capabilities on setuid() away from + * uid 0 (as per security/commoncap.c) */ #define PR_GET_KEEPCAPS 7 #define PR_SET_KEEPCAPS 8 @@ -63,7 +64,7 @@ #define PR_GET_SECCOMP 21 #define PR_SET_SECCOMP 22 -/* Get/set the capability bounding set */ +/* Get/set the capability bounding set (as per security/commoncap.c) */ #define PR_CAPBSET_READ 23 #define PR_CAPBSET_DROP 24 @@ -73,4 +74,8 @@ # define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ # define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +/* Get/set securebits (as per security/commoncap.c) */ +#define PR_GET_SECUREBITS 27 +#define PR_SET_SECUREBITS 28 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/quota.h b/include/linux/quota.h index eb560d0..52e49dc 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -202,10 +202,14 @@ struct quota_format_type; struct mem_dqinfo { struct quota_format_type *dqi_format; + int dqi_fmt_id; /* Id of the dqi_format - used when turning + * quotas on after remount RW */ struct list_head dqi_dirty_list; /* List of dirty dquots */ unsigned long dqi_flags; unsigned int dqi_bgrace; unsigned int dqi_igrace; + qsize_t dqi_maxblimit; + qsize_t dqi_maxilimit; union { struct v1_mem_dqinfo v1_i; struct v2_mem_dqinfo v2_i; @@ -296,8 +300,8 @@ struct dquot_operations { /* Operations handling requests from userspace */ struct quotactl_ops { - int (*quota_on)(struct super_block *, int, int, char *); - int (*quota_off)(struct super_block *, int); + int (*quota_on)(struct super_block *, int, int, char *, int); + int (*quota_off)(struct super_block *, int, int); int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); @@ -318,6 +322,10 @@ struct quota_format_type { #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ #define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */ +#define DQUOT_USR_SUSPENDED 0x04 /* User diskquotas are off, but + * we have necessary info in + * memory to turn them on */ +#define DQUOT_GRP_SUSPENDED 0x08 /* The same for group quotas */ struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ @@ -329,17 +337,16 @@ struct quota_info { struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -/* Inline would be better but we need to dereference super_block which is not defined yet */ -int mark_dquot_dirty(struct dquot *dquot); - -#define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags) - #define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) #define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ sb_has_quota_enabled(sb, GRPQUOTA)) +#define sb_has_quota_suspended(sb, type) \ + ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \ + (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED)) + int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 5110201..f867020 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -37,11 +37,11 @@ extern int dquot_release(struct dquot *dquot); extern int dquot_commit_info(struct super_block *sb, int type); extern int dquot_mark_dquot_dirty(struct dquot *dquot); -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); +extern int vfs_quota_on(struct super_block *sb, int type, int format_id, + char *path, int remount); extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); -extern int vfs_quota_off(struct super_block *sb, int type); -#define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type) +extern int vfs_quota_off(struct super_block *sb, int type, int remount); extern int vfs_quota_sync(struct super_block *sb, int type); extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); @@ -59,7 +59,7 @@ extern struct quotactl_ops vfs_quotactl_ops; /* It is better to call this function outside of any transaction as it might * need a lot of space in journal for dquot structure allocation. */ -static __inline__ void DQUOT_INIT(struct inode *inode) +static inline void DQUOT_INIT(struct inode *inode) { BUG_ON(!inode->i_sb); if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) @@ -67,7 +67,7 @@ static __inline__ void DQUOT_INIT(struct inode *inode) } /* The same as with DQUOT_INIT */ -static __inline__ void DQUOT_DROP(struct inode *inode) +static inline void DQUOT_DROP(struct inode *inode) { /* Here we can get arbitrary inode from clear_inode() so we have * to be careful. OTOH we don't need locking as quota operations @@ -90,7 +90,7 @@ static __inline__ void DQUOT_DROP(struct inode *inode) /* The following allocation/freeing/transfer functions *must* be called inside * a transaction (deadlocks possible otherwise) */ -static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ @@ -102,7 +102,7 @@ static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t return 0; } -static __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) { int ret; if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr))) @@ -110,7 +110,7 @@ static __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) return ret; } -static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ @@ -122,7 +122,7 @@ static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) return 0; } -static __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) { int ret; if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr))) @@ -130,7 +130,7 @@ static __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) return ret; } -static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode) +static inline int DQUOT_ALLOC_INODE(struct inode *inode) { if (sb_any_quota_enabled(inode->i_sb)) { DQUOT_INIT(inode); @@ -140,7 +140,7 @@ static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode) return 0; } -static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_space(inode, nr); @@ -148,19 +148,19 @@ static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) inode_sub_bytes(inode, nr); } -static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) +static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) { DQUOT_FREE_SPACE_NODIRTY(inode, nr); mark_inode_dirty(inode); } -static __inline__ void DQUOT_FREE_INODE(struct inode *inode) +static inline void DQUOT_FREE_INODE(struct inode *inode) { if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_inode(inode, 1); } -static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) +static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) { if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { DQUOT_INIT(inode); @@ -171,14 +171,32 @@ static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) } /* The following two functions cannot be called inside a transaction */ -#define DQUOT_SYNC(sb) sync_dquots(sb, -1) +static inline void DQUOT_SYNC(struct super_block *sb) +{ + sync_dquots(sb, -1); +} -static __inline__ int DQUOT_OFF(struct super_block *sb) +static inline int DQUOT_OFF(struct super_block *sb, int remount) { int ret = -ENOSYS; - if (sb_any_quota_enabled(sb) && sb->s_qcop && sb->s_qcop->quota_off) - ret = sb->s_qcop->quota_off(sb, -1); + if (sb->s_qcop && sb->s_qcop->quota_off) + ret = sb->s_qcop->quota_off(sb, -1, remount); + return ret; +} + +static inline int DQUOT_ON_REMOUNT(struct super_block *sb) +{ + int cnt; + int ret = 0, err; + + if (!sb->s_qcop || !sb->s_qcop->quota_on) + return -ENOSYS; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); + if (err < 0 && !ret) + ret = err; + } return ret; } @@ -189,13 +207,43 @@ static __inline__ int DQUOT_OFF(struct super_block *sb) */ #define sb_dquot_ops (NULL) #define sb_quotactl_ops (NULL) -#define DQUOT_INIT(inode) do { } while(0) -#define DQUOT_DROP(inode) do { } while(0) -#define DQUOT_ALLOC_INODE(inode) (0) -#define DQUOT_FREE_INODE(inode) do { } while(0) -#define DQUOT_SYNC(sb) do { } while(0) -#define DQUOT_OFF(sb) do { } while(0) -#define DQUOT_TRANSFER(inode, iattr) (0) + +static inline void DQUOT_INIT(struct inode *inode) +{ +} + +static inline void DQUOT_DROP(struct inode *inode) +{ +} + +static inline int DQUOT_ALLOC_INODE(struct inode *inode) +{ + return 0; +} + +static inline void DQUOT_FREE_INODE(struct inode *inode) +{ +} + +static inline void DQUOT_SYNC(struct super_block *sb) +{ +} + +static inline int DQUOT_OFF(struct super_block *sb, int remount) +{ + return 0; +} + +static inline int DQUOT_ON_REMOUNT(struct super_block *sb) +{ + return 0; +} + +static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) +{ + return 0; +} + static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { inode_add_bytes(inode, nr); @@ -235,11 +283,38 @@ static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) #endif /* CONFIG_QUOTA */ -#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) DQUOT_PREALLOC_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) -#define DQUOT_PREALLOC_BLOCK(inode, nr) DQUOT_PREALLOC_SPACE(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) -#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) DQUOT_ALLOC_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) -#define DQUOT_ALLOC_BLOCK(inode, nr) DQUOT_ALLOC_SPACE(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) -#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) DQUOT_FREE_SPACE_NODIRTY(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) -#define DQUOT_FREE_BLOCK(inode, nr) DQUOT_FREE_SPACE(inode, ((qsize_t)(nr)) << (inode)->i_sb->s_blocksize_bits) +static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +{ + return DQUOT_PREALLOC_SPACE_NODIRTY(inode, + nr << inode->i_sb->s_blocksize_bits); +} + +static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr) +{ + return DQUOT_PREALLOC_SPACE(inode, + nr << inode->i_sb->s_blocksize_bits); +} + +static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +{ + return DQUOT_ALLOC_SPACE_NODIRTY(inode, + nr << inode->i_sb->s_blocksize_bits); +} + +static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr) +{ + return DQUOT_ALLOC_SPACE(inode, + nr << inode->i_sb->s_blocksize_bits); +} + +static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +{ + DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits); +} + +static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr) +{ + DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits); +} #endif /* _LINUX_QUOTAOPS_ */ diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 93678f5..f0827d3 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -252,6 +252,8 @@ struct r6_state { #define STRIPE_EXPANDING 9 #define STRIPE_EXPAND_SOURCE 10 #define STRIPE_EXPAND_READY 11 +#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */ +#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ /* * Operations flags (in issue order) */ @@ -316,12 +318,17 @@ struct raid5_private_data { int previous_raid_disks; struct list_head handle_list; /* stripes needing handling */ + struct list_head hold_list; /* preread ready stripes */ struct list_head delayed_list; /* stripes that have plugged requests */ struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ struct bio *retry_read_aligned; /* currently retrying aligned bios */ struct bio *retry_read_aligned_list; /* aligned bios retry list */ atomic_t preread_active_stripes; /* stripes with scheduled io */ atomic_t active_aligned_reads; + atomic_t pending_full_writes; /* full write backlog */ + int bypass_count; /* bypassed prereads */ + int bypass_threshold; /* preread nice */ + struct list_head *last_hold; /* detect hold_list promotions */ atomic_t reshape_stripes; /* stripes with pending writes for reshape */ /* unfortunately we need two cache names as we temporarily have diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 8e7eff2..4aacaee 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2176,6 +2176,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); long reiserfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +int reiserfs_unpack(struct inode *inode, struct file *filp); /* ioctl's command */ #define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) diff --git a/include/linux/sched.h b/include/linux/sched.h index d0bd970..024d72b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -68,7 +68,6 @@ struct sched_param { #include <linux/smp.h> #include <linux/sem.h> #include <linux/signal.h> -#include <linux/securebits.h> #include <linux/fs_struct.h> #include <linux/compiler.h> #include <linux/completion.h> @@ -1133,7 +1132,7 @@ struct task_struct { gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - unsigned keep_capabilities:1; + unsigned securebits; struct user_struct *user; #ifdef CONFIG_KEYS struct key *request_key_auth; /* assumed request_key authority */ @@ -1798,6 +1797,8 @@ extern void mmput(struct mm_struct *); extern struct mm_struct *get_task_mm(struct task_struct *task); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +/* Allocate a new mm structure and copy contents from tsk->mm */ +extern struct mm_struct *dup_mm(struct task_struct *tsk); extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); extern void flush_thread(void); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 5b06178..c1f19db 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -3,28 +3,39 @@ #define SECUREBITS_DEFAULT 0x00000000 -extern unsigned securebits; - /* When set UID 0 has no special privileges. When unset, we support inheritance of root-permissions and suid-root executable under compatibility mode. We raise the effective and inheritable bitmasks *of the executable file* if the effective uid of the new process is 0. If the real uid is 0, we raise the inheritable bitmask of the executable file. */ -#define SECURE_NOROOT 0 +#define SECURE_NOROOT 0 +#define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ /* When set, setuid to/from uid 0 does not trigger capability-"fixes" to be compatible with old programs relying on set*uid to loose privileges. When unset, setuid doesn't change privileges. */ -#define SECURE_NO_SETUID_FIXUP 2 +#define SECURE_NO_SETUID_FIXUP 2 +#define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ + +/* When set, a process can retain its capabilities even after + transitioning to a non-root user (the set-uid fixup suppressed by + bit 2). Bit-4 is cleared when a process calls exec(); setting both + bit 4 and 5 will create a barrier through exec that no exec()'d + child can use this feature again. */ +#define SECURE_KEEP_CAPS 4 +#define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ /* Each securesetting is implemented using two bits. One bit specify whether the setting is on or off. The other bit specify whether the setting is fixed or not. A setting which is fixed cannot be changed from user-level. */ +#define issecure_mask(X) (1 << (X)) +#define issecure(X) (issecure_mask(X) & current->securebits) -#define issecure(X) ( (1 << (X+1)) & SECUREBITS_DEFAULT ? \ - (1 << (X)) & SECUREBITS_DEFAULT : \ - (1 << (X)) & securebits ) +#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ + issecure_mask(SECURE_NO_SETUID_FIXUP) | \ + issecure_mask(SECURE_KEEP_CAPS)) +#define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) #endif /* !_LINUX_SECUREBITS_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 53a3453..e6299e5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -34,8 +34,6 @@ #include <linux/xfrm.h> #include <net/flow.h> -extern unsigned securebits; - /* Maximum number of letters for an LSM name string */ #define SECURITY_NAME_MAX 10 @@ -61,6 +59,8 @@ extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); +extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, long *rc_p); extern int cap_task_setscheduler (struct task_struct *p, int policy, struct sched_param *lp); extern int cap_task_setioprio (struct task_struct *p, int ioprio); extern int cap_task_setnice (struct task_struct *p, int nice); @@ -720,7 +720,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @arg3 contains a argument. * @arg4 contains a argument. * @arg5 contains a argument. - * Return 0 if permission is granted. + * @rc_p contains a pointer to communicate back the forced return code + * Return 0 if permission is granted, and non-zero if the security module + * has taken responsibility (setting *rc_p) for the prctl call. * @task_reparent_to_init: * Set the security attributes in @p->security for a kernel thread that * is being reparented to the init task. @@ -1420,7 +1422,7 @@ struct security_operations { int (*task_wait) (struct task_struct * p); int (*task_prctl) (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5); + unsigned long arg5, long *rc_p); void (*task_reparent_to_init) (struct task_struct * p); void (*task_to_inode)(struct task_struct *p, struct inode *inode); @@ -1684,7 +1686,7 @@ int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid); int security_task_wait(struct task_struct *p); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); + unsigned long arg4, unsigned long arg5, long *rc_p); void security_task_reparent_to_init(struct task_struct *p); void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); @@ -2271,9 +2273,9 @@ static inline int security_task_wait (struct task_struct *p) static inline int security_task_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5) + unsigned long arg5, long *rc_p) { - return 0; + return cap_task_prctl(option, arg2, arg3, arg3, arg5, rc_p); } static inline void security_task_reparent_to_init (struct task_struct *p) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 00b65c0..3d37c94 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -46,6 +46,7 @@ enum { PLAT8250_DEV_HUB6, PLAT8250_DEV_MCA, PLAT8250_DEV_AU1X00, + PLAT8250_DEV_SM501, }; /* diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 8d5fb36..f2d12d5 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -34,8 +34,7 @@ struct shmem_sb_info { uid_t uid; /* Mount uid for root directory */ gid_t gid; /* Mount gid for root directory */ mode_t mode; /* Mount mode for root directory */ - int policy; /* Default NUMA memory alloc policy */ - nodemask_t policy_nodes; /* nodemask for preferred and bind */ + struct mempolicy *mpol; /* default memory policy for mappings */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 1d7d4c5..a697742 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -12,11 +12,22 @@ #include <asm/errno.h> #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) +extern void pm_set_vt_switch(int); extern int pm_prepare_console(void); extern void pm_restore_console(void); #else -static inline int pm_prepare_console(void) { return 0; } -static inline void pm_restore_console(void) {} +static inline void pm_set_vt_switch(int do_switch) +{ +} + +static inline int pm_prepare_console(void) +{ + return 0; +} + +static inline void pm_restore_console(void) +{ +} #endif typedef int __bitwise suspend_state_t; diff --git a/include/linux/swap.h b/include/linux/swap.h index 878459a..0b33776 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -177,11 +177,11 @@ extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern int lru_add_drain_all(void); -extern int rotate_reclaimable_page(struct page *page); +extern void rotate_reclaimable_page(struct page *page); extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern unsigned long try_to_free_pages(struct zone **zones, int order, +extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask); diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 5562fbf..45f6bc8 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -13,10 +13,6 @@ #define _SYNCLINK_H_ #define SYNCLINK_H_VERSION 3.6 -#define BOOLEAN int -#define TRUE 1 -#define FALSE 0 - #define BIT0 0x0001 #define BIT1 0x0002 #define BIT2 0x0004 diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 03378e3..add3c5a 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -32,7 +32,7 @@ struct attribute { struct attribute_group { const char *name; - int (*is_visible)(struct kobject *, + mode_t (*is_visible)(struct kobject *, struct attribute *, int); struct attribute **attrs; }; @@ -105,6 +105,8 @@ void sysfs_remove_link(struct kobject *kobj, const char *name); int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); +int sysfs_update_group(struct kobject *kobj, + const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp); int sysfs_add_file_to_group(struct kobject *kobj, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ce8e7da..364789a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -31,6 +31,7 @@ struct vm_struct { struct page **pages; unsigned int nr_pages; unsigned long phys_addr; + void *caller; }; /* @@ -66,6 +67,8 @@ static inline size_t get_vm_area_size(const struct vm_struct *area) } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); +extern struct vm_struct *get_vm_area_caller(unsigned long size, + unsigned long flags, void *caller); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); extern struct vm_struct *get_vm_area_node(unsigned long size, @@ -87,4 +90,6 @@ extern void free_vm_area(struct vm_struct *area); extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern const struct seq_operations vmalloc_op; + #endif /* _LINUX_VMALLOC_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9f1b4b4..e83b693 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -25,6 +25,7 @@ #define HIGHMEM_ZONE(xx) #endif + #define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, @@ -37,6 +38,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGSCAN_DIRECT), PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, PAGEOUTRUN, ALLOCSTALL, PGROTATED, +#ifdef CONFIG_HUGETLB_PAGE + HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, +#endif NR_VM_EVENT_ITEMS }; @@ -174,7 +178,7 @@ static inline unsigned long node_page_state(int node, zone_page_state(&zones[ZONE_MOVABLE], item); } -extern void zone_statistics(struct zonelist *, struct zone *); +extern void zone_statistics(struct zone *, struct zone *); #else diff --git a/include/net/compat.h b/include/net/compat.h index 406db24..05fa5d0 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -40,4 +40,7 @@ extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int); +extern int compat_mc_setsockopt(struct sock *, int, int, char __user *, int, + int (*)(struct sock *, int, int, char __user *, int)); + #endif /* NET_COMPAT_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b8b19e2..f6a9fe0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -181,7 +181,8 @@ struct scsi_device { sdev_printk(prefix, (scmd)->device, fmt, ##a) enum scsi_target_state { - STARGET_RUNNING = 1, + STARGET_CREATED = 1, + STARGET_RUNNING, STARGET_DEL, }; diff --git a/include/video/atmel_lcdc.h b/include/video/atmel_lcdc.h index 336c20d..ed64862 100644 --- a/include/video/atmel_lcdc.h +++ b/include/video/atmel_lcdc.h @@ -22,6 +22,15 @@ #ifndef __ATMEL_LCDC_H__ #define __ATMEL_LCDC_H__ + +/* Way LCD wires are connected to the chip: + * Some Atmel chips use BGR color mode (instead of standard RGB) + * A swapped wiring onboard can bring to RGB mode. + */ +#define ATMEL_LCDC_WIRING_BGR 0 +#define ATMEL_LCDC_WIRING_RGB 1 + + /* LCD Controller info data structure, stored in device platform_data */ struct atmel_lcdfb_info { spinlock_t lock; @@ -39,8 +48,10 @@ struct atmel_lcdfb_info { u8 bl_power; #endif bool lcdcon_is_backlight; + u8 saved_lcdcon; u8 default_bpp; + u8 lcd_wiring_mode; unsigned int default_lcdcon2; unsigned int default_dmacon; void (*atmel_lcdfb_power_control)(int on); diff --git a/include/video/hecubafb.h b/include/video/hecubafb.h new file mode 100644 index 0000000..7b99523 --- /dev/null +++ b/include/video/hecubafb.h @@ -0,0 +1,51 @@ +/* + * hecubafb.h - definitions for the hecuba framebuffer driver + * + * Copyright (C) 2008 by Jaya Kumar + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + * + */ + +#ifndef _LINUX_HECUBAFB_H_ +#define _LINUX_HECUBAFB_H_ + +/* Apollo controller specific defines */ +#define APOLLO_START_NEW_IMG 0xA0 +#define APOLLO_STOP_IMG_DATA 0xA1 +#define APOLLO_DISPLAY_IMG 0xA2 +#define APOLLO_ERASE_DISPLAY 0xA3 +#define APOLLO_INIT_DISPLAY 0xA4 + +/* Hecuba interface specific defines */ +#define HCB_WUP_BIT 0x01 +#define HCB_DS_BIT 0x02 +#define HCB_RW_BIT 0x04 +#define HCB_CD_BIT 0x08 +#define HCB_ACK_BIT 0x80 + +/* struct used by hecuba. board specific stuff comes from *board */ +struct hecubafb_par { + struct fb_info *info; + struct hecuba_board *board; + void (*send_command)(struct hecubafb_par *, unsigned char); + void (*send_data)(struct hecubafb_par *, unsigned char); +}; + +/* board specific routines +board drivers can implement wait_for_ack with interrupts if desired. if +wait_for_ack is called with clear=0, then go to sleep and return when ack +goes hi or if wait_for_ack with clear=1, then return when ack goes lo */ +struct hecuba_board { + struct module *owner; + void (*remove)(struct hecubafb_par *); + void (*set_ctl)(struct hecubafb_par *, unsigned char, unsigned char); + void (*set_data)(struct hecubafb_par *, unsigned char); + void (*wait_for_ack)(struct hecubafb_par *, int); + int (*init)(struct hecubafb_par *); +}; + + +#endif diff --git a/include/video/metronomefb.h b/include/video/metronomefb.h new file mode 100644 index 0000000..dab04b4 --- /dev/null +++ b/include/video/metronomefb.h @@ -0,0 +1,62 @@ +/* + * metronomefb.h - definitions for the metronome framebuffer driver + * + * Copyright (C) 2008 by Jaya Kumar + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + * + */ + +#ifndef _LINUX_METRONOMEFB_H_ +#define _LINUX_METRONOMEFB_H_ + +/* address and control descriptors used by metronome controller */ +struct metromem_desc { + u32 mFDADR0; + u32 mFSADR0; + u32 mFIDR0; + u32 mLDCMD0; +}; + +/* command structure used by metronome controller */ +struct metromem_cmd { + u16 opcode; + u16 args[((64-2)/2)]; + u16 csum; +}; + +/* struct used by metronome. board specific stuff comes from *board */ +struct metronomefb_par { + unsigned char *metromem; + struct metromem_desc *metromem_desc; + struct metromem_cmd *metromem_cmd; + unsigned char *metromem_wfm; + unsigned char *metromem_img; + u16 *metromem_img_csum; + u16 *csum_table; + int metromemsize; + dma_addr_t metromem_dma; + dma_addr_t metromem_desc_dma; + struct fb_info *info; + struct metronome_board *board; + wait_queue_head_t waitq; + u8 frame_count; +}; + +/* board specific routines */ +struct metronome_board { + struct module *owner; + void (*free_irq)(struct fb_info *); + void (*init_gpio_regs)(struct metronomefb_par *); + void (*init_lcdc_regs)(struct metronomefb_par *); + void (*post_dma_setup)(struct metronomefb_par *); + void (*set_rst)(struct metronomefb_par *, int); + void (*set_stdby)(struct metronomefb_par *, int); + int (*met_wait_event)(struct metronomefb_par *); + int (*met_wait_event_intr)(struct metronomefb_par *); + int (*setup_irq)(struct fb_info *); +}; + +#endif |