diff options
142 files changed, 642 insertions, 2183 deletions
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt index d79b008..3f9f808 100644 --- a/Documentation/cachetlb.txt +++ b/Documentation/cachetlb.txt @@ -317,10 +317,10 @@ maps this page at its virtual address. about doing this. The idea is, first at flush_dcache_page() time, if - page->mapping->i_mmap is an empty tree and ->i_mmap_nonlinear - an empty list, just mark the architecture private page flag bit. - Later, in update_mmu_cache(), a check is made of this flag bit, - and if set the flush is done and the flag bit is cleared. + page->mapping->i_mmap is an empty tree, just mark the architecture + private page flag bit. Later, in update_mmu_cache(), a check is + made of this flag bit, and if set the flush is done and the flag + bit is cleared. IMPORTANT NOTE: It is often important, if you defer the flush, that the actual flush occurs on the same CPU diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt index 1b805a0..f6d9c99 100644 --- a/Documentation/filesystems/fiemap.txt +++ b/Documentation/filesystems/fiemap.txt @@ -196,7 +196,8 @@ struct fiemap_extent_info { }; It is intended that the file system should not need to access any of this -structure directly. +structure directly. Filesystem handlers should be tolerant to signals and return +EINTR once fatal signal received. Flag checking should be done at the beginning of the ->fiemap callback via the diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt index cfd0271..51f61db 100644 --- a/Documentation/filesystems/inotify.txt +++ b/Documentation/filesystems/inotify.txt @@ -4,201 +4,10 @@ Document started 15 Mar 2005 by Robert Love <rml@novell.com> +Document updated 4 Jan 2015 by Zhang Zhen <zhenzhang.zhang@huawei.com> + --Deleted obsoleted interface, just refer to manpages for user interface. - -(i) User Interface - -Inotify is controlled by a set of three system calls and normal file I/O on a -returned file descriptor. - -First step in using inotify is to initialise an inotify instance: - - int fd = inotify_init (); - -Each instance is associated with a unique, ordered queue. - -Change events are managed by "watches". A watch is an (object,mask) pair where -the object is a file or directory and the mask is a bit mask of one or more -inotify events that the application wishes to receive. See <linux/inotify.h> -for valid events. A watch is referenced by a watch descriptor, or wd. - -Watches are added via a path to the file. - -Watches on a directory will return events on any files inside of the directory. - -Adding a watch is simple: - - int wd = inotify_add_watch (fd, path, mask); - -Where "fd" is the return value from inotify_init(), path is the path to the -object to watch, and mask is the watch mask (see <linux/inotify.h>). - -You can update an existing watch in the same manner, by passing in a new mask. - -An existing watch is removed via - - int ret = inotify_rm_watch (fd, wd); - -Events are provided in the form of an inotify_event structure that is read(2) -from a given inotify instance. The filename is of dynamic length and follows -the struct. It is of size len. The filename is padded with null bytes to -ensure proper alignment. This padding is reflected in len. - -You can slurp multiple events by passing a large buffer, for example - - size_t len = read (fd, buf, BUF_LEN); - -Where "buf" is a pointer to an array of "inotify_event" structures at least -BUF_LEN bytes in size. The above example will return as many events as are -available and fit in BUF_LEN. - -Each inotify instance fd is also select()- and poll()-able. - -You can find the size of the current event queue via the standard FIONREAD -ioctl on the fd returned by inotify_init(). - -All watches are destroyed and cleaned up on close. - - -(ii) - -Prototypes: - - int inotify_init (void); - int inotify_add_watch (int fd, const char *path, __u32 mask); - int inotify_rm_watch (int fd, __u32 mask); - - -(iii) Kernel Interface - -Inotify's kernel API consists a set of functions for managing watches and an -event callback. - -To use the kernel API, you must first initialize an inotify instance with a set -of inotify_operations. You are given an opaque inotify_handle, which you use -for any further calls to inotify. - - struct inotify_handle *ih = inotify_init(my_event_handler); - -You must provide a function for processing events and a function for destroying -the inotify watch. - - void handle_event(struct inotify_watch *watch, u32 wd, u32 mask, - u32 cookie, const char *name, struct inode *inode) - - watch - the pointer to the inotify_watch that triggered this call - wd - the watch descriptor - mask - describes the event that occurred - cookie - an identifier for synchronizing events - name - the dentry name for affected files in a directory-based event - inode - the affected inode in a directory-based event - - void destroy_watch(struct inotify_watch *watch) - -You may add watches by providing a pre-allocated and initialized inotify_watch -structure and specifying the inode to watch along with an inotify event mask. -You must pin the inode during the call. You will likely wish to embed the -inotify_watch structure in a structure of your own which contains other -information about the watch. Once you add an inotify watch, it is immediately -subject to removal depending on filesystem events. You must grab a reference if -you depend on the watch hanging around after the call. - - inotify_init_watch(&my_watch->iwatch); - inotify_get_watch(&my_watch->iwatch); // optional - s32 wd = inotify_add_watch(ih, &my_watch->iwatch, inode, mask); - inotify_put_watch(&my_watch->iwatch); // optional - -You may use the watch descriptor (wd) or the address of the inotify_watch for -other inotify operations. You must not directly read or manipulate data in the -inotify_watch. Additionally, you must not call inotify_add_watch() more than -once for a given inotify_watch structure, unless you have first called either -inotify_rm_watch() or inotify_rm_wd(). - -To determine if you have already registered a watch for a given inode, you may -call inotify_find_watch(), which gives you both the wd and the watch pointer for -the inotify_watch, or an error if the watch does not exist. - - wd = inotify_find_watch(ih, inode, &watchp); - -You may use container_of() on the watch pointer to access your own data -associated with a given watch. When an existing watch is found, -inotify_find_watch() bumps the refcount before releasing its locks. You must -put that reference with: - - put_inotify_watch(watchp); - -Call inotify_find_update_watch() to update the event mask for an existing watch. -inotify_find_update_watch() returns the wd of the updated watch, or an error if -the watch does not exist. - - wd = inotify_find_update_watch(ih, inode, mask); - -An existing watch may be removed by calling either inotify_rm_watch() or -inotify_rm_wd(). - - int ret = inotify_rm_watch(ih, &my_watch->iwatch); - int ret = inotify_rm_wd(ih, wd); - -A watch may be removed while executing your event handler with the following: - - inotify_remove_watch_locked(ih, iwatch); - -Call inotify_destroy() to remove all watches from your inotify instance and -release it. If there are no outstanding references, inotify_destroy() will call -your destroy_watch op for each watch. - - inotify_destroy(ih); - -When inotify removes a watch, it sends an IN_IGNORED event to your callback. -You may use this event as an indication to free the watch memory. Note that -inotify may remove a watch due to filesystem events, as well as by your request. -If you use IN_ONESHOT, inotify will remove the watch after the first event, at -which point you may call the final inotify_put_watch. - -(iv) Kernel Interface Prototypes - - struct inotify_handle *inotify_init(struct inotify_operations *ops); - - inotify_init_watch(struct inotify_watch *watch); - - s32 inotify_add_watch(struct inotify_handle *ih, - struct inotify_watch *watch, - struct inode *inode, u32 mask); - - s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode, - struct inotify_watch **watchp); - - s32 inotify_find_update_watch(struct inotify_handle *ih, - struct inode *inode, u32 mask); - - int inotify_rm_wd(struct inotify_handle *ih, u32 wd); - - int inotify_rm_watch(struct inotify_handle *ih, - struct inotify_watch *watch); - - void inotify_remove_watch_locked(struct inotify_handle *ih, - struct inotify_watch *watch); - - void inotify_destroy(struct inotify_handle *ih); - - void get_inotify_watch(struct inotify_watch *watch); - void put_inotify_watch(struct inotify_watch *watch); - - -(v) Internal Kernel Implementation - -Each inotify instance is represented by an inotify_handle structure. -Inotify's userspace consumers also have an inotify_device which is -associated with the inotify_handle, and on which events are queued. - -Each watch is associated with an inotify_watch structure. Watches are chained -off of each associated inotify_handle and each associated inode. - -See fs/notify/inotify/inotify_fsnotify.c and fs/notify/inotify/inotify_user.c -for the locking and lifetime rules. - - -(vi) Rationale +(i) Rationale Q: What is the design decision behind not tying the watch to the open fd of the watched object? diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 7618a28..28f8c08 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -100,3 +100,7 @@ coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode coherency=buffered Allow concurrent O_DIRECT writes without EX lock among nodes, which gains high performance at risk of getting stale data on other nodes. +journal_async_commit Commit block can be written to disk without waiting + for descriptor blocks. If enabled older kernels cannot + mount the device. This will enable 'journal_checksum' + internally. diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt index 560e436..f609142 100644 --- a/Documentation/vm/remap_file_pages.txt +++ b/Documentation/vm/remap_file_pages.txt @@ -18,10 +18,9 @@ on 32-bit systems to map files bigger than can linearly fit into 32-bit virtual address space. This use-case is not critical anymore since 64-bit systems are widely available. -The plan is to deprecate the syscall and replace it with an emulation. -The emulation will create new VMAs instead of nonlinear mappings. It's -going to work slower for rare users of remap_file_pages() but ABI is -preserved. +The syscall is deprecated and replaced it with an emulation now. The +emulation creates new VMAs instead of nonlinear mappings. It's going to +work slower for rare users of remap_file_pages() but ABI is preserved. One side effect of emulation (apart from performance) is that user can hit vm.max_map_count limit more easily due to additional VMAs. See comment for diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index d8f9b7e..fce22cf 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -73,7 +73,6 @@ struct vm_area_struct; /* .. and these are ours ... */ #define _PAGE_DIRTY 0x20000 #define _PAGE_ACCESSED 0x40000 -#define _PAGE_FILE 0x80000 /* set:pagecache, unset:swap */ /* * NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly @@ -268,7 +267,6 @@ extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; } extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } @@ -345,11 +343,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define pte_to_pgoff(pte) (pte_val(pte) >> 32) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE }) - -#define PTE_FILE_MAX_BITS 32 - #ifndef CONFIG_DISCONTIGMEM #define kern_addr_valid(addr) (1) #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 6b0b7f7e..bdc8cca 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -61,7 +61,6 @@ #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ #define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */ -#define _PAGE_FILE (1<<7) /* page cache/ swap (S) */ #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ @@ -73,7 +72,6 @@ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ #define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ -#define _PAGE_FILE (1<<6) /* page cache/ swap (S) */ #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr @@ -268,15 +266,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) pte; \ }) -/* TBD: Non linear mapping stuff */ -static inline int pte_file(pte_t pte) -{ - return pte_val(pte) & _PAGE_FILE; -} - -#define PTE_FILE_MAX_BITS 30 -#define pgoff_to_pte(x) __pte(x) -#define pte_to_pgoff(x) (pte_val(x) >> 2) #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) #define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) @@ -364,7 +353,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* Encode swap {type,off} tuple into PTE * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that - * both PAGE_FILE and PAGE_PRESENT are zero in a PTE holding swap "identifier" + * PAGE_PRESENT is zero in a PTE holding swap "identifier" */ #define __swp_entry(type, off) ((swp_entry_t) { \ ((type) & 0x1f) | ((off) << 13) }) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index f027941..bcc5e30 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -118,7 +118,6 @@ #define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) -#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_DIRTY (_AT(pteval_t, 1) << 6) #define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) #define L_PTE_USER (_AT(pteval_t, 1) << 8) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index a31ecdad..18dbc82 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -77,7 +77,6 @@ */ #define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ #define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ -#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */ diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index 0642228..c35e53e 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -54,8 +54,6 @@ typedef pte_t *pte_addr_t; -static inline int pte_file(pte_t pte) { return 0; } - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index d5cac54..f403541 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -318,12 +318,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <--------------- offset ----------------------> < type -> 0 0 0 + * <--------------- offset ------------------------> < type -> 0 0 * - * This gives us up to 31 swap files and 64GB per swap file. Note that + * This gives us up to 31 swap files and 128GB per swap file. Note that * the offset field is always non-zero. */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 2 #define __SWP_TYPE_BITS 5 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -342,20 +342,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -/* - * Encode and decode a file entry. File entries are stored in the Linux - * page tables as follows: - * - * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <----------------------- offset ------------------------> 1 0 0 - */ -#define pte_file(pte) (pte_val(pte) & L_PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | L_PTE_FILE) - -#define PTE_FILE_MAX_BITS 29 - /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ /* FIXME: this is not correct */ #define kern_addr_valid(addr) (1) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index ba1196c..082b9f2 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -98,7 +98,7 @@ #endif #if !defined (CONFIG_ARM_LPAE) && \ (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ - L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED + L_PTE_PRESENT) > L_PTE_SHARED #error Invalid Linux PTE bit settings #endif #endif /* CONFIG_MMU */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 210d632..4c44505 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,7 +25,6 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_WRITE (_AT(pteval_t, 1) << 57) @@ -469,13 +468,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-8: swap type - * bits 9-57: swap offset + * bits 2-7: swap type + * bits 8-57: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 2 #define __SWP_TYPE_BITS 6 -#define __SWP_OFFSET_BITS 49 +#define __SWP_OFFSET_BITS 50 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) #define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) @@ -493,18 +491,6 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -/* - * Encode and decode a file entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-57: file offset / PAGE_SIZE - */ -#define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) - -#define PTE_FILE_MAX_BITS 55 - extern int kern_addr_valid(unsigned long addr); #include <asm-generic/pgtable.h> diff --git a/arch/avr32/include/asm/pgtable.h b/arch/avr32/include/asm/pgtable.h index 4beff97..ac7a817 100644 --- a/arch/avr32/include/asm/pgtable.h +++ b/arch/avr32/include/asm/pgtable.h @@ -86,9 +86,6 @@ extern struct page *empty_zero_page; #define _PAGE_BIT_PRESENT 10 #define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */ -/* The following flags are only valid when !PRESENT */ -#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */ - #define _PAGE_WT (1 << _PAGE_BIT_WT) #define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY) #define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE) @@ -101,7 +98,6 @@ extern struct page *empty_zero_page; /* Software flags */ #define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED) #define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT) -#define _PAGE_FILE (1 << _PAGE_BIT_FILE) /* * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is @@ -210,14 +206,6 @@ static inline int pte_special(pte_t pte) return 0; } -/* - * The following only work if pte_present() is not true. - */ -static inline int pte_file(pte_t pte) -{ - return pte_val(pte) & _PAGE_FILE; -} - /* Mutator functions for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { @@ -329,7 +317,6 @@ extern void update_mmu_cache(struct vm_area_struct * vma, * Encode and decode a swap entry * * Constraints: - * _PAGE_FILE at bit 0 * _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE) * _PAGE_PRESENT at bit 10 * @@ -346,18 +333,6 @@ extern void update_mmu_cache(struct vm_area_struct * vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* - * Encode and decode a nonlinear file mapping entry. We have to - * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't - * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?) - */ -#define PTE_FILE_MAX_BITS 30 -#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \ - | ((pte_val(pte) >> 11) << 9)) -#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \ - | (((off) >> 9) << 11) \ - | _PAGE_FILE) }) - typedef pte_t *pte_addr_t; #define kern_addr_valid(addr) (1) diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h index 0b04901..b88a155 100644 --- a/arch/blackfin/include/asm/pgtable.h +++ b/arch/blackfin/include/asm/pgtable.h @@ -45,11 +45,6 @@ extern void paging_init(void); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_file(pte_t pte) -{ - return 0; -} - #define set_pte(pteptr, pteval) (*(pteptr) = pteval) #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h index c0eed5b..78d4483b 100644 --- a/arch/c6x/include/asm/pgtable.h +++ b/arch/c6x/include/asm/pgtable.h @@ -50,11 +50,6 @@ extern void paging_init(void); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_file(pte_t pte) -{ - return 0; -} - #define set_pte(pteptr, pteval) (*(pteptr) = pteval) #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) diff --git a/arch/cris/include/arch-v10/arch/mmu.h b/arch/cris/include/arch-v10/arch/mmu.h index e829e5a..47a5dd2 100644 --- a/arch/cris/include/arch-v10/arch/mmu.h +++ b/arch/cris/include/arch-v10/arch/mmu.h @@ -58,7 +58,6 @@ typedef struct /* Bits the HW doesn't care about but the kernel uses them in SW */ #define _PAGE_PRESENT (1<<4) /* page present in memory */ -#define _PAGE_FILE (1<<5) /* set: pagecache, unset: swap (when !PRESENT) */ #define _PAGE_ACCESSED (1<<5) /* simulated in software using valid bit */ #define _PAGE_MODIFIED (1<<6) /* simulated in software using we bit */ #define _PAGE_READ (1<<7) /* read-enabled */ @@ -105,6 +104,4 @@ typedef struct #define __S110 PAGE_SHARED #define __S111 PAGE_SHARED -#define PTE_FILE_MAX_BITS 26 - #endif diff --git a/arch/cris/include/arch-v32/arch/mmu.h b/arch/cris/include/arch-v32/arch/mmu.h index c1a13e0..e6db161 100644 --- a/arch/cris/include/arch-v32/arch/mmu.h +++ b/arch/cris/include/arch-v32/arch/mmu.h @@ -53,7 +53,6 @@ typedef struct * software. */ #define _PAGE_PRESENT (1 << 5) /* Page is present in memory. */ -#define _PAGE_FILE (1 << 6) /* 1=pagecache, 0=swap (when !present) */ #define _PAGE_ACCESSED (1 << 6) /* Simulated in software using valid bit. */ #define _PAGE_MODIFIED (1 << 7) /* Simulated in software using we bit. */ #define _PAGE_READ (1 << 8) /* Read enabled. */ @@ -108,6 +107,4 @@ typedef struct #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -#define PTE_FILE_MAX_BITS 25 - #endif /* _ASM_CRIS_ARCH_MMU_H */ diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 8b8c867..e824257 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -114,7 +114,6 @@ extern unsigned long empty_zero_page; static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) @@ -290,9 +289,6 @@ static inline void update_mmu_cache(struct vm_area_struct * vma, */ #define pgtable_cache_init() do { } while (0) -#define pte_to_pgoff(x) (pte_val(x) >> 6) -#define pgoff_to_pte(x) __pte(((x) << 6) | _PAGE_FILE) - typedef pte_t *pte_addr_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index eb0110a..c49699d 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -62,10 +62,6 @@ typedef pte_t *pte_addr_t; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifndef __ASSEMBLY__ -static inline int pte_file(pte_t pte) { return 0; } -#endif - #define ZERO_PAGE(vaddr) ({ BUG(); NULL; }) #define swapper_pg_dir ((pgd_t *) NULL) @@ -298,7 +294,6 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address) #define _PAGE_RESERVED_MASK (xAMPRx_RESERVED8 | xAMPRx_RESERVED13) -#define _PAGE_FILE 0x002 /* set:pagecache unset:swap */ #define _PAGE_PROTNONE 0x000 /* If not present */ #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -463,27 +458,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * Handle swap and file entries * - the PTE is encoded in the following format: * bit 0: Must be 0 (!_PAGE_PRESENT) - * bit 1: Type: 0 for swap, 1 for file (_PAGE_FILE) - * bits 2-7: Swap type - * bits 8-31: Swap offset - * bits 2-31: File pgoff + * bits 1-6: Swap type + * bits 7-31: Swap offset */ -#define __swp_type(x) (((x).val >> 2) & 0x1f) -#define __swp_offset(x) ((x).val >> 8) -#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 8) }) +#define __swp_type(x) (((x).val >> 1) & 0x1f) +#define __swp_offset(x) ((x).val >> 7) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 7) }) #define __pte_to_swp_entry(_pte) ((swp_entry_t) { (_pte).pte }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_file(pte_t pte) -{ - return pte.pte & _PAGE_FILE; -} - -#define PTE_FILE_MAX_BITS 29 - -#define pte_to_pgoff(PTE) ((PTE).pte >> 2) -#define pgoff_to_pte(off) __pte((off) << 2 | _PAGE_FILE) - /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ #define PageSkip(page) (0) #define kern_addr_valid(addr) (1) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index d8bd54f..6e35e71 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -62,13 +62,6 @@ extern unsigned long zero_page_mask; #define _PAGE_ACCESSED (1<<2) /* - * _PAGE_FILE is only meaningful if _PAGE_PRESENT is false, while - * _PAGE_DIRTY is only meaningful if _PAGE_PRESENT is true. - * So we can overload the bit... - */ -#define _PAGE_FILE _PAGE_DIRTY /* set: pagecache, unset = swap */ - -/* * For now, let's say that Valid and Present are the same thing. * Alternatively, we could say that it's the "or" of R, W, and X * permissions. @@ -456,57 +449,36 @@ static inline int pte_exec(pte_t pte) #define pgtable_cache_init() do { } while (0) /* - * Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the - * PTE is interpreted as swap information. Depending on the _PAGE_FILE - * bit, the remaining free bits are eitehr interpreted as a file offset - * or a swap type/offset tuple. Rather than have the TLB fill handler - * test _PAGE_PRESENT, we're going to reserve the permissions bits - * and set them to all zeros for swap entries, which speeds up the - * miss handler at the cost of 3 bits of offset. That trade-off can - * be revisited if necessary, but Hexagon processor architecture and - * target applications suggest a lot of TLB misses and not much swap space. + * Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the PTE is + * interpreted as swap information. The remaining free bits are interpreted as + * swap type/offset tuple. Rather than have the TLB fill handler test + * _PAGE_PRESENT, we're going to reserve the permissions bits and set them to + * all zeros for swap entries, which speeds up the miss handler at the cost of + * 3 bits of offset. That trade-off can be revisited if necessary, but Hexagon + * processor architecture and target applications suggest a lot of TLB misses + * and not much swap space. * * Format of swap PTE: * bit 0: Present (zero) - * bit 1: _PAGE_FILE (zero) - * bits 2-6: swap type (arch independent layer uses 5 bits max) - * bits 7-9: bits 2:0 of offset - * bits 10-12: effectively _PAGE_PROTNONE (all zero) - * bits 13-31: bits 21:3 of swap offset - * - * Format of file PTE: - * bit 0: Present (zero) - * bit 1: _PAGE_FILE (zero) - * bits 2-9: bits 7:0 of offset - * bits 10-12: effectively _PAGE_PROTNONE (all zero) - * bits 13-31: bits 26:8 of swap offset + * bits 1-5: swap type (arch independent layer uses 5 bits max) + * bits 6-9: bits 3:0 of offset + * bits 10-12: effectively _PAGE_PROTNONE (all zero) + * bits 13-31: bits 22:4 of swap offset * * The split offset makes some of the following macros a little gnarly, * but there's plenty of precedent for this sort of thing. */ -#define PTE_FILE_MAX_BITS 27 /* Used for swap PTEs */ -#define __swp_type(swp_pte) (((swp_pte).val >> 2) & 0x1f) +#define __swp_type(swp_pte) (((swp_pte).val >> 1) & 0x1f) #define __swp_offset(swp_pte) \ - ((((swp_pte).val >> 7) & 0x7) | (((swp_pte).val >> 10) & 0x003ffff8)) + ((((swp_pte).val >> 6) & 0xf) | (((swp_pte).val >> 9) & 0x7ffff0)) #define __swp_entry(type, offset) \ ((swp_entry_t) { \ - ((type << 2) | \ - ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) }) - -/* Used for file PTEs */ -#define pte_file(pte) \ - ((pte_val(pte) & (_PAGE_FILE | _PAGE_PRESENT)) == _PAGE_FILE) - -#define pte_to_pgoff(pte) \ - (((pte_val(pte) >> 2) & 0xff) | ((pte_val(pte) >> 5) & 0x07ffff00)) - -#define pgoff_to_pte(off) \ - ((pte_t) { ((((off) & 0x7ffff00) << 5) | (((off) & 0xff) << 2)\ - | _PAGE_FILE) }) + ((type << 1) | \ + ((offset & 0x7ffff0) << 9) | ((offset & 0xf) << 6)) }) /* Oh boy. There are a lot of possible arch overrides found in this file. */ #include <asm-generic/pgtable.h> diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 7935115..2f07bb3 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -57,9 +57,6 @@ #define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ #define _PAGE_PROTNONE (__IA64_UL(1) << 63) -/* Valid only for a PTE with the present bit cleared: */ -#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */ - #define _PFN_MASK _PAGE_PPN_MASK /* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ #define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) @@ -300,7 +297,6 @@ extern unsigned long VMALLOC_END; #define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) -#define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0) #define pte_special(pte) 0 /* @@ -472,27 +468,16 @@ extern void paging_init (void); * * Format of swap pte: * bit 0 : present bit (must be zero) - * bit 1 : _PAGE_FILE (must be zero) - * bits 2- 8: swap-type - * bits 9-62: swap offset - * bit 63 : _PAGE_PROTNONE bit - * - * Format of file pte: - * bit 0 : present bit (must be zero) - * bit 1 : _PAGE_FILE (must be one) - * bits 2-62: file_offset/PAGE_SIZE + * bits 1- 7: swap-type + * bits 8-62: swap offset * bit 63 : _PAGE_PROTNONE bit */ -#define __swp_type(entry) (((entry).val >> 2) & 0x7f) -#define __swp_offset(entry) (((entry).val << 1) >> 10) -#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) }) +#define __swp_type(entry) (((entry).val >> 1) & 0x7f) +#define __swp_offset(entry) (((entry).val << 1) >> 9) +#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 8) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PTE_FILE_MAX_BITS 61 -#define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 2) | _PAGE_FILE }) - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/m32r/include/asm/pgtable-2level.h b/arch/m32r/include/asm/pgtable-2level.h index 9cdaf73..8fd8ee7 100644 --- a/arch/m32r/include/asm/pgtable-2level.h +++ b/arch/m32r/include/asm/pgtable-2level.h @@ -70,9 +70,5 @@ static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address) #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define PTE_FILE_MAX_BITS 29 -#define pte_to_pgoff(pte) (((pte_val(pte) >> 2) & 0x7f) | (((pte_val(pte) >> 10)) << 7)) -#define pgoff_to_pte(off) ((pte_t) { (((off) & 0x7f) << 2) | (((off) >> 7) << 10) | _PAGE_FILE }) - #endif /* __KERNEL__ */ #endif /* _ASM_M32R_PGTABLE_2LEVEL_H */ diff --git a/arch/m32r/include/asm/pgtable.h b/arch/m32r/include/asm/pgtable.h index 103ce67..050f7a6 100644 --- a/arch/m32r/include/asm/pgtable.h +++ b/arch/m32r/include/asm/pgtable.h @@ -80,8 +80,6 @@ extern unsigned long empty_zero_page[1024]; */ #define _PAGE_BIT_DIRTY 0 /* software: page changed */ -#define _PAGE_BIT_FILE 0 /* when !present: nonlinear file - mapping */ #define _PAGE_BIT_PRESENT 1 /* Valid: page is valid */ #define _PAGE_BIT_GLOBAL 2 /* Global */ #define _PAGE_BIT_LARGE 3 /* Large */ @@ -93,7 +91,6 @@ extern unsigned long empty_zero_page[1024]; #define _PAGE_BIT_PROTNONE 9 /* software: if not present */ #define _PAGE_DIRTY (1UL << _PAGE_BIT_DIRTY) -#define _PAGE_FILE (1UL << _PAGE_BIT_FILE) #define _PAGE_PRESENT (1UL << _PAGE_BIT_PRESENT) #define _PAGE_GLOBAL (1UL << _PAGE_BIT_GLOBAL) #define _PAGE_LARGE (1UL << _PAGE_BIT_LARGE) @@ -206,14 +203,6 @@ static inline int pte_write(pte_t pte) return pte_val(pte) & _PAGE_WRITE; } -/* - * The following only works if pte_present() is not true. - */ -static inline int pte_file(pte_t pte) -{ - return pte_val(pte) & _PAGE_FILE; -} - static inline int pte_special(pte_t pte) { return 0; diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index 3c79368..2500ce0 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -35,7 +35,6 @@ * hitting hardware. */ #define CF_PAGE_DIRTY 0x00000001 -#define CF_PAGE_FILE 0x00000200 #define CF_PAGE_ACCESSED 0x00001000 #define _PAGE_CACHE040 0x020 /* 68040 cache mode, cachable, copyback */ @@ -243,11 +242,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & CF_PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) -{ - return pte_val(pte) & CF_PAGE_FILE; -} - static inline int pte_special(pte_t pte) { return 0; @@ -391,26 +385,13 @@ static inline void cache_page(void *vaddr) *ptep = pte_mkcache(*ptep); } -#define PTE_FILE_MAX_BITS 21 -#define PTE_FILE_SHIFT 11 - -static inline unsigned long pte_to_pgoff(pte_t pte) -{ - return pte_val(pte) >> PTE_FILE_SHIFT; -} - -static inline pte_t pgoff_to_pte(unsigned pgoff) -{ - return __pte((pgoff << PTE_FILE_SHIFT) + CF_PAGE_FILE); -} - /* * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ #define __swp_type(x) ((x).val & 0xFF) -#define __swp_offset(x) ((x).val >> PTE_FILE_SHIFT) +#define __swp_offset(x) ((x).val >> 11) #define __swp_entry(typ, off) ((swp_entry_t) { (typ) | \ - (off << PTE_FILE_SHIFT) }) + (off << 11) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) (__pte((x).val)) diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index e0fdd4d..0085aab 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -28,7 +28,6 @@ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NOCACHE) #define _PAGE_PROTNONE 0x004 -#define _PAGE_FILE 0x008 /* pagecache or swap? */ #ifndef __ASSEMBLY__ @@ -168,7 +167,6 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp) static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } @@ -266,19 +264,6 @@ static inline void cache_page(void *vaddr) } } -#define PTE_FILE_MAX_BITS 28 - -static inline unsigned long pte_to_pgoff(pte_t pte) -{ - return pte.pte >> 4; -} - -static inline pte_t pgoff_to_pte(unsigned off) -{ - pte_t pte = { (off << 4) + _PAGE_FILE }; - return pte; -} - /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ #define __swp_type(x) (((x).val >> 4) & 0xff) #define __swp_offset(x) ((x).val >> 12) diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index 11859b8..ac7d87a 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -37,8 +37,6 @@ extern void paging_init(void); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_file(pte_t pte) { return 0; } - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index f55aa04..48657f9 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -38,8 +38,6 @@ #define _PAGE_PRESENT (SUN3_PAGE_VALID) #define _PAGE_ACCESSED (SUN3_PAGE_ACCESSED) -#define PTE_FILE_MAX_BITS 28 - /* Compound page protection values. */ //todo: work out which ones *should* have SUN3_PAGE_NOCACHE and fix... // is it just PAGE_KERNEL and PAGE_SHARED? @@ -168,7 +166,6 @@ static inline void pgd_clear (pgd_t *pgdp) {} static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } @@ -202,18 +199,6 @@ static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address) return (pmd_t *) pgd; } -static inline unsigned long pte_to_pgoff(pte_t pte) -{ - return pte.pte & SUN3_PAGE_PGNUM_MASK; -} - -static inline pte_t pgoff_to_pte(unsigned off) -{ - pte_t pte = { off + SUN3_PAGE_ACCESSED }; - return pte; -} - - /* Find an entry in the third-level pagetable. */ #define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address)) diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index 0d9dc54..d0604c0 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -47,7 +47,6 @@ */ #define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 #define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 -#define _PAGE_FILE _PAGE_ALWAYS_ZERO_3 /* Pages owned, and protected by, the kernel. */ #define _PAGE_KERNEL _PAGE_PRIV @@ -219,7 +218,6 @@ extern unsigned long empty_zero_page; static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; } @@ -327,10 +325,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PTE_FILE_MAX_BITS 22 -#define pte_to_pgoff(x) (pte_val(x) >> 10) -#define pgoff_to_pte(x) __pte(((x) << 10) | _PAGE_FILE) - #define kern_addr_valid(addr) (1) /* diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index df19d0c..91b9b46 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -40,10 +40,6 @@ extern int mem_init_done; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifndef __ASSEMBLY__ -static inline int pte_file(pte_t pte) { return 0; } -#endif /* __ASSEMBLY__ */ - #define ZERO_PAGE(vaddr) ({ BUG(); NULL; }) #define swapper_pg_dir ((pgd_t *) NULL) @@ -207,7 +203,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* Definitions for MicroBlaze. */ #define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */ -#define _PAGE_FILE 0x001 /* when !present: nonlinear file mapping */ #define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */ #define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ @@ -337,7 +332,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -499,11 +493,6 @@ static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address) #define pte_unmap(pte) kunmap_atomic(pte) -/* Encode and decode a nonlinear file mapping entry */ -#define PTE_FILE_MAX_BITS 29 -#define pte_to_pgoff(pte) (pte_val(pte) >> 3) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE }) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 68984b6..16aa9f2 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -161,22 +161,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* - * Encode and decode a nonlinear file mapping entry - */ -#define pte_to_pgoff(_pte) ((((_pte).pte >> 1 ) & 0x07) | \ - (((_pte).pte >> 2 ) & 0x38) | \ - (((_pte).pte >> 10) << 6 )) - -#define pgoff_to_pte(off) ((pte_t) { (((off) & 0x07) << 1 ) | \ - (((off) & 0x38) << 2 ) | \ - (((off) >> 6 ) << 10) | \ - _PAGE_FILE }) - -/* - * Bits 0, 4, 8, and 9 are taken, split up 28 bits of offset into this range: - */ -#define PTE_FILE_MAX_BITS 28 #else #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -188,13 +172,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t) { 0, (x).val }) -/* - * Bits 0 and 1 of pte_high are taken, use the rest for the page offset... - */ -#define pte_to_pgoff(_pte) ((_pte).pte_high >> 2) -#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) << 2 }) - -#define PTE_FILE_MAX_BITS 30 #else /* * Constraints: @@ -209,19 +186,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* - * Encode and decode a nonlinear file mapping entry - */ -#define pte_to_pgoff(_pte) ((((_pte).pte >> 1) & 0x7) | \ - (((_pte).pte >> 2) & 0x8) | \ - (((_pte).pte >> 8) << 4)) - -#define pgoff_to_pte(off) ((pte_t) { (((off) & 0x7) << 1) | \ - (((off) & 0x8) << 2) | \ - (((off) >> 4) << 8) | \ - _PAGE_FILE }) - -#define PTE_FILE_MAX_BITS 28 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */ #endif /* defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) */ diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index e1c49a9..1659bb9 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -291,13 +291,4 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* - * Bits 0, 4, 6, and 7 are taken. Let's leave bits 1, 2, 3, and 5 alone to - * make things easier, and only use the upper 56 bits for the page offset... - */ -#define PTE_FILE_MAX_BITS 56 - -#define pte_to_pgoff(_pte) ((_pte).pte >> 8) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 8) | _PAGE_FILE }) - #endif /* _ASM_PGTABLE_64_H */ diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h index ca11f14..fc807aa 100644 --- a/arch/mips/include/asm/pgtable-bits.h +++ b/arch/mips/include/asm/pgtable-bits.h @@ -48,8 +48,6 @@ /* * The following bits are implemented in software - * - * _PAGE_FILE semantics: set:pagecache unset:swap */ #define _PAGE_PRESENT_SHIFT (_CACHE_SHIFT + 3) #define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT) @@ -64,7 +62,6 @@ #define _PAGE_SILENT_READ _PAGE_VALID #define _PAGE_SILENT_WRITE _PAGE_DIRTY -#define _PAGE_FILE _PAGE_MODIFIED #define _PFN_SHIFT (PAGE_SHIFT - 12 + _CACHE_SHIFT + 3) @@ -72,8 +69,6 @@ /* * The following are implemented by software - * - * _PAGE_FILE semantics: set:pagecache unset:swap */ #define _PAGE_PRESENT_SHIFT 0 #define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT) @@ -85,8 +80,6 @@ #define _PAGE_ACCESSED (1 << _PAGE_ACCESSED_SHIFT) #define _PAGE_MODIFIED_SHIFT 4 #define _PAGE_MODIFIED (1 << _PAGE_MODIFIED_SHIFT) -#define _PAGE_FILE_SHIFT 4 -#define _PAGE_FILE (1 << _PAGE_FILE_SHIFT) /* * And these are the hardware TLB bits @@ -116,7 +109,6 @@ * The following bits are implemented in software * * _PAGE_READ / _PAGE_READ_SHIFT should be unused if cpu_has_rixi. - * _PAGE_FILE semantics: set:pagecache unset:swap */ #define _PAGE_PRESENT_SHIFT (0) #define _PAGE_PRESENT (1 << _PAGE_PRESENT_SHIFT) @@ -128,7 +120,6 @@ #define _PAGE_ACCESSED (1 << _PAGE_ACCESSED_SHIFT) #define _PAGE_MODIFIED_SHIFT (_PAGE_ACCESSED_SHIFT + 1) #define _PAGE_MODIFIED (1 << _PAGE_MODIFIED_SHIFT) -#define _PAGE_FILE (_PAGE_MODIFIED) #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT /* huge tlb page */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 62a6ba3..583ff42 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -231,7 +231,6 @@ extern pgd_t swapper_pg_dir[]; static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte.pte_low & _PAGE_FILE; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -287,7 +286,6 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/mn10300/include/asm/pgtable.h b/arch/mn10300/include/asm/pgtable.h index 2ddaa67e..629181a 100644 --- a/arch/mn10300/include/asm/pgtable.h +++ b/arch/mn10300/include/asm/pgtable.h @@ -134,7 +134,6 @@ extern pte_t kernel_vmalloc_ptes[(VMALLOC_END - VMALLOC_START) / PAGE_SIZE]; #define _PAGE_NX 0 /* no-execute bit */ /* If _PAGE_VALID is clear, we use these: */ -#define _PAGE_FILE xPTEL2_C /* set:pagecache unset:swap */ #define _PAGE_PROTNONE 0x000 /* If not present */ #define __PAGE_PROT_UWAUX 0x010 @@ -241,11 +240,6 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & __PAGE_PROT_WRITE; } static inline int pte_special(pte_t pte){ return 0; } -/* - * The following only works if pte_present() is not true. - */ -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } - static inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) &= ~(__PAGE_PROT_USER|__PAGE_PROT_UWAUX); return pte; @@ -338,16 +332,11 @@ static inline int pte_exec_kernel(pte_t pte) return 1; } -#define PTE_FILE_MAX_BITS 30 - -#define pte_to_pgoff(pte) (pte_val(pte) >> 2) -#define pgoff_to_pte(off) __pte((off) << 2 | _PAGE_FILE) - /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 2) & 0x3f) -#define __swp_offset(x) ((x).val >> 8) +#define __swp_type(x) (((x).val >> 1) & 0x3f) +#define __swp_offset(x) ((x).val >> 7) #define __swp_entry(type, offset) \ - ((swp_entry_t) { ((type) << 2) | ((offset) << 8) }) + ((swp_entry_t) { ((type) << 1) | ((offset) << 7) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) __pte((x).val) diff --git a/arch/nios2/include/asm/pgtable-bits.h b/arch/nios2/include/asm/pgtable-bits.h index ce9e706..bfddff3 100644 --- a/arch/nios2/include/asm/pgtable-bits.h +++ b/arch/nios2/include/asm/pgtable-bits.h @@ -30,6 +30,5 @@ #define _PAGE_PRESENT (1<<25) /* PTE contains a translation */ #define _PAGE_ACCESSED (1<<26) /* page referenced */ #define _PAGE_DIRTY (1<<27) /* dirty page */ -#define _PAGE_FILE (1<<28) /* PTE used for file mapping or swap */ #endif /* _ASM_NIOS2_PGTABLE_BITS_H */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index ccbaffd..7b292e3 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -112,8 +112,6 @@ static inline int pte_dirty(pte_t pte) \ { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) \ { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) \ - { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } #define pgprot_noncached pgprot_noncached @@ -272,8 +270,7 @@ static inline void pte_clear(struct mm_struct *mm, __FILE__, __LINE__, pgd_val(e)) /* - * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte) - * && !pte_file(pte)): + * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte): * * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 ... 1 0 * 0 0 0 0 type. 0 0 0 0 0 0 offset......... @@ -290,11 +287,6 @@ static inline void pte_clear(struct mm_struct *mm, #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -/* Encode and decode a nonlinear file mapping entry */ -#define PTE_FILE_MAX_BITS 25 -#define pte_to_pgoff(pte) (pte_val(pte) & 0x1ffffff) -#define pgoff_to_pte(off) __pte(((off) & 0x1ffffff) | _PAGE_FILE) - #define kern_addr_valid(addr) (1) #include <asm-generic/pgtable.h> diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 37bf6a3..18994cc 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -125,7 +125,6 @@ extern void paging_init(void); #define _PAGE_CC 0x001 /* software: pte contains a translation */ #define _PAGE_CI 0x002 /* cache inhibit */ #define _PAGE_WBC 0x004 /* write back cache */ -#define _PAGE_FILE 0x004 /* set: pagecache, unset: swap (when !PRESENT) */ #define _PAGE_WOM 0x008 /* weakly ordered memory */ #define _PAGE_A 0x010 /* accessed */ @@ -240,7 +239,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkspecial(pte_t pte) { return pte; } @@ -438,12 +436,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* Encode and decode a nonlinear file mapping entry */ - -#define PTE_FILE_MAX_BITS 26 -#define pte_to_pgoff(x) (pte_val(x) >> 6) -#define pgoff_to_pte(x) __pte(((x) << 6) | _PAGE_FILE) - #define kern_addr_valid(addr) (1) #include <asm-generic/pgtable.h> diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 1d3c9c2..f147933 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -754,11 +754,6 @@ _dc_enable: /* ===============================================[ page table masks ]=== */ -/* bit 4 is used in hardware as write back cache bit. we never use this bit - * explicitly, so we can reuse it as _PAGE_FILE bit and mask it out when - * writing into hardware pte's - */ - #define DTLB_UP_CONVERT_MASK 0x3fa #define ITLB_UP_CONVERT_MASK 0x3a diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 22b89d1..1d49a4a 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -146,7 +146,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #define _PAGE_GATEWAY_BIT 28 /* (0x008) privilege promotion allowed */ #define _PAGE_DMB_BIT 27 /* (0x010) Data Memory Break enable (B bit) */ #define _PAGE_DIRTY_BIT 26 /* (0x020) Page Dirty (D bit) */ -#define _PAGE_FILE_BIT _PAGE_DIRTY_BIT /* overload this bit */ #define _PAGE_REFTRAP_BIT 25 /* (0x040) Page Ref. Trap enable (T bit) */ #define _PAGE_NO_CACHE_BIT 24 /* (0x080) Uncached Page (U bit) */ #define _PAGE_ACCESSED_BIT 23 /* (0x100) Software: Page Accessed */ @@ -167,13 +166,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); /* PFN_PTE_SHIFT defines the shift of a PTE value to access the PFN field */ #define PFN_PTE_SHIFT 12 - -/* this is how many bits may be used by the file functions */ -#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT) - -#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT) -#define pgoff_to_pte(off) ((pte_t) { ((off) << PTE_SHIFT) | _PAGE_FILE }) - #define _PAGE_READ (1 << xlate_pabit(_PAGE_READ_BIT)) #define _PAGE_WRITE (1 << xlate_pabit(_PAGE_WRITE_BIT)) #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) @@ -186,7 +178,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT)) #define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT)) #define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) -#define _PAGE_FILE (1 << xlate_pabit(_PAGE_FILE_BIT)) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -344,7 +335,6 @@ static inline void pgd_clear(pgd_t * pgdp) { } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5e102422..ffb1d8c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -249,10 +249,10 @@ static inline int is_module_addr(void *addr) _PAGE_YOUNG) /* - * handle_pte_fault uses pte_present, pte_none and pte_file to find out the - * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit - * is used to distinguish present from not-present ptes. It is changed only - * with the page table lock held. + * handle_pte_fault uses pte_present and pte_none to find out the pte type + * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to + * distinguish present from not-present ptes. It is changed only with the page + * table lock held. * * The following table gives the different possible bit combinations for * the pte hardware and software bits in the last 12 bits of a pte: @@ -279,7 +279,6 @@ static inline int is_module_addr(void *addr) * * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 - * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 */ @@ -671,13 +670,6 @@ static inline int pte_swap(pte_t pte) == (_PAGE_INVALID | _PAGE_TYPE); } -static inline int pte_file(pte_t pte) -{ - /* Bit pattern: (pte & 0x601) == 0x600 */ - return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) - == (_PAGE_INVALID | _PAGE_PROTECT); -} - static inline int pte_special(pte_t pte) { return (pte_val(pte) & _PAGE_SPECIAL); @@ -1756,19 +1748,6 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifndef CONFIG_64BIT -# define PTE_FILE_MAX_BITS 26 -#else /* CONFIG_64BIT */ -# define PTE_FILE_MAX_BITS 59 -#endif /* CONFIG_64BIT */ - -#define pte_to_pgoff(__pte) \ - ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f)) - -#define pgoff_to_pte(__off) \ - ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_INVALID | _PAGE_PROTECT }) - #endif /* !__ASSEMBLY__ */ #define kern_addr_valid(addr) (1) diff --git a/arch/score/include/asm/pgtable-bits.h b/arch/score/include/asm/pgtable-bits.h index 7d65a96..0e5c6f4 100644 --- a/arch/score/include/asm/pgtable-bits.h +++ b/arch/score/include/asm/pgtable-bits.h @@ -6,7 +6,6 @@ #define _PAGE_WRITE (1<<7) /* implemented in software */ #define _PAGE_PRESENT (1<<9) /* implemented in software */ #define _PAGE_MODIFIED (1<<10) /* implemented in software */ -#define _PAGE_FILE (1<<10) #define _PAGE_GLOBAL (1<<0) #define _PAGE_VALID (1<<1) diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index db96ad9..5170ffd 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -90,15 +90,6 @@ static inline void pmd_clear(pmd_t *pmdp) ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) #define pte_unmap(pte) ((void)(pte)) -/* - * Bits 9(_PAGE_PRESENT) and 10(_PAGE_FILE)are taken, - * split up 30 bits of offset into this range: - */ -#define PTE_FILE_MAX_BITS 30 -#define pte_to_pgoff(_pte) \ - (((_pte).pte & 0x1ff) | (((_pte).pte >> 11) << 9)) -#define pgoff_to_pte(off) \ - ((pte_t) {((off) & 0x1ff) | (((off) >> 9) << 11) | _PAGE_FILE}) #define __pte_to_swp_entry(pte) \ ((swp_entry_t) { pte_val(pte)}) #define __swp_entry_to_pte(x) ((pte_t) {(x).val}) @@ -169,8 +160,8 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) } #define __swp_type(x) ((x).val & 0x1f) -#define __swp_offset(x) ((x).val >> 11) -#define __swp_entry(type, offset) ((swp_entry_t){(type) | ((offset) << 11)}) +#define __swp_offset(x) ((x).val >> 10) +#define __swp_entry(type, offset) ((swp_entry_t){(type) | ((offset) << 10)}) extern unsigned long empty_zero_page; extern unsigned long zero_page_mask; @@ -198,11 +189,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) -{ - return pte_val(pte) & _PAGE_FILE; -} - #define pte_special(pte) (0) static inline pte_t pte_wrprotect(pte_t pte) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 0f09f52..eb4ef27 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,7 +1,7 @@ config SUPERH def_bool y select ARCH_MIGHT_HAVE_PC_PARPORT - select EXPERT + select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c index 7646bf0..1087dba 100644 --- a/arch/sh/boards/mach-se/7343/irq.c +++ b/arch/sh/boards/mach-se/7343/irq.c @@ -14,9 +14,6 @@ #define DRV_NAME "SE7343-FPGA" #define pr_fmt(fmt) DRV_NAME ": " fmt -#define irq_reg_readl ioread16 -#define irq_reg_writel iowrite16 - #include <linux/init.h> #include <linux/irq.h> #include <linux/interrupt.h> diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c index f5e2af1b..00e6992 100644 --- a/arch/sh/boards/mach-se/7722/irq.c +++ b/arch/sh/boards/mach-se/7722/irq.c @@ -11,9 +11,6 @@ #define DRV_NAME "SE7722-FPGA" #define pr_fmt(fmt) DRV_NAME ": " fmt -#define irq_reg_readl ioread16 -#define irq_reg_writel iowrite16 - #include <linux/init.h> #include <linux/irq.h> #include <linux/interrupt.h> diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h index 0bce3d8..c646e56 100644 --- a/arch/sh/include/asm/pgtable_32.h +++ b/arch/sh/include/asm/pgtable_32.h @@ -26,8 +26,6 @@ * and timing control which (together with bit 0) are moved into the * old-style PTEA on the parts that support it. * - * XXX: Leave the _PAGE_FILE and _PAGE_WT overhaul for a rainy day. - * * SH-X2 MMUs and extended PTEs * * SH-X2 supports an extended mode TLB with split data arrays due to the @@ -51,7 +49,6 @@ #define _PAGE_PRESENT 0x100 /* V-bit : page is valid */ #define _PAGE_PROTNONE 0x200 /* software: if not present */ #define _PAGE_ACCESSED 0x400 /* software: page referenced */ -#define _PAGE_FILE _PAGE_WT /* software: pagecache or swap? */ #define _PAGE_SPECIAL 0x800 /* software: special page */ #define _PAGE_SZ_MASK (_PAGE_SZ0 | _PAGE_SZ1) @@ -105,14 +102,13 @@ static inline unsigned long copy_ptea_attributes(unsigned long x) /* Mask which drops unused bits from the PTEL value */ #if defined(CONFIG_CPU_SH3) #define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED| \ - _PAGE_FILE | _PAGE_SZ1 | \ - _PAGE_HW_SHARED) + _PAGE_SZ1 | _PAGE_HW_SHARED) #elif defined(CONFIG_X2TLB) /* Get rid of the legacy PR/SZ bits when using extended mode */ #define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED | \ - _PAGE_FILE | _PAGE_PR_MASK | _PAGE_SZ_MASK) + _PAGE_PR_MASK | _PAGE_SZ_MASK) #else -#define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED | _PAGE_FILE) +#define _PAGE_CLEAR_FLAGS (_PAGE_PROTNONE | _PAGE_ACCESSED) #endif #define _PAGE_FLAGS_HARDWARE_MASK (phys_addr_mask() & ~(_PAGE_CLEAR_FLAGS)) @@ -343,7 +339,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte) #define pte_not_present(pte) (!((pte).pte_low & _PAGE_PRESENT)) #define pte_dirty(pte) ((pte).pte_low & _PAGE_DIRTY) #define pte_young(pte) ((pte).pte_low & _PAGE_ACCESSED) -#define pte_file(pte) ((pte).pte_low & _PAGE_FILE) #define pte_special(pte) ((pte).pte_low & _PAGE_SPECIAL) #ifdef CONFIG_X2TLB @@ -445,7 +440,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * Encode and de-code a swap entry * * Constraints: - * _PAGE_FILE at bit 0 * _PAGE_PRESENT at bit 8 * _PAGE_PROTNONE at bit 9 * @@ -453,9 +447,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * swap offset into bits 10:30. For the 64-bit PTE case, we keep the * preserved bits in the low 32-bits and use the upper 32 as the swap * offset (along with a 5-bit type), following the same approach as x86 - * PAE. This keeps the logic quite simple, and allows for a full 32 - * PTE_FILE_MAX_BITS, as opposed to the 29-bits we're constrained with - * in the pte_low case. + * PAE. This keeps the logic quite simple. * * As is evident by the Alpha code, if we ever get a 64-bit unsigned * long (swp_entry_t) to match up with the 64-bit PTEs, this all becomes @@ -471,13 +463,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ 0, (x).val }) -/* - * Encode and decode a nonlinear file mapping entry - */ -#define pte_to_pgoff(pte) ((pte).pte_high) -#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) }) - -#define PTE_FILE_MAX_BITS 32 #else #define __swp_type(x) ((x).val & 0xff) #define __swp_offset(x) ((x).val >> 10) @@ -485,13 +470,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 1 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 1 }) - -/* - * Encode and decode a nonlinear file mapping entry - */ -#define PTE_FILE_MAX_BITS 29 -#define pte_to_pgoff(pte) (pte_val(pte) >> 1) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 1) | _PAGE_FILE }) #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/sh/include/asm/pgtable_64.h b/arch/sh/include/asm/pgtable_64.h index dda8c82..0742496 100644 --- a/arch/sh/include/asm/pgtable_64.h +++ b/arch/sh/include/asm/pgtable_64.h @@ -107,7 +107,6 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval) #define _PAGE_DEVICE 0x001 /* CB0: if uncacheable, 1->device (i.e. no write-combining or reordering at bus level) */ #define _PAGE_CACHABLE 0x002 /* CB1: uncachable/cachable */ #define _PAGE_PRESENT 0x004 /* software: page referenced */ -#define _PAGE_FILE 0x004 /* software: only when !present */ #define _PAGE_SIZE0 0x008 /* SZ0-bit : size of page */ #define _PAGE_SIZE1 0x010 /* SZ1-bit : size of page */ #define _PAGE_SHARED 0x020 /* software: reflects PTEH's SH */ @@ -129,7 +128,7 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval) #define _PAGE_WIRED _PAGE_EXT(0x001) /* software: wire the tlb entry */ #define _PAGE_SPECIAL _PAGE_EXT(0x002) -#define _PAGE_CLEAR_FLAGS (_PAGE_PRESENT | _PAGE_FILE | _PAGE_SHARED | \ +#define _PAGE_CLEAR_FLAGS (_PAGE_PRESENT | _PAGE_SHARED | \ _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_WIRED) /* Mask which drops software flags */ @@ -260,7 +259,6 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval) */ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_special(pte_t pte){ return pte_val(pte) & _PAGE_SPECIAL; } @@ -304,11 +302,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* Encode and decode a nonlinear file mapping entry */ -#define PTE_FILE_MAX_BITS 29 -#define pte_to_pgoff(pte) (pte_val(pte)) -#define pgoff_to_pte(off) ((pte_t) { (off) | _PAGE_FILE }) - #endif /* !__ASSEMBLY__ */ #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index b9b91ae..b2f7dc4 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -221,14 +221,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & SRMMU_REF; } -/* - * The following only work if pte_present() is not true. - */ -static inline int pte_file(pte_t pte) -{ - return pte_val(pte) & SRMMU_FILE; -} - static inline int pte_special(pte_t pte) { return 0; @@ -375,22 +367,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* file-offset-in-pte helpers */ -static inline unsigned long pte_to_pgoff(pte_t pte) -{ - return pte_val(pte) >> SRMMU_PTE_FILE_SHIFT; -} - -static inline pte_t pgoff_to_pte(unsigned long pgoff) -{ - return __pte((pgoff << SRMMU_PTE_FILE_SHIFT) | SRMMU_FILE); -} - -/* - * This is made a constant because mm/fremap.c required a constant. - */ -#define PTE_FILE_MAX_BITS 24 - static inline unsigned long __get_phys (unsigned long addr) { diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 1ff9e78..2ac7873 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -137,7 +137,6 @@ bool kern_addr_valid(unsigned long addr); #define _PAGE_SOFT_4U _AC(0x0000000000001F80,UL) /* Software bits: */ #define _PAGE_EXEC_4U _AC(0x0000000000001000,UL) /* Executable SW bit */ #define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty) */ -#define _PAGE_FILE_4U _AC(0x0000000000000800,UL) /* Pagecache page */ #define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd) */ #define _PAGE_READ_4U _AC(0x0000000000000200,UL) /* Readable SW Bit */ #define _PAGE_WRITE_4U _AC(0x0000000000000100,UL) /* Writable SW Bit */ @@ -167,7 +166,6 @@ bool kern_addr_valid(unsigned long addr); #define _PAGE_EXEC_4V _AC(0x0000000000000080,UL) /* Executable Page */ #define _PAGE_W_4V _AC(0x0000000000000040,UL) /* Writable */ #define _PAGE_SOFT_4V _AC(0x0000000000000030,UL) /* Software bits */ -#define _PAGE_FILE_4V _AC(0x0000000000000020,UL) /* Pagecache page */ #define _PAGE_PRESENT_4V _AC(0x0000000000000010,UL) /* Present */ #define _PAGE_RESV_4V _AC(0x0000000000000008,UL) /* Reserved */ #define _PAGE_SZ16GB_4V _AC(0x0000000000000007,UL) /* 16GB Page */ @@ -332,22 +330,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #endif -static inline pte_t pgoff_to_pte(unsigned long off) -{ - off <<= PAGE_SHIFT; - - __asm__ __volatile__( - "\n661: or %0, %2, %0\n" - " .section .sun4v_1insn_patch, \"ax\"\n" - " .word 661b\n" - " or %0, %3, %0\n" - " .previous\n" - : "=r" (off) - : "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V)); - - return __pte(off); -} - static inline pgprot_t pgprot_noncached(pgprot_t prot) { unsigned long val = pgprot_val(prot); @@ -609,22 +591,6 @@ static inline unsigned long pte_exec(pte_t pte) return (pte_val(pte) & mask); } -static inline unsigned long pte_file(pte_t pte) -{ - unsigned long val = pte_val(pte); - - __asm__ __volatile__( - "\n661: and %0, %2, %0\n" - " .section .sun4v_1insn_patch, \"ax\"\n" - " .word 661b\n" - " and %0, %3, %0\n" - " .previous\n" - : "=r" (val) - : "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V)); - - return val; -} - static inline unsigned long pte_present(pte_t pte) { unsigned long val = pte_val(pte); @@ -971,12 +937,6 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -/* File offset in PTE support. */ -unsigned long pte_file(pte_t); -#define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT) -pte_t pgoff_to_pte(unsigned long); -#define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL) - int page_in_phys_avail(unsigned long paddr); /* diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index 79da178..ae51a11 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -80,10 +80,6 @@ #define SRMMU_PRIV 0x1c #define SRMMU_PRIV_RDONLY 0x18 -#define SRMMU_FILE 0x40 /* Implemented in software */ - -#define SRMMU_PTE_FILE_SHIFT 8 /* == 32-PTE_FILE_MAX_BITS */ - #define SRMMU_CHG_MASK (0xffffff00 | SRMMU_REF | SRMMU_DIRTY) /* SRMMU swap entry encoding @@ -94,13 +90,13 @@ * oooooooooooooooooootttttRRRRRRRR * fedcba9876543210fedcba9876543210 * - * The bottom 8 bits are reserved for protection and status bits, especially - * FILE and PRESENT. + * The bottom 7 bits are reserved for protection and status bits, especially + * PRESENT. */ #define SRMMU_SWP_TYPE_MASK 0x1f -#define SRMMU_SWP_TYPE_SHIFT SRMMU_PTE_FILE_SHIFT -#define SRMMU_SWP_OFF_MASK 0x7ffff -#define SRMMU_SWP_OFF_SHIFT (SRMMU_PTE_FILE_SHIFT + 5) +#define SRMMU_SWP_TYPE_SHIFT 7 +#define SRMMU_SWP_OFF_MASK 0xfffff +#define SRMMU_SWP_OFF_SHIFT (SRMMU_SWP_TYPE_SHIFT + 5) /* Some day I will implement true fine grained access bits for * user pages because the SRMMU gives us the capabilities to diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 5d19507..bc75b6e 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -285,17 +285,6 @@ extern void start_mm_caching(struct mm_struct *mm); extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next); /* - * Support non-linear file mappings (see sys_remap_file_pages). - * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the - * file offset in the 32 high bits. - */ -#define _PAGE_FILE HV_PTE_CLIENT1 -#define PTE_FILE_MAX_BITS 32 -#define pte_file(pte) (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte)) -#define pte_to_pgoff(pte) ((pte).val >> 32) -#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE }) - -/* * Encode and de-code a swap entry (see <linux/swapops.h>). * We put the swap file type+offset in the 32 high bits; * I believe we can just leave the low bits clear. diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index cd33873..0029b3f 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -263,10 +263,6 @@ static int pte_to_home(pte_t pte) /* Update the home of a PTE if necessary (can also be used for a pgprot_t). */ pte_t pte_set_home(pte_t pte, int home) { - /* Check for non-linear file mapping "PTEs" and pass them through. */ - if (pte_file(pte)) - return pte; - #if CHIP_HAS_MMIO() /* Check for MMIO mappings and pass them through. */ if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO) diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index f534b73..7afe860 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -41,13 +41,4 @@ static inline void pgd_mkuptodate(pgd_t pgd) { } #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) -/* - * Bits 0 through 4 are taken - */ -#define PTE_FILE_MAX_BITS 27 - -#define pte_to_pgoff(pte) (pte_val(pte) >> 5) - -#define pgoff_to_pte(off) ((pte_t) { ((off) << 5) + _PAGE_FILE }) - #endif diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 0032f92..344c559 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -112,25 +112,5 @@ static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot) return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); } -/* - * Bits 0 through 3 are taken in the low part of the pte, - * put the 32 bits of offset into the high part. - */ -#define PTE_FILE_MAX_BITS 32 - -#ifdef CONFIG_64BIT - -#define pte_to_pgoff(p) ((p).pte >> 32) - -#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE }) - -#else - -#define pte_to_pgoff(pte) ((pte).pte_high) - -#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) }) - -#endif - #endif diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index bf974f7..2324b62 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -18,7 +18,6 @@ #define _PAGE_ACCESSED 0x080 #define _PAGE_DIRTY 0x100 /* If _PAGE_PRESENT is clear, we use these: */ -#define _PAGE_FILE 0x008 /* nonlinear file mapping, saved PTE; unset:swap */ #define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE; pte_present gives true */ @@ -151,14 +150,6 @@ static inline int pte_write(pte_t pte) !(pte_get_bits(pte, _PAGE_PROTNONE))); } -/* - * The following only works if pte_present() is not true. - */ -static inline int pte_file(pte_t pte) -{ - return pte_get_bits(pte, _PAGE_FILE); -} - static inline int pte_dirty(pte_t pte) { return pte_get_bits(pte, _PAGE_DIRTY); diff --git a/arch/unicore32/include/asm/pgtable-hwdef.h b/arch/unicore32/include/asm/pgtable-hwdef.h index 7314e85..e37fa47 100644 --- a/arch/unicore32/include/asm/pgtable-hwdef.h +++ b/arch/unicore32/include/asm/pgtable-hwdef.h @@ -44,7 +44,6 @@ #define PTE_TYPE_INVALID (3 << 0) #define PTE_PRESENT (1 << 2) -#define PTE_FILE (1 << 3) /* only when !PRESENT */ #define PTE_YOUNG (1 << 3) #define PTE_DIRTY (1 << 4) #define PTE_CACHEABLE (1 << 5) diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index ed6f7d0..818d0f5 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -283,20 +283,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define MAX_SWAPFILES_CHECK() \ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -/* - * Encode and decode a file entry. File entries are stored in the Linux - * page tables as follows: - * - * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <----------------------- offset ----------------------> 1 0 0 0 - */ -#define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 4) -#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) - -#define PTE_FILE_MAX_BITS 28 - /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ /* FIXME: this is not correct */ #define kern_addr_valid(addr) (1) diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 206a87f..fd74a11 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -62,44 +62,8 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi return ((value >> rightshift) & mask) << leftshift; } -/* - * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, - * split up the 29 bits of offset into this range. - */ -#define PTE_FILE_MAX_BITS 29 -#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) -#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) -#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) -#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) -#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) - -#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) -#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) - -#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) -#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) - -static __always_inline pgoff_t pte_to_pgoff(pte_t pte) -{ - return (pgoff_t) - (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + - pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + - pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3)); -} - -static __always_inline pte_t pgoff_to_pte(pgoff_t off) -{ - return (pte_t){ - .pte_low = - pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + - pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + - pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) + - _PAGE_FILE, - }; -} - /* Encode and de-code a swap entry */ -#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) +#define SWP_TYPE_BITS 5 #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 81bb91b..cdaa58c 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -176,18 +176,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) #endif -/* - * Bits 0, 6 and 7 are taken in the low part of the pte, - * put the 32 bits of offset into the high part. - * - * For soft-dirty tracking 11 bit is taken from - * the low part of pte as well. - */ -#define pte_to_pgoff(pte) ((pte).pte_high) -#define pgoff_to_pte(off) \ - ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } }) -#define PTE_FILE_MAX_BITS 32 - /* Encode and de-code a swap entry */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e8a5454..0fe03f8 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -115,11 +115,6 @@ static inline int pte_write(pte_t pte) return pte_flags(pte) & _PAGE_RW; } -static inline int pte_file(pte_t pte) -{ - return pte_flags(pte) & _PAGE_FILE; -} - static inline int pte_huge(pte_t pte) { return pte_flags(pte) & _PAGE_PSE; @@ -329,21 +324,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } -static inline pte_t pte_file_clear_soft_dirty(pte_t pte) -{ - return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); -} - -static inline pte_t pte_file_mksoft_dirty(pte_t pte) -{ - return pte_set_flags(pte, _PAGE_SOFT_DIRTY); -} - -static inline int pte_file_soft_dirty(pte_t pte) -{ - return pte_flags(pte) & _PAGE_SOFT_DIRTY; -} - #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 4572b2f..e227970 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -133,10 +133,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* PUD - Level3 access */ /* PMD - Level 2 access */ -#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) -#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ - _PAGE_FILE }) -#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT /* PTE - Level 1 access. */ @@ -145,7 +141,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_unmap(pte) ((void)(pte))/* NOP */ /* Encode and de-code a swap entry */ -#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) +#define SWP_TYPE_BITS 5 #ifdef CONFIG_NUMA_BALANCING /* Automatic NUMA balancing needs to be distinguishable from swap entries */ #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 25bcd4a8..5185a4f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -38,8 +38,6 @@ /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL -/* - set: nonlinear file mapping, saved PTE; unset:swap */ -#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) @@ -114,7 +112,6 @@ #define _PAGE_NX (_AT(pteval_t, 0)) #endif -#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8b977eb..bca0aa3 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -178,4 +178,15 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_CMA +static __init int gigantic_pages_init(void) +{ + /* With CMA we can allocate gigantic pages at runtime */ + if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + return 0; +} +arch_initcall(gigantic_pages_init); +#endif #endif diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 872bf01..01b80dc 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -89,8 +89,6 @@ * (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 11 | 11 | * +-----------------------------------------+ * swap | index | type | 01 | 11 | 00 | - * +- - - - - - - - - - - - - - - - - - - - -+ - * file | file offset | 01 | 11 | 10 | * +-----------------------------------------+ * * For T1050 hardware and earlier the layout differs for present and (PAGE_NONE) @@ -111,7 +109,6 @@ * index swap offset / PAGE_SIZE (bit 11-31: 21 bits -> 8 GB) * (note that the index is always non-zero) * type swap type (5 bits -> 32 types) - * file offset 26-bit offset into the file, in increments of PAGE_SIZE * * Notes: * - (PROT_NONE) is a special case of 'present' but causes an exception for @@ -144,7 +141,6 @@ #define _PAGE_HW_VALID 0x00 #define _PAGE_NONE 0x0f #endif -#define _PAGE_FILE (1<<1) /* file mapped page, only if !present */ #define _PAGE_USER (1<<4) /* user access (ring=1) */ @@ -260,7 +256,6 @@ static inline void pgtable_cache_init(void) { } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) @@ -390,11 +385,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PTE_FILE_MAX_BITS 26 -#define pte_to_pgoff(pte) (pte_val(pte) >> 6) -#define pgoff_to_pte(off) \ - ((pte_t) { ((off) << 6) | _PAGE_CA_INVALID | _PAGE_FILE | _PAGE_USER }) - #endif /* !defined (__ASSEMBLY__) */ diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c index 63b4712..68c1f32 100644 --- a/drivers/gpu/drm/drm_vma_manager.c +++ b/drivers/gpu/drm/drm_vma_manager.c @@ -50,8 +50,7 @@ * * You must not use multiple offset managers on a single address_space. * Otherwise, mm-core will be unable to tear down memory mappings as the VM will - * no longer be linear. Please use VM_NONLINEAR in that case and implement your - * own offset managers. + * no longer be linear. * * This offset manager works on page-based addresses. That is, every argument * and return code (with the exception of drm_vma_node_offset_addr()) is given diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 5594505..b401337 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -831,7 +831,6 @@ static const struct vm_operations_struct v9fs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = v9fs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { @@ -839,7 +838,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = v9fs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e409025..a606ab5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2081,7 +2081,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = btrfs_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c81c0e00..24be059 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1569,7 +1569,6 @@ out: static struct vm_operations_struct ceph_vmops = { .fault = ceph_filemap_fault, .page_mkwrite = ceph_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; int ceph_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c1a8676..8fe1f7a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3236,7 +3236,6 @@ static struct vm_operations_struct cifs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = cifs_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8131be8..7cb5923 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -195,7 +195,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c27e0e..5674ba1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -92,7 +92,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int get_parent_ino(struct inode *inode, nid_t *pino) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 760b2c5..d769e59 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2062,7 +2062,6 @@ static const struct vm_operations_struct fuse_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = fuse_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6e600ab..ec9c2d3 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -498,7 +498,6 @@ static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; /** @@ -356,7 +356,6 @@ void address_space_init_once(struct address_space *mapping) INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); mapping->i_mmap = RB_ROOT; - INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); } EXPORT_SYMBOL(address_space_init_once); @@ -379,6 +379,11 @@ int __generic_block_fiemap(struct inode *inode, past_eof = true; } cond_resched(); + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + } while (1); /* If ret is 1 then we just hit the end of the extent array */ diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2ab6f00..94712fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -646,7 +646,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = nfs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int nfs_need_sync_write(struct file *filp, struct inode *inode) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 3a03e0a..a8c728ac 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -128,7 +128,6 @@ static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = nilfs_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 30d3add..51ceb81 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -140,7 +140,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, } if (S_ISDIR(path->dentry->d_inode->i_mode) && - (marks_ignored_mask & FS_ISDIR)) + !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return false; if (event_mask & marks_mask & ~marks_ignored_mask) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bff8567..cf27550 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -487,20 +487,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, unsigned int flags, int *destroy) { - __u32 oldmask; + __u32 oldmask = 0; spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { + __u32 tmask = fsn_mark->mask & ~mask; + + if (flags & FAN_MARK_ONDIR) + tmask &= ~FAN_ONDIR; + oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); + fsnotify_set_mark_mask_locked(fsn_mark, tmask); } else { - oldmask = fsn_mark->ignored_mask; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask)); + __u32 tmask = fsn_mark->ignored_mask & ~mask; + if (flags & FAN_MARK_ONDIR) + tmask &= ~FAN_ONDIR; + + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); } + *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); - *destroy = !(oldmask & ~mask); - return mask & oldmask; } @@ -569,20 +576,22 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { + __u32 tmask = fsn_mark->mask | mask; + + if (flags & FAN_MARK_ONDIR) + tmask |= FAN_ONDIR; + oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); + fsnotify_set_mark_mask_locked(fsn_mark, tmask); } else { __u32 tmask = fsn_mark->ignored_mask | mask; + if (flags & FAN_MARK_ONDIR) + tmask |= FAN_ONDIR; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } - - if (!(flags & FAN_MARK_ONDIR)) { - __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); - } - spin_unlock(&fsn_mark->lock); return mask & ~oldmask; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 7e8282d..c58a1bc 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -245,16 +245,14 @@ int ocfs2_set_acl(handle_t *handle, ret = posix_acl_equiv_mode(acl, &mode); if (ret < 0) return ret; - else { - if (ret == 0) - acl = NULL; - ret = ocfs2_acl_set_mode(inode, di_bh, - handle, mode); - if (ret) - return ret; + if (ret == 0) + acl = NULL; - } + ret = ocfs2_acl_set_mode(inode, di_bh, + handle, mode); + if (ret) + return ret; } break; case ACL_TYPE_DEFAULT: diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index fcae9ef..044158b 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6873,7 +6873,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_unlock; + goto out; } ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, @@ -6931,7 +6931,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, if (ret) { mlog_errno(ret); need_free = 1; - goto out_commit; + goto out_unlock; } page_end = PAGE_CACHE_SIZE; @@ -6964,12 +6964,16 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, if (ret) { mlog_errno(ret); need_free = 1; - goto out_commit; + goto out_unlock; } inode->i_blocks = ocfs2_inode_sector_count(inode); } +out_unlock: + if (pages) + ocfs2_unlock_and_free_pages(pages, num_pages); + out_commit: if (ret < 0 && did_quota) dquot_free_space_nodirty(inode, @@ -6989,15 +6993,11 @@ out_commit: ocfs2_commit_trans(osb, handle); -out_unlock: +out: if (data_ac) ocfs2_free_alloc_context(data_ac); - -out: - if (pages) { - ocfs2_unlock_and_free_pages(pages, num_pages); + if (pages) kfree(pages); - } return ret; } diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2e355e0..56c403a 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1016,7 +1016,8 @@ void o2net_fill_node_map(unsigned long *map, unsigned bytes) memset(map, 0, bytes); for (node = 0; node < O2NM_MAX_NODES; ++node) { - o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); + if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret)) + continue; if (!ret) { set_bit(node, map); sc_put(sc); diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index dc024367..b95e7df 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -107,12 +107,12 @@ struct o2net_node { struct list_head nn_status_list; /* connects are attempted from when heartbeat comes up until either hb - * goes down, the node is unconfigured, no connect attempts succeed - * before O2NET_CONN_IDLE_DELAY, or a connect succeeds. connect_work - * is queued from set_nn_state both from hb up and from itself if a - * connect attempt fails and so can be self-arming. shutdown is - * careful to first mark the nn such that no connects will be attempted - * before canceling delayed connect work and flushing the queue. */ + * goes down, the node is unconfigured, or a connect succeeds. + * connect_work is queued from set_nn_state both from hb up and from + * itself if a connect attempt fails and so can be self-arming. + * shutdown is careful to first mark the nn such that no connects will + * be attempted before canceling delayed connect work and flushing the + * queue. */ struct delayed_work nn_connect_work; unsigned long nn_last_connect_attempt; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 319e786..b08050b 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3456,10 +3456,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, int blocksize = dir->i_sb->s_blocksize; status = ocfs2_read_dir_block(dir, 0, &bh, 0); - if (status) { - mlog_errno(status); + if (status) goto bail; - } rec_len = OCFS2_DIR_REC_LEN(namelen); offset = 0; @@ -3480,10 +3478,9 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, status = ocfs2_read_dir_block(dir, offset >> sb->s_blocksize_bits, &bh, 0); - if (status) { - mlog_errno(status); + if (status) goto bail; - } + /* move to next block */ de = (struct ocfs2_dir_entry *) bh->b_data; } @@ -3513,7 +3510,6 @@ next: de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); } - status = 0; bail: brelse(bh); if (status) diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index b46278f..fd6bbbb 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -385,8 +385,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, head = &res->granted; list_for_each_entry(lock, head, list) { - if (lock->ml.cookie == cookie) + /* if lock is found but unlock is pending ignore the bast */ + if (lock->ml.cookie == cookie) { + if (lock->unlock_pending) + break; goto do_ast; + } } mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 149eb55..8251360 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -406,7 +406,7 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) } spin_unlock(&dlm->spinlock); - out += snprintf(buf + out, len - out, "Total on list: %ld\n", total); + out += snprintf(buf + out, len - out, "Total on list: %lu\n", total); return out; } @@ -464,7 +464,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) spin_unlock(&dlm->master_lock); out += snprintf(buf + out, len - out, - "Total: %ld, Longest: %ld\n", total, longest); + "Total: %lu, Longest: %lu\n", total, longest); return out; } diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 50a59d2..7df88a6 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -674,20 +674,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) spin_unlock(&dlm->spinlock); } -int dlm_joined(struct dlm_ctxt *dlm) -{ - int ret = 0; - - spin_lock(&dlm_domain_lock); - - if (dlm->dlm_state == DLM_CTXT_JOINED) - ret = 1; - - spin_unlock(&dlm_domain_lock); - - return ret; -} - int dlm_shutting_down(struct dlm_ctxt *dlm) { int ret = 0; diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h index 2f7f60b..fd6122a 100644 --- a/fs/ocfs2/dlm/dlmdomain.h +++ b/fs/ocfs2/dlm/dlmdomain.h @@ -28,7 +28,6 @@ extern spinlock_t dlm_domain_lock; extern struct list_head dlm_domains; -int dlm_joined(struct dlm_ctxt *dlm); int dlm_shutting_down(struct dlm_ctxt *dlm); void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, int node_num); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index cecd875..ce12e0b 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1070,6 +1070,9 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, dead_node, dlm->name); list_del_init(&lock->list); dlm_lock_put(lock); + /* Can't schedule DLM_UNLOCK_FREE_LOCK + * - do manually */ + dlm_lock_put(lock); break; } } @@ -2346,6 +2349,10 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) dead_node, dlm->name); list_del_init(&lock->list); dlm_lock_put(lock); + /* Can't schedule + * DLM_UNLOCK_FREE_LOCK + * - do manually */ + dlm_lock_put(lock); break; } } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1c423af..11849a4 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3750,6 +3750,9 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, break; spin_unlock(&dentry_attach_lock); + if (S_ISDIR(dl->dl_inode->i_mode)) + shrink_dcache_parent(dentry); + mlog(0, "d_delete(%pd);\n", dentry); /* diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3950693..245db4f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -569,7 +569,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, handle_t *handle = NULL; struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_alloc_context *meta_ac = NULL; - enum ocfs2_alloc_restarted why; + enum ocfs2_alloc_restarted why = RESTART_NONE; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; int did_quota = 0; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f502382..d10860f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1447,7 +1447,6 @@ bail: * requires that we call do_exit(). And it isn't exported, but * complete_and_exit() seems to be a minimal wrapper around it. */ complete_and_exit(NULL, status); - return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 10d66c7..9581d190 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -173,7 +173,6 @@ out: static const struct vm_operations_struct ocfs2_file_vm_ops = { .fault = ocfs2_fault, .page_mkwrite = ocfs2_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7d6b7d0..fdbcbfe 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -279,6 +279,8 @@ enum ocfs2_mount_options writes */ OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ + + OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 89c0b26..3d0b63d 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -73,12 +73,6 @@ static loff_t ol_dqblk_off(struct super_block *sb, int c, int off) ol_dqblk_block_off(sb, c, off); } -/* Compute block number from given offset */ -static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off) -{ - return off >> sb->s_blocksize_bits; -} - static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off) { return off & ((1 << sb->s_blocksize_bits) - 1); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index d81f6e2..ee541f9 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2428,8 +2428,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, get_bh(prev_bh); } - rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; - trace_ocfs2_calc_refcount_meta_credits_iterate( recs_add, (unsigned long long)cpos, clusters, (unsigned long long)le64_to_cpu(rec.r_cpos), diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 41ffd36..6a348b0 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -39,7 +39,7 @@ #define OCFS2_CHECK_RESERVATIONS #endif -DEFINE_SPINLOCK(resv_lock); +static DEFINE_SPINLOCK(resv_lock); #define OCFS2_MIN_RESV_WINDOW_BITS 8 #define OCFS2_MAX_RESV_WINDOW_BITS 1024 diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 706c71c..87a1f76 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -191,6 +191,7 @@ enum { Opt_coherency_full, Opt_resv_level, Opt_dir_resv_level, + Opt_journal_async_commit, Opt_err, }; @@ -222,6 +223,7 @@ static const match_table_t tokens = { {Opt_coherency_full, "coherency=full"}, {Opt_resv_level, "resv_level=%u"}, {Opt_dir_resv_level, "dir_resv_level=%u"}, + {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err, NULL} }; @@ -1470,6 +1472,9 @@ static int ocfs2_parse_options(struct super_block *sb, option < OCFS2_MAX_RESV_LEVEL) mopt->dir_resv_level = option; break; + case Opt_journal_async_commit: + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; + break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1576,6 +1581,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) if (osb->osb_dir_resv_level != osb->osb_resv_level) seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); + if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) + seq_printf(s, ",journal_async_commit"); + return 0; } @@ -2445,6 +2453,15 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) goto finally; } + if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) + jbd2_journal_set_features(osb->journal->j_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); + else + jbd2_journal_clear_features(osb->journal->j_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); + if (dirty) { /* recover my local alloc if we didn't unmount cleanly. */ status = ocfs2_begin_local_alloc_recovery(osb, diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 662f8de..85b190d 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -5334,16 +5334,6 @@ out: return ret; } -static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, - struct ocfs2_xattr_bucket *bucket, - int offs) -{ - int block_off = offs >> inode->i_sb->s_blocksize_bits; - - offs = offs % inode->i_sb->s_blocksize; - return bucket_block(bucket, block_off) + offs; -} - /* * Truncate the specified xe_off entry in xattr bucket. * bucket is indicated by header_bh and len is the new length. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 246eae8..6396f88 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -443,7 +443,6 @@ struct mem_size_stats { unsigned long anonymous; unsigned long anonymous_thp; unsigned long swap; - unsigned long nonlinear; u64 pss; }; @@ -484,7 +483,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; - pgoff_t pgoff = linear_page_index(vma, addr); struct page *page = NULL; if (pte_present(*pte)) { @@ -496,17 +494,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, mss->swap += PAGE_SIZE; else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); - } else if (pte_file(*pte)) { - if (pte_to_pgoff(*pte) != pgoff) - mss->nonlinear += PAGE_SIZE; } if (!page) return; - - if (page->index != pgoff) - mss->nonlinear += PAGE_SIZE; - smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); } @@ -596,7 +587,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_ACCOUNT)] = "ac", [ilog2(VM_NORESERVE)] = "nr", [ilog2(VM_HUGETLB)] = "ht", - [ilog2(VM_NONLINEAR)] = "nl", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_DONTDUMP)] = "dd", #ifdef CONFIG_MEM_SOFT_DIRTY @@ -668,10 +658,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) (vma->vm_flags & VM_LOCKED) ? (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); - if (vma->vm_flags & VM_NONLINEAR) - seq_printf(m, "Nonlinear: %8lu kB\n", - mss.nonlinear >> 10); - show_smap_vma_flags(m, vma); m_cache_vma(m, vma); return 0; @@ -772,8 +758,6 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); } else if (is_swap_pte(ptent)) { ptent = pte_swp_clear_soft_dirty(ptent); - } else if (pte_file(ptent)) { - ptent = pte_file_clear_soft_dirty(ptent); } set_pte_at(vma->vm_mm, addr, pte, ptent); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 538519e..035e510 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1536,7 +1536,6 @@ static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ubifs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 712d312..f2d05a1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1404,5 +1404,4 @@ static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 177d597..129de92 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -474,21 +474,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte; } - -static inline pte_t pte_file_clear_soft_dirty(pte_t pte) -{ - return pte; -} - -static inline pte_t pte_file_mksoft_dirty(pte_t pte) -{ - return pte; -} - -static inline int pte_file_soft_dirty(pte_t pte) -{ - return 0; -} #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/include/linux/fs.h b/include/linux/fs.h index ddd2fa7..f125b88 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -401,7 +401,6 @@ struct address_space { spinlock_t tree_lock; /* and lock protecting it */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ - struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ @@ -493,8 +492,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) */ static inline int mapping_mapped(struct address_space *mapping) { - return !RB_EMPTY_ROOT(&mapping->i_mmap) || - !list_empty(&mapping->i_mmap_nonlinear); + return !RB_EMPTY_ROOT(&mapping->i_mmap); } /* @@ -2501,8 +2499,6 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, - unsigned long size, pgoff_t pgoff); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1c804b0..7ee1774 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -101,8 +101,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, new_dir_mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); + fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, + fs_cookie); + fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, + fs_cookie); if (target) fsnotify_link_count(target); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 431b7fc..7d78563 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -86,7 +86,7 @@ void free_huge_page(struct page *page); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); #endif -extern unsigned long hugepages_treat_as_movable; +extern int hugepages_treat_as_movable; extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7c95af8..fb212e1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -403,10 +403,9 @@ void memcg_update_array_size(int num_groups); struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); void __memcg_kmem_put_cache(struct kmem_cache *cachep); -int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order); -void __memcg_uncharge_slab(struct kmem_cache *cachep, int order); - -int __memcg_cleanup_cache_params(struct kmem_cache *s); +int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, + unsigned long nr_pages); +void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. diff --git a/include/linux/mm.h b/include/linux/mm.h index 237b3ba..65db4ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ -#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_ARCH_2 0x02000000 #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ @@ -206,21 +205,19 @@ extern unsigned int kobjsize(const void *objp); extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ -#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ -#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ -#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ -#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ -#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ -#define FAULT_FLAG_TRIED 0x40 /* second try */ -#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ +#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */ +#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */ +#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ +#define FAULT_FLAG_TRIED 0x20 /* Second try */ +#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * - * pgoff should be used in favour of virtual_address, if possible. If pgoff - * is used, one may implement ->remap_pages to get nonlinear mapping support. + * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ @@ -287,10 +284,6 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr); #endif - /* called by sys_remap_file_pages() to populate non-linear mapping */ - int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff); - /* * Called by vm_normal_page() for special PTEs to find the * page for @addr. This is useful if the default behavior @@ -454,6 +447,12 @@ static inline struct page *compound_head_by_tail(struct page *tail) return tail; } +/* + * Since either compound page could be dismantled asynchronously in THP + * or we access asynchronously arbitrary positioned struct page, there + * would be tail flag race. To handle this race, we should call + * smp_rmb() before checking tail flag. compound_head_by_tail() did it. + */ static inline struct page *compound_head(struct page *page) { if (unlikely(PageTail(page))) @@ -462,6 +461,18 @@ static inline struct page *compound_head(struct page *page) } /* + * If we access compound page synchronously such as access to + * allocated page, there is no need to handle tail flag race, so we can + * check tail flag directly without any synchronization primitive. + */ +static inline struct page *compound_head_fast(struct page *page) +{ + if (unlikely(PageTail(page))) + return page->first_page; + return page; +} + +/* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test * and atomic_add_negative(-1). @@ -539,7 +550,14 @@ static inline void get_page(struct page *page) static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); - return compound_head(page); + + /* + * We don't need to worry about synchronization of tail flag + * when we call virt_to_head_page() since it is only called for + * already allocated page and this page won't be freed until + * this virt_to_head_page() is finished. So use _fast variant. + */ + return compound_head_fast(page); } /* @@ -1129,7 +1147,6 @@ extern void user_shm_unlock(size_t, struct user_struct *); * Parameter block passed down to zap_pte_range in exceptional cases. */ struct zap_details { - struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ @@ -1785,12 +1802,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, for (vma = vma_interval_tree_iter_first(root, start, last); \ vma; vma = vma_interval_tree_iter_next(vma, start, last)) -static inline void vma_nonlinear_insert(struct vm_area_struct *vma, - struct list_head *list) -{ - list_add_tail(&vma->shared.nonlinear, list); -} - void anon_vma_interval_tree_insert(struct anon_vma_chain *node, struct rb_root *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6d34aa2..07c8bd3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -273,15 +273,11 @@ struct vm_area_struct { /* * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree, or - * linkage of vma in the address_space->i_mmap_nonlinear list. + * linkage into the address_space->i_mmap interval tree. */ - union { - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } linear; - struct list_head nonlinear; + struct { + struct rb_node rb; + unsigned long rb_subtree_last; } shared; /* diff --git a/include/linux/rmap.h b/include/linux/rmap.h index d9d7e7e..b38f559 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -246,7 +246,6 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); * arg: passed to rmap_one() and invalid_vma() * rmap_one: executed on each vma where page is mapped * done: for checking traversing termination condition - * file_nonlinear: for handling file nonlinear mapping * anon_lock: for getting anon_lock by optimized way rather than default * invalid_vma: for skipping uninterested vma */ @@ -255,7 +254,6 @@ struct rmap_walk_control { int (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); - int (*file_nonlinear)(struct page *, struct address_space *, void *arg); struct anon_vma *(*anon_lock)(struct page *page); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; diff --git a/include/linux/slab.h b/include/linux/slab.h index 9a139b6..2e3b448 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,9 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); #ifdef CONFIG_MEMCG_KMEM -struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *, - struct kmem_cache *, - const char *); +void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *); +void memcg_destroy_kmem_caches(struct mem_cgroup *); #endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -491,7 +490,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * Child caches will hold extra metadata needed for its operation. Fields are: * * @memcg: pointer to the memcg this cache belongs to - * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from */ struct memcg_cache_params { @@ -503,7 +501,6 @@ struct memcg_cache_params { }; struct { struct mem_cgroup *memcg; - struct list_head list; struct kmem_cache *root_cache; }; }; diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6adfb7b..50cbc87 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry) /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { - return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte); + return !pte_none(pte) && !pte_present_nonuma(pte); } #endif @@ -66,7 +66,6 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) { swp_entry_t arch_entry; - BUG_ON(pte_file(pte)); if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); arch_entry = __pte_to_swp_entry(pte); @@ -82,7 +81,6 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) swp_entry_t arch_entry; arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); - BUG_ON(pte_file(__swp_entry_to_pte(arch_entry))); return __swp_entry_to_pte(arch_entry); } diff --git a/kernel/fork.c b/kernel/fork.c index 4dc2dda..b379d9a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -438,12 +438,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ - if (unlikely(tmp->vm_flags & VM_NONLINEAR)) - vma_nonlinear_insert(tmp, - &mapping->i_mmap_nonlinear); - else - vma_interval_tree_insert_after(tmp, mpnt, - &mapping->i_mmap); + vma_interval_tree_insert_after(tmp, mpnt, + &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); i_mmap_unlock_write(mapping); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 137c7f6..88ea2d6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1248,7 +1248,6 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = hugetlb_sysctl_handler, - .extra1 = &zero, }, #ifdef CONFIG_NUMA { @@ -1257,7 +1256,6 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, - .extra1 = &zero, }, #endif { @@ -1280,7 +1278,6 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = hugetlb_overcommit_handler, - .extra1 = &zero, }, #endif { diff --git a/mm/Makefile b/mm/Makefile index 4bf586e..3548460 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -3,7 +3,7 @@ # mmu-y := nommu.o -mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \ +mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ vmalloc.o pagewalk.o pgtable-generic.o @@ -130,7 +130,6 @@ static const struct trace_print_flags vmaflags_names[] = { {VM_ACCOUNT, "account" }, {VM_NORESERVE, "noreserve" }, {VM_HUGETLB, "hugetlb" }, - {VM_NONLINEAR, "nonlinear" }, #if defined(CONFIG_X86) {VM_PAT, "pat" }, #elif defined(CONFIG_PPC) diff --git a/mm/filemap.c b/mm/filemap.c index 673e458..bf7a271 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2087,7 +2087,6 @@ const struct vm_operations_struct generic_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = filemap_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; /* This is used for a general mmap of a disk file */ diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 0d105ae..70c09da 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -301,7 +301,6 @@ out: static const struct vm_operations_struct xip_file_vm_ops = { .fault = xip_file_fault, .page_mkwrite = filemap_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) diff --git a/mm/fremap.c b/mm/fremap.c deleted file mode 100644 index 2805d71..0000000 --- a/mm/fremap.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * linux/mm/fremap.c - * - * Explicit pagetable population and nonlinear (random) mappings support. - * - * started by Ingo Molnar, Copyright (C) 2002, 2003 - */ -#include <linux/export.h> -#include <linux/backing-dev.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/file.h> -#include <linux/mman.h> -#include <linux/pagemap.h> -#include <linux/swapops.h> -#include <linux/rmap.h> -#include <linux/syscalls.h> -#include <linux/mmu_notifier.h> - -#include <asm/mmu_context.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -#include "internal.h" - -static int mm_counter(struct page *page) -{ - return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; -} - -static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - struct page *page; - swp_entry_t entry; - - if (pte_present(pte)) { - flush_cache_page(vma, addr, pte_pfn(pte)); - pte = ptep_clear_flush_notify(vma, addr, ptep); - page = vm_normal_page(vma, addr, pte); - if (page) { - if (pte_dirty(pte)) - set_page_dirty(page); - update_hiwater_rss(mm); - dec_mm_counter(mm, mm_counter(page)); - page_remove_rmap(page); - page_cache_release(page); - } - } else { /* zap_pte() is not called when pte_none() */ - if (!pte_file(pte)) { - update_hiwater_rss(mm); - entry = pte_to_swp_entry(pte); - if (non_swap_entry(entry)) { - if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - dec_mm_counter(mm, mm_counter(page)); - } - } else { - free_swap_and_cache(entry); - dec_mm_counter(mm, MM_SWAPENTS); - } - } - pte_clear_not_present_full(mm, addr, ptep, 0); - } -} - -/* - * Install a file pte to a given virtual memory address, release any - * previously existing mapping. - */ -static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long pgoff, pgprot_t prot) -{ - int err = -ENOMEM; - pte_t *pte, ptfile; - spinlock_t *ptl; - - pte = get_locked_pte(mm, addr, &ptl); - if (!pte) - goto out; - - ptfile = pgoff_to_pte(pgoff); - - if (!pte_none(*pte)) - zap_pte(mm, vma, addr, pte); - - set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile)); - /* - * We don't need to run update_mmu_cache() here because the "file pte" - * being installed by install_file_pte() is not a real pte - it's a - * non-present entry (like a swap entry), noting what file offset should - * be mapped there when there's a fault (in a non-linear vma where - * that's not obvious). - */ - pte_unmap_unlock(pte, ptl); - err = 0; -out: - return err; -} - -int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff) -{ - struct mm_struct *mm = vma->vm_mm; - int err; - - do { - err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); - if (err) - return err; - - size -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; - } while (size); - - return 0; -} -EXPORT_SYMBOL(generic_file_remap_pages); - -/** - * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma - * @start: start of the remapped virtual memory range - * @size: size of the remapped virtual memory range - * @prot: new protection bits of the range (see NOTE) - * @pgoff: to-be-mapped page of the backing store file - * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO. - * - * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma - * (shared backing store file). - * - * This syscall works purely via pagetables, so it's the most efficient - * way to map the same (large) file into a given virtual window. Unlike - * mmap()/mremap() it does not create any new vmas. The new mappings are - * also safe across swapout. - * - * NOTE: the @prot parameter right now is ignored (but must be zero), - * and the vma's default protection is used. Arbitrary protections - * might be implemented in the future. - */ -SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, - unsigned long, prot, unsigned long, pgoff, unsigned long, flags) -{ - struct mm_struct *mm = current->mm; - struct address_space *mapping; - struct vm_area_struct *vma; - int err = -EINVAL; - int has_write_lock = 0; - vm_flags_t vm_flags = 0; - - pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. " - "See Documentation/vm/remap_file_pages.txt.\n", - current->comm, current->pid); - - if (prot) - return err; - /* - * Sanitize the syscall parameters: - */ - start = start & PAGE_MASK; - size = size & PAGE_MASK; - - /* Does the address range wrap, or is the span zero-sized? */ - if (start + size <= start) - return err; - - /* Does pgoff wrap? */ - if (pgoff + (size >> PAGE_SHIFT) < pgoff) - return err; - - /* Can we represent this offset inside this architecture's pte's? */ -#if PTE_FILE_MAX_BITS < BITS_PER_LONG - if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS)) - return err; -#endif - - /* We need down_write() to change vma->vm_flags. */ - down_read(&mm->mmap_sem); - retry: - vma = find_vma(mm, start); - - /* - * Make sure the vma is shared, that it supports prefaulting, - * and that the remapped range is valid and fully within - * the single existing vma. - */ - if (!vma || !(vma->vm_flags & VM_SHARED)) - goto out; - - if (!vma->vm_ops || !vma->vm_ops->remap_pages) - goto out; - - if (start < vma->vm_start || start + size > vma->vm_end) - goto out; - - /* Must set VM_NONLINEAR before any pages are populated. */ - if (!(vma->vm_flags & VM_NONLINEAR)) { - /* - * vm_private_data is used as a swapout cursor - * in a VM_NONLINEAR vma. - */ - if (vma->vm_private_data) - goto out; - - /* Don't need a nonlinear mapping, exit success */ - if (pgoff == linear_page_index(vma, start)) { - err = 0; - goto out; - } - - if (!has_write_lock) { -get_write_lock: - up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - has_write_lock = 1; - goto retry; - } - mapping = vma->vm_file->f_mapping; - /* - * page_mkclean doesn't work on nonlinear vmas, so if - * dirty pages need to be accounted, emulate with linear - * vmas. - */ - if (mapping_cap_account_dirty(mapping)) { - unsigned long addr; - struct file *file = get_file(vma->vm_file); - /* mmap_region may free vma; grab the info now */ - vm_flags = vma->vm_flags; - - addr = mmap_region(file, start, size, vm_flags, pgoff); - fput(file); - if (IS_ERR_VALUE(addr)) { - err = addr; - } else { - BUG_ON(addr != start); - err = 0; - } - goto out_freed; - } - i_mmap_lock_write(mapping); - flush_dcache_mmap_lock(mapping); - vma->vm_flags |= VM_NONLINEAR; - vma_interval_tree_remove(vma, &mapping->i_mmap); - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); - flush_dcache_mmap_unlock(mapping); - i_mmap_unlock_write(mapping); - } - - if (vma->vm_flags & VM_LOCKED) { - /* - * drop PG_Mlocked flag for over-mapped range - */ - if (!has_write_lock) - goto get_write_lock; - vm_flags = vma->vm_flags; - munlock_vma_pages_range(vma, start, start + size); - vma->vm_flags = vm_flags; - } - - mmu_notifier_invalidate_range_start(mm, start, start + size); - err = vma->vm_ops->remap_pages(vma, start, size, pgoff); - mmu_notifier_invalidate_range_end(mm, start, start + size); - - /* - * We can't clear VM_NONLINEAR because we'd have to do - * it after ->populate completes, and that would prevent - * downgrading the lock. (Locks can't be upgraded). - */ - -out: - if (vma) - vm_flags = vma->vm_flags; -out_freed: - if (likely(!has_write_lock)) - up_read(&mm->mmap_sem); - else - up_write(&mm->mmap_sem); - if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK))) - mm_populate(start, size); - - return err; -} @@ -55,7 +55,7 @@ retry: */ if (likely(!(flags & FOLL_MIGRATION))) goto no_page; - if (pte_none(pte) || pte_file(pte)) + if (pte_none(pte)) goto no_page; entry = pte_to_swp_entry(pte); if (!is_migration_entry(entry)) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 85032de..be0e5d0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -35,7 +35,7 @@ #include <linux/node.h> #include "internal.h" -unsigned long hugepages_treat_as_movable; +int hugepages_treat_as_movable; int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; diff --git a/mm/interval_tree.c b/mm/interval_tree.c index 8da581f..f2c2492 100644 --- a/mm/interval_tree.c +++ b/mm/interval_tree.c @@ -21,8 +21,8 @@ static inline unsigned long vma_last_pgoff(struct vm_area_struct *v) return v->vm_pgoff + ((v->vm_end - v->vm_start) >> PAGE_SHIFT) - 1; } -INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.linear.rb, - unsigned long, shared.linear.rb_subtree_last, +INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb, + unsigned long, shared.rb_subtree_last, vma_start_pgoff, vma_last_pgoff,, vma_interval_tree) /* Insert node immediately after prev in the interval tree */ @@ -36,26 +36,26 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node, VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node); - if (!prev->shared.linear.rb.rb_right) { + if (!prev->shared.rb.rb_right) { parent = prev; - link = &prev->shared.linear.rb.rb_right; + link = &prev->shared.rb.rb_right; } else { - parent = rb_entry(prev->shared.linear.rb.rb_right, - struct vm_area_struct, shared.linear.rb); - if (parent->shared.linear.rb_subtree_last < last) - parent->shared.linear.rb_subtree_last = last; - while (parent->shared.linear.rb.rb_left) { - parent = rb_entry(parent->shared.linear.rb.rb_left, - struct vm_area_struct, shared.linear.rb); - if (parent->shared.linear.rb_subtree_last < last) - parent->shared.linear.rb_subtree_last = last; + parent = rb_entry(prev->shared.rb.rb_right, + struct vm_area_struct, shared.rb); + if (parent->shared.rb_subtree_last < last) + parent->shared.rb_subtree_last = last; + while (parent->shared.rb.rb_left) { + parent = rb_entry(parent->shared.rb.rb_left, + struct vm_area_struct, shared.rb); + if (parent->shared.rb_subtree_last < last) + parent->shared.rb_subtree_last = last; } - link = &parent->shared.linear.rb.rb_left; + link = &parent->shared.rb.rb_left; } - node->shared.linear.rb_subtree_last = last; - rb_link_node(&node->shared.linear.rb, &parent->shared.linear.rb, link); - rb_insert_augmented(&node->shared.linear.rb, root, + node->shared.rb_subtree_last = last; + rb_link_node(&node->shared.rb, &parent->shared.rb, link); + rb_insert_augmented(&node->shared.rb, root, &vma_interval_tree_augment); } @@ -1748,7 +1748,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, */ if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP)) + VM_HUGETLB | VM_MIXEDMAP)) return 0; /* just ignore the advice */ #ifdef VM_SAO diff --git a/mm/madvise.c b/mm/madvise.c index a271adc..d79fb5e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -155,7 +155,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, pte = *(orig_pte + ((index - start) / PAGE_SIZE)); pte_unmap_unlock(orig_pte, ptl); - if (pte_present(pte) || pte_none(pte) || pte_file(pte)) + if (pte_present(pte) || pte_none(pte)) continue; entry = pte_to_swp_entry(pte); if (unlikely(non_swap_entry(entry))) @@ -278,14 +278,7 @@ static long madvise_dontneed(struct vm_area_struct *vma, if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) return -EINVAL; - if (unlikely(vma->vm_flags & VM_NONLINEAR)) { - struct zap_details details = { - .nonlinear_vma = vma, - .last_index = ULONG_MAX, - }; - zap_page_range(vma, start, end - start, &details); - } else - zap_page_range(vma, start, end - start, NULL); + zap_page_range(vma, start, end - start, NULL); return 0; } @@ -303,7 +296,7 @@ static long madvise_remove(struct vm_area_struct *vma, *prev = NULL; /* tell sys_madvise we drop mmap_sem */ - if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) + if (vma->vm_flags & (VM_LOCKED | VM_HUGETLB)) return -EINVAL; f = vma->vm_file; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2f6893c..f3f8a4f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -343,9 +343,6 @@ struct mem_cgroup { struct cg_proto tcp_mem; #endif #if defined(CONFIG_MEMCG_KMEM) - /* analogous to slab_common's slab_caches list, but per-memcg; - * protected by memcg_slab_mutex */ - struct list_head memcg_slab_caches; /* Index in the kmem_cache->memcg_params->memcg_caches array */ int kmemcg_id; #endif @@ -2476,27 +2473,8 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, } #ifdef CONFIG_MEMCG_KMEM -/* - * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or - * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists. - */ -static DEFINE_MUTEX(memcg_slab_mutex); - -/* - * This is a bit cumbersome, but it is rarely used and avoids a backpointer - * in the memcg_cache_params struct. - */ -static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) -{ - struct kmem_cache *cachep; - - VM_BUG_ON(p->is_root_cache); - cachep = p->root_cache; - return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg)); -} - -static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, - unsigned long nr_pages) +int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, + unsigned long nr_pages) { struct page_counter *counter; int ret = 0; @@ -2533,8 +2511,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, return ret; } -static void memcg_uncharge_kmem(struct mem_cgroup *memcg, - unsigned long nr_pages) +void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages) { page_counter_uncharge(&memcg->memory, nr_pages); if (do_swap_account) @@ -2579,10 +2556,7 @@ static int memcg_alloc_cache_id(void) else if (size > MEMCG_CACHES_MAX_SIZE) size = MEMCG_CACHES_MAX_SIZE; - mutex_lock(&memcg_slab_mutex); err = memcg_update_all_caches(size); - mutex_unlock(&memcg_slab_mutex); - if (err) { ida_simple_remove(&kmem_limited_groups, id); return err; @@ -2605,123 +2579,20 @@ void memcg_update_array_size(int num) memcg_limited_groups_array_size = num; } -static void memcg_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *root_cache) -{ - static char memcg_name_buf[NAME_MAX + 1]; /* protected by - memcg_slab_mutex */ - struct kmem_cache *cachep; - int id; - - lockdep_assert_held(&memcg_slab_mutex); - - id = memcg_cache_id(memcg); - - /* - * Since per-memcg caches are created asynchronously on first - * allocation (see memcg_kmem_get_cache()), several threads can try to - * create the same cache, but only one of them may succeed. - */ - if (cache_from_memcg_idx(root_cache, id)) - return; - - cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1); - cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf); - /* - * If we could not create a memcg cache, do not complain, because - * that's not critical at all as we can always proceed with the root - * cache. - */ - if (!cachep) - return; - - list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches); - - /* - * Since readers won't lock (see cache_from_memcg_idx()), we need a - * barrier here to ensure nobody will see the kmem_cache partially - * initialized. - */ - smp_wmb(); - - BUG_ON(root_cache->memcg_params->memcg_caches[id]); - root_cache->memcg_params->memcg_caches[id] = cachep; -} - -static void memcg_unregister_cache(struct kmem_cache *cachep) -{ - struct kmem_cache *root_cache; - struct mem_cgroup *memcg; - int id; - - lockdep_assert_held(&memcg_slab_mutex); - - BUG_ON(is_root_cache(cachep)); - - root_cache = cachep->memcg_params->root_cache; - memcg = cachep->memcg_params->memcg; - id = memcg_cache_id(memcg); - - BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep); - root_cache->memcg_params->memcg_caches[id] = NULL; - - list_del(&cachep->memcg_params->list); - - kmem_cache_destroy(cachep); -} - -int __memcg_cleanup_cache_params(struct kmem_cache *s) -{ - struct kmem_cache *c; - int i, failed = 0; - - mutex_lock(&memcg_slab_mutex); - for_each_memcg_cache_index(i) { - c = cache_from_memcg_idx(s, i); - if (!c) - continue; - - memcg_unregister_cache(c); - - if (cache_from_memcg_idx(s, i)) - failed++; - } - mutex_unlock(&memcg_slab_mutex); - return failed; -} - -static void memcg_unregister_all_caches(struct mem_cgroup *memcg) -{ - struct kmem_cache *cachep; - struct memcg_cache_params *params, *tmp; - - if (!memcg_kmem_is_active(memcg)) - return; - - mutex_lock(&memcg_slab_mutex); - list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) { - cachep = memcg_params_to_cache(params); - memcg_unregister_cache(cachep); - } - mutex_unlock(&memcg_slab_mutex); -} - -struct memcg_register_cache_work { +struct memcg_kmem_cache_create_work { struct mem_cgroup *memcg; struct kmem_cache *cachep; struct work_struct work; }; -static void memcg_register_cache_func(struct work_struct *w) +static void memcg_kmem_cache_create_func(struct work_struct *w) { - struct memcg_register_cache_work *cw = - container_of(w, struct memcg_register_cache_work, work); + struct memcg_kmem_cache_create_work *cw = + container_of(w, struct memcg_kmem_cache_create_work, work); struct mem_cgroup *memcg = cw->memcg; struct kmem_cache *cachep = cw->cachep; - mutex_lock(&memcg_slab_mutex); - memcg_register_cache(memcg, cachep); - mutex_unlock(&memcg_slab_mutex); + memcg_create_kmem_cache(memcg, cachep); css_put(&memcg->css); kfree(cw); @@ -2730,10 +2601,10 @@ static void memcg_register_cache_func(struct work_struct *w) /* * Enqueue the creation of a per-memcg kmem_cache. */ -static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *cachep) +static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, + struct kmem_cache *cachep) { - struct memcg_register_cache_work *cw; + struct memcg_kmem_cache_create_work *cw; cw = kmalloc(sizeof(*cw), GFP_NOWAIT); if (!cw) @@ -2743,18 +2614,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, cw->memcg = memcg; cw->cachep = cachep; + INIT_WORK(&cw->work, memcg_kmem_cache_create_func); - INIT_WORK(&cw->work, memcg_register_cache_func); schedule_work(&cw->work); } -static void memcg_schedule_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *cachep) +static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, + struct kmem_cache *cachep) { /* * We need to stop accounting when we kmalloc, because if the * corresponding kmalloc cache is not yet created, the first allocation - * in __memcg_schedule_register_cache will recurse. + * in __memcg_schedule_kmem_cache_create will recurse. * * However, it is better to enclose the whole function. Depending on * the debugging options enabled, INIT_WORK(), for instance, can @@ -2763,24 +2634,10 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg, * the safest choice is to do it like this, wrapping the whole function. */ current->memcg_kmem_skip_account = 1; - __memcg_schedule_register_cache(memcg, cachep); + __memcg_schedule_kmem_cache_create(memcg, cachep); current->memcg_kmem_skip_account = 0; } -int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order) -{ - unsigned int nr_pages = 1 << order; - - return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages); -} - -void __memcg_uncharge_slab(struct kmem_cache *cachep, int order) -{ - unsigned int nr_pages = 1 << order; - - memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages); -} - /* * Return the kmem_cache we're supposed to use for a slab allocation. * We try to use the current memcg's version of the cache. @@ -2825,7 +2682,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) * could happen with the slab_mutex held. So it's better to * defer everything. */ - memcg_schedule_register_cache(memcg, cachep); + memcg_schedule_kmem_cache_create(memcg, cachep); out: css_put(&memcg->css); return cachep; @@ -4154,7 +4011,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) static void memcg_destroy_kmem(struct mem_cgroup *memcg) { - memcg_unregister_all_caches(memcg); + memcg_destroy_kmem_caches(memcg); mem_cgroup_sockets_destroy(memcg); } #else @@ -4682,7 +4539,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) spin_lock_init(&memcg->event_list_lock); #ifdef CONFIG_MEMCG_KMEM memcg->kmemcg_id = -1; - INIT_LIST_HEAD(&memcg->memcg_slab_caches); #endif return &memcg->css; @@ -4926,10 +4782,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, return NULL; mapping = vma->vm_file->f_mapping; - if (pte_none(ptent)) - pgoff = linear_page_index(vma, addr); - else /* pte_file(ptent) is true */ - pgoff = pte_to_pgoff(ptent); + pgoff = linear_page_index(vma, addr); /* page is moved even if it's not RSS of this task(page-faulted). */ #ifdef CONFIG_SWAP @@ -4961,7 +4814,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, page = mc_handle_present_pte(vma, addr, ptent); else if (is_swap_pte(ptent)) page = mc_handle_swap_pte(vma, addr, ptent, &ent); - else if (pte_none(ptent) || pte_file(ptent)) + else if (pte_none(ptent)) page = mc_handle_file_pte(vma, addr, ptent, &ent); if (!page && !ent.val) diff --git a/mm/memory.c b/mm/memory.c index d707c4df..d63849b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -813,42 +813,40 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, /* pte contains position in swap or file, so copy. */ if (unlikely(!pte_present(pte))) { - if (!pte_file(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); - - if (likely(!non_swap_entry(entry))) { - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - - if (PageAnon(page)) - rss[MM_ANONPAGES]++; - else - rss[MM_FILEPAGES]++; - - if (is_write_migration_entry(entry) && - is_cow_mapping(vm_flags)) { - /* - * COW mappings require pages in both - * parent and child to be set to read. - */ - make_migration_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_soft_dirty(*src_pte)) - pte = pte_swp_mksoft_dirty(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } + swp_entry_t entry = pte_to_swp_entry(pte); + + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } + rss[MM_SWAPENTS]++; + } else if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); + + if (PageAnon(page)) + rss[MM_ANONPAGES]++; + else + rss[MM_FILEPAGES]++; + + if (is_write_migration_entry(entry) && + is_cow_mapping(vm_flags)) { + /* + * COW mappings require pages in both + * parent and child to be set to read. + */ + make_migration_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); + set_pte_at(src_mm, addr, src_pte, pte); } } goto out_set_pte; @@ -1022,11 +1020,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, * readonly mappings. The tradeoff is that copy_page_range is more * efficient than faulting. */ - if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR | - VM_PFNMAP | VM_MIXEDMAP))) { - if (!vma->anon_vma) - return 0; - } + if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && + !vma->anon_vma) + return 0; if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); @@ -1084,6 +1080,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, spinlock_t *ptl; pte_t *start_pte; pte_t *pte; + swp_entry_t entry; again: init_rss_vec(rss); @@ -1109,28 +1106,12 @@ again: if (details->check_mapping && details->check_mapping != page->mapping) continue; - /* - * Each page->index must be checked when - * invalidating or truncating nonlinear. - */ - if (details->nonlinear_vma && - (page->index < details->first_index || - page->index > details->last_index)) - continue; } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; - if (unlikely(details) && details->nonlinear_vma - && linear_page_index(details->nonlinear_vma, - addr) != page->index) { - pte_t ptfile = pgoff_to_pte(page->index); - if (pte_soft_dirty(ptent)) - ptfile = pte_file_mksoft_dirty(ptfile); - set_pte_at(mm, addr, pte, ptfile); - } if (PageAnon(page)) rss[MM_ANONPAGES]--; else { @@ -1153,33 +1134,25 @@ again: } continue; } - /* - * If details->check_mapping, we leave swap entries; - * if details->nonlinear_vma, we leave file entries. - */ + /* If details->check_mapping, we leave swap entries. */ if (unlikely(details)) continue; - if (pte_file(ptent)) { - if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) - print_bad_pte(vma, addr, ptent, NULL); - } else { - swp_entry_t entry = pte_to_swp_entry(ptent); - if (!non_swap_entry(entry)) - rss[MM_SWAPENTS]--; - else if (is_migration_entry(entry)) { - struct page *page; + entry = pte_to_swp_entry(ptent); + if (!non_swap_entry(entry)) + rss[MM_SWAPENTS]--; + else if (is_migration_entry(entry)) { + struct page *page; - page = migration_entry_to_page(entry); + page = migration_entry_to_page(entry); - if (PageAnon(page)) - rss[MM_ANONPAGES]--; - else - rss[MM_FILEPAGES]--; - } - if (unlikely(!free_swap_and_cache(entry))) - print_bad_pte(vma, addr, ptent, NULL); + if (PageAnon(page)) + rss[MM_ANONPAGES]--; + else + rss[MM_FILEPAGES]--; } + if (unlikely(!free_swap_and_cache(entry))) + print_bad_pte(vma, addr, ptent, NULL); pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -1279,7 +1252,7 @@ static void unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping && !details->nonlinear_vma) + if (details && !details->check_mapping) details = NULL; BUG_ON(addr >= end); @@ -1373,7 +1346,7 @@ void unmap_vmas(struct mmu_gather *tlb, * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap - * @details: details of nonlinear truncation or shared cache invalidation + * @details: details of shared cache invalidation * * Caller must protect the VMA list */ @@ -1399,7 +1372,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * @vma: vm_area_struct holding the applicable pages * @address: starting address of pages to zap * @size: number of bytes to zap - * @details: details of nonlinear truncation or shared cache invalidation + * @details: details of shared cache invalidation * * The range must fit into one VMA. */ @@ -1924,12 +1897,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, EXPORT_SYMBOL_GPL(apply_to_page_range); /* - * handle_pte_fault chooses page fault handler according to an entry - * which was read non-atomically. Before making any commitment, on - * those architectures or configurations (e.g. i386 with PAE) which - * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault - * must check under lock before unmapping the pte and proceeding - * (but do_wp_page is only called after already making such a check; + * handle_pte_fault chooses page fault handler according to an entry which was + * read non-atomically. Before making any commitment, on those architectures + * or configurations (e.g. i386 with PAE) which might give a mix of unmatched + * parts, do_swap_page must check under lock before unmapping the pte and + * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). */ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, @@ -2035,7 +2007,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t entry; int ret = 0; int page_mkwrite = 0; - struct page *dirty_page = NULL; + bool dirty_shared = false; unsigned long mmun_start = 0; /* For mmu_notifiers */ unsigned long mmun_end = 0; /* For mmu_notifiers */ struct mem_cgroup *memcg; @@ -2086,6 +2058,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { + page_cache_get(old_page); /* * Only catch write-faults on shared writable pages, * read-only shared pages can get COWed by @@ -2093,7 +2066,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; - page_cache_get(old_page); + pte_unmap_unlock(page_table, ptl); tmp = do_page_mkwrite(vma, old_page, address); if (unlikely(!tmp || (tmp & @@ -2113,11 +2086,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(old_page); goto unlock; } - page_mkwrite = 1; } - dirty_page = old_page; - get_page(dirty_page); + + dirty_shared = true; reuse: /* @@ -2136,20 +2108,20 @@ reuse: pte_unmap_unlock(page_table, ptl); ret |= VM_FAULT_WRITE; - if (!dirty_page) - return ret; - - if (!page_mkwrite) { + if (dirty_shared) { struct address_space *mapping; int dirtied; - lock_page(dirty_page); - dirtied = set_page_dirty(dirty_page); - VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page); - mapping = dirty_page->mapping; - unlock_page(dirty_page); + if (!page_mkwrite) + lock_page(old_page); - if (dirtied && mapping) { + dirtied = set_page_dirty(old_page); + VM_BUG_ON_PAGE(PageAnon(old_page), old_page); + mapping = old_page->mapping; + unlock_page(old_page); + page_cache_release(old_page); + + if ((dirtied || page_mkwrite) && mapping) { /* * Some device drivers do not set page.mapping * but still dirty their pages @@ -2157,25 +2129,9 @@ reuse: balance_dirty_pages_ratelimited(mapping); } - /* file_update_time outside page_lock */ - if (vma->vm_file) + if (!page_mkwrite) file_update_time(vma->vm_file); } - put_page(dirty_page); - if (page_mkwrite) { - struct address_space *mapping = dirty_page->mapping; - - set_page_dirty(dirty_page); - unlock_page(dirty_page); - page_cache_release(dirty_page); - if (mapping) { - /* - * Some device drivers do not set page.mapping - * but still dirty their pages - */ - balance_dirty_pages_ratelimited(mapping); - } - } return ret; } @@ -2333,25 +2289,11 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, } } -static inline void unmap_mapping_range_list(struct list_head *head, - struct zap_details *details) -{ - struct vm_area_struct *vma; - - /* - * In nonlinear VMAs there is no correspondence between virtual address - * offset and file offset. So we must perform an exhaustive search - * across *all* the pages in each nonlinear VMA, not just the pages - * whose virtual address lies outside the file truncation point. - */ - list_for_each_entry(vma, head, shared.nonlinear) { - details->nonlinear_vma = vma; - unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); - } -} - /** - * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file. + * unmap_mapping_range - unmap the portion of all mmaps in the specified + * address_space corresponding to the specified page range in the underlying + * file. + * * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE @@ -2380,7 +2322,6 @@ void unmap_mapping_range(struct address_space *mapping, } details.check_mapping = even_cows? NULL: mapping; - details.nonlinear_vma = NULL; details.first_index = hba; details.last_index = hba + hlen - 1; if (details.last_index < details.first_index) @@ -2390,8 +2331,6 @@ void unmap_mapping_range(struct address_space *mapping, i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); - if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) - unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); i_mmap_unlock_write(mapping); } EXPORT_SYMBOL(unmap_mapping_range); @@ -2752,8 +2691,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); - else if (pte_file(*pte) && pte_file_soft_dirty(*pte)) - entry = pte_mksoft_dirty(entry); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); @@ -2888,8 +2825,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && - fault_around_bytes >> PAGE_SHIFT > 1) { + if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) @@ -3021,8 +2957,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, balance_dirty_pages_ratelimited(mapping); } - /* file_update_time outside page_lock */ - if (vma->vm_file && !vma->vm_ops->page_mkwrite) + if (!vma->vm_ops->page_mkwrite) file_update_time(vma->vm_file); return ret; @@ -3034,7 +2969,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, +static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) { @@ -3051,46 +2986,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); } -/* - * Fault of a previously existing named mapping. Repopulate the pte - * from the encoded file_pte if possible. This enables swappable - * nonlinear vmas. - * - * We enter with non-exclusive mmap_sem (to exclude vma changes, - * but allow concurrent faults), and pte mapped but not yet locked. - * We return with pte unmapped and unlocked. - * The mmap_sem may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). - */ -static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - unsigned int flags, pte_t orig_pte) -{ - pgoff_t pgoff; - - flags |= FAULT_FLAG_NONLINEAR; - - if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) - return 0; - - if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { - /* - * Page table corrupted: show pte and kill process. - */ - print_bad_pte(vma, address, orig_pte, NULL); - return VM_FAULT_SIGBUS; - } - - pgoff = pte_to_pgoff(orig_pte); - if (!(flags & FAULT_FLAG_WRITE)) - return do_read_fault(mm, vma, address, pmd, pgoff, flags, - orig_pte); - if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(mm, vma, address, pmd, pgoff, flags, - orig_pte); - return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); -} - static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags) @@ -3218,15 +3113,12 @@ static int handle_pte_fault(struct mm_struct *mm, if (pte_none(entry)) { if (vma->vm_ops) { if (likely(vma->vm_ops->fault)) - return do_linear_fault(mm, vma, address, - pte, pmd, flags, entry); + return do_fault(mm, vma, address, pte, + pmd, flags, entry); } return do_anonymous_page(mm, vma, address, pte, pmd, flags); } - if (pte_file(entry)) - return do_nonlinear_fault(mm, vma, address, - pte, pmd, flags, entry); return do_swap_page(mm, vma, address, pte, pmd, flags, entry); } diff --git a/mm/migrate.c b/mm/migrate.c index 344cdf6..6e284bc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -179,37 +179,6 @@ out: } /* - * Congratulations to trinity for discovering this bug. - * mm/fremap.c's remap_file_pages() accepts any range within a single vma to - * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then - * replace the specified range by file ptes throughout (maybe populated after). - * If page migration finds a page within that range, while it's still located - * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem: - * zap_pte() clears the temporary migration entry before mmap_sem is dropped. - * But if the migrating page is in a part of the vma outside the range to be - * remapped, then it will not be cleared, and remove_migration_ptes() needs to - * deal with it. Fortunately, this part of the vma is of course still linear, - * so we just need to use linear location on the nonlinear list. - */ -static int remove_linear_migration_ptes_from_nonlinear(struct page *page, - struct address_space *mapping, void *arg) -{ - struct vm_area_struct *vma; - /* hugetlbfs does not support remap_pages, so no huge pgoff worries */ - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - unsigned long addr; - - list_for_each_entry(vma, - &mapping->i_mmap_nonlinear, shared.nonlinear) { - - addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); - if (addr >= vma->vm_start && addr < vma->vm_end) - remove_migration_pte(page, vma, addr, arg); - } - return SWAP_AGAIN; -} - -/* * Get rid of all migration entries and replace them by * references to the indicated page. */ @@ -218,7 +187,6 @@ static void remove_migration_ptes(struct page *old, struct page *new) struct rmap_walk_control rwc = { .rmap_one = remove_migration_pte, .arg = old, - .file_nonlinear = remove_linear_migration_ptes_from_nonlinear, }; rmap_walk(new, &rwc); diff --git a/mm/mincore.c b/mm/mincore.c index c8c528b..46527c0 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -124,17 +124,13 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); do { pte_t pte = *ptep; - pgoff_t pgoff; next = addr + PAGE_SIZE; if (pte_none(pte)) mincore_unmapped_range(vma, addr, next, vec); else if (pte_present(pte)) *vec = 1; - else if (pte_file(pte)) { - pgoff = pte_to_pgoff(pte); - *vec = mincore_page(vma->vm_file->f_mapping, pgoff); - } else { /* pte is a swap entry */ + else { /* pte is a swap entry */ swp_entry_t entry = pte_to_swp_entry(pte); if (non_swap_entry(entry)) { @@ -145,9 +141,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, *vec = 1; } else { #ifdef CONFIG_SWAP - pgoff = entry.val; *vec = mincore_page(swap_address_space(entry), - pgoff); + entry.val); #else WARN_ON(1); *vec = 1; @@ -243,10 +243,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, mapping_unmap_writable(mapping); flush_dcache_mmap_lock(mapping); - if (unlikely(vma->vm_flags & VM_NONLINEAR)) - list_del_init(&vma->shared.nonlinear); - else - vma_interval_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } @@ -649,10 +646,7 @@ static void __vma_link_file(struct vm_area_struct *vma) atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); - if (unlikely(vma->vm_flags & VM_NONLINEAR)) - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); - else - vma_interval_tree_insert(vma, &mapping->i_mmap); + vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } } @@ -789,14 +783,11 @@ again: remove_next = 1 + (end > next->vm_end); if (file) { mapping = file->f_mapping; - if (!(vma->vm_flags & VM_NONLINEAR)) { - root = &mapping->i_mmap; - uprobe_munmap(vma, vma->vm_start, vma->vm_end); + root = &mapping->i_mmap; + uprobe_munmap(vma, vma->vm_start, vma->vm_end); - if (adjust_next) - uprobe_munmap(next, next->vm_start, - next->vm_end); - } + if (adjust_next) + uprobe_munmap(next, next->vm_start, next->vm_end); i_mmap_lock_write(mapping); if (insert) { @@ -2634,6 +2625,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) return vm_munmap(addr, len); } + +/* + * Emulation of deprecated remap_file_pages() syscall. + */ +SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, + unsigned long, prot, unsigned long, pgoff, unsigned long, flags) +{ + + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long populate = 0; + unsigned long ret = -EINVAL; + struct file *file; + + pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. " + "See Documentation/vm/remap_file_pages.txt.\n", + current->comm, current->pid); + + if (prot) + return ret; + start = start & PAGE_MASK; + size = size & PAGE_MASK; + + if (start + size <= start) + return ret; + + /* Does pgoff wrap? */ + if (pgoff + (size >> PAGE_SHIFT) < pgoff) + return ret; + + down_write(&mm->mmap_sem); + vma = find_vma(mm, start); + + if (!vma || !(vma->vm_flags & VM_SHARED)) + goto out; + + if (start < vma->vm_start || start + size > vma->vm_end) + goto out; + + if (pgoff == linear_page_index(vma, start)) { + ret = 0; + goto out; + } + + prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; + prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; + prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; + + flags &= MAP_NONBLOCK; + flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; + if (vma->vm_flags & VM_LOCKED) { + flags |= MAP_LOCKED; + /* drop PG_Mlocked flag for over-mapped range */ + munlock_vma_pages_range(vma, start, start + size); + } + + file = get_file(vma->vm_file); + ret = do_mmap_pgoff(vma->vm_file, start, size, + prot, flags, pgoff, &populate); + fput(file); +out: + up_write(&mm->mmap_sem); + if (populate) + mm_populate(ret, populate); + if (!IS_ERR_VALUE(ret)) + ret = 0; + return ret; +} + static inline void verify_mm_writelocked(struct mm_struct *mm) { #ifdef CONFIG_DEBUG_VM @@ -3108,8 +3168,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * * mmap_sem in write mode is required in order to block all operations * that could modify pagetables and free pages without need of - * altering the vma layout (for example populate_range() with - * nonlinear vmas). It's also needed in write mode to avoid new + * altering the vma layout. It's also needed in write mode to avoid new * anon_vmas to be associated with existing vmas. * * A single task can't take more than one mm_take_all_locks() in a row diff --git a/mm/mprotect.c b/mm/mprotect.c index ace9345..3312166 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -105,7 +105,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } if (updated) pages++; - } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { + } else if (IS_ENABLED(CONFIG_MIGRATION)) { swp_entry_t entry = pte_to_swp_entry(oldpte); if (is_write_migration_entry(entry)) { diff --git a/mm/mremap.c b/mm/mremap.c index 17fa018..57dadc0 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -81,8 +81,6 @@ static pte_t move_soft_dirty_pte(pte_t pte) pte = pte_mksoft_dirty(pte); else if (is_swap_pte(pte)) pte = pte_swp_mksoft_dirty(pte); - else if (pte_file(pte)) - pte = pte_file_mksoft_dirty(pte); #endif return pte; } @@ -86,10 +86,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) (vma->vm_flags & VM_SHARED)) { get_file(file); up_read(&mm->mmap_sem); - if (vma->vm_flags & VM_NONLINEAR) - error = vfs_fsync(file, 1); - else - error = vfs_fsync_range(file, fstart, fend, 1); + error = vfs_fsync_range(file, fstart, fend, 1); fput(file); if (error || start >= end) goto out; @@ -1984,14 +1984,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_map_pages); -int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff) -{ - BUG(); - return 0; -} -EXPORT_SYMBOL(generic_file_remap_pages); - static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e20f9c..f121050 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -552,17 +552,15 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, return 0; if (page_is_guard(buddy) && page_order(buddy) == order) { - VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); - if (page_zone_id(page) != page_zone_id(buddy)) return 0; + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); + return 1; } if (PageBuddy(buddy) && page_order(buddy) == order) { - VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); - /* * zone check is done late to avoid uselessly * calculating zone/node ids for pages that could @@ -571,6 +569,8 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, if (page_zone_id(page) != page_zone_id(buddy)) return 0; + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); + return 1; } return 0; @@ -590,9 +590,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; - } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { - if (!vma->vm_file || - vma->vm_file->f_mapping != page->mapping) + } else if (page->mapping) { + if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) return -EFAULT; } else return -EFAULT; @@ -1274,7 +1273,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); set_pte_at(mm, address, pte, swp_pte); - BUG_ON(pte_file(*pte)); } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) { /* Establish migration entry for a file page */ @@ -1316,211 +1314,6 @@ out_mlock: return ret; } -/* - * objrmap doesn't work for nonlinear VMAs because the assumption that - * offset-into-file correlates with offset-into-virtual-addresses does not hold. - * Consequently, given a particular page and its ->index, we cannot locate the - * ptes which are mapping that page without an exhaustive linear search. - * - * So what this code does is a mini "virtual scan" of each nonlinear VMA which - * maps the file to which the target page belongs. The ->vm_private_data field - * holds the current cursor into that scan. Successive searches will circulate - * around the vma's virtual address space. - * - * So as more replacement pressure is applied to the pages in a nonlinear VMA, - * more scanning pressure is placed against them as well. Eventually pages - * will become fully unmapped and are eligible for eviction. - * - * For very sparsely populated VMAs this is a little inefficient - chances are - * there there won't be many ptes located within the scan cluster. In this case - * maybe we could scan further - to the end of the pte page, perhaps. - * - * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can - * acquire it without blocking. If vma locked, mlock the pages in the cluster, - * rather than unmapping them. If we encounter the "check_page" that vmscan is - * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN. - */ -#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) -#define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) - -static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, - struct vm_area_struct *vma, struct page *check_page) -{ - struct mm_struct *mm = vma->vm_mm; - pmd_t *pmd; - pte_t *pte; - pte_t pteval; - spinlock_t *ptl; - struct page *page; - unsigned long address; - unsigned long mmun_start; /* For mmu_notifiers */ - unsigned long mmun_end; /* For mmu_notifiers */ - unsigned long end; - int ret = SWAP_AGAIN; - int locked_vma = 0; - - address = (vma->vm_start + cursor) & CLUSTER_MASK; - end = address + CLUSTER_SIZE; - if (address < vma->vm_start) - address = vma->vm_start; - if (end > vma->vm_end) - end = vma->vm_end; - - pmd = mm_find_pmd(mm, address); - if (!pmd) - return ret; - - mmun_start = address; - mmun_end = end; - mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - - /* - * If we can acquire the mmap_sem for read, and vma is VM_LOCKED, - * keep the sem while scanning the cluster for mlocking pages. - */ - if (down_read_trylock(&vma->vm_mm->mmap_sem)) { - locked_vma = (vma->vm_flags & VM_LOCKED); - if (!locked_vma) - up_read(&vma->vm_mm->mmap_sem); /* don't need it */ - } - - pte = pte_offset_map_lock(mm, pmd, address, &ptl); - - /* Update high watermark before we lower rss */ - update_hiwater_rss(mm); - - for (; address < end; pte++, address += PAGE_SIZE) { - if (!pte_present(*pte)) - continue; - page = vm_normal_page(vma, address, *pte); - BUG_ON(!page || PageAnon(page)); - - if (locked_vma) { - if (page == check_page) { - /* we know we have check_page locked */ - mlock_vma_page(page); - ret = SWAP_MLOCK; - } else if (trylock_page(page)) { - /* - * If we can lock the page, perform mlock. - * Otherwise leave the page alone, it will be - * eventually encountered again later. - */ - mlock_vma_page(page); - unlock_page(page); - } - continue; /* don't unmap */ - } - - /* - * No need for _notify because we're within an - * mmu_notifier_invalidate_range_ {start|end} scope. - */ - if (ptep_clear_flush_young(vma, address, pte)) - continue; - - /* Nuke the page table entry. */ - flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush_notify(vma, address, pte); - - /* If nonlinear, store the file page offset in the pte. */ - if (page->index != linear_page_index(vma, address)) { - pte_t ptfile = pgoff_to_pte(page->index); - if (pte_soft_dirty(pteval)) - ptfile = pte_file_mksoft_dirty(ptfile); - set_pte_at(mm, address, pte, ptfile); - } - - /* Move the dirty bit to the physical page now the pte is gone. */ - if (pte_dirty(pteval)) - set_page_dirty(page); - - page_remove_rmap(page); - page_cache_release(page); - dec_mm_counter(mm, MM_FILEPAGES); - (*mapcount)--; - } - pte_unmap_unlock(pte - 1, ptl); - mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); - if (locked_vma) - up_read(&vma->vm_mm->mmap_sem); - return ret; -} - -static int try_to_unmap_nonlinear(struct page *page, - struct address_space *mapping, void *arg) -{ - struct vm_area_struct *vma; - int ret = SWAP_AGAIN; - unsigned long cursor; - unsigned long max_nl_cursor = 0; - unsigned long max_nl_size = 0; - unsigned int mapcount; - - list_for_each_entry(vma, - &mapping->i_mmap_nonlinear, shared.nonlinear) { - - cursor = (unsigned long) vma->vm_private_data; - if (cursor > max_nl_cursor) - max_nl_cursor = cursor; - cursor = vma->vm_end - vma->vm_start; - if (cursor > max_nl_size) - max_nl_size = cursor; - } - - if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ - return SWAP_FAIL; - } - - /* - * We don't try to search for this page in the nonlinear vmas, - * and page_referenced wouldn't have found it anyway. Instead - * just walk the nonlinear vmas trying to age and unmap some. - * The mapcount of the page we came in with is irrelevant, - * but even so use it as a guide to how hard we should try? - */ - mapcount = page_mapcount(page); - if (!mapcount) - return ret; - - cond_resched(); - - max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; - if (max_nl_cursor == 0) - max_nl_cursor = CLUSTER_SIZE; - - do { - list_for_each_entry(vma, - &mapping->i_mmap_nonlinear, shared.nonlinear) { - - cursor = (unsigned long) vma->vm_private_data; - while (cursor < max_nl_cursor && - cursor < vma->vm_end - vma->vm_start) { - if (try_to_unmap_cluster(cursor, &mapcount, - vma, page) == SWAP_MLOCK) - ret = SWAP_MLOCK; - cursor += CLUSTER_SIZE; - vma->vm_private_data = (void *) cursor; - if ((int)mapcount <= 0) - return ret; - } - vma->vm_private_data = (void *) max_nl_cursor; - } - cond_resched(); - max_nl_cursor += CLUSTER_SIZE; - } while (max_nl_cursor <= max_nl_size); - - /* - * Don't loop forever (perhaps all the remaining pages are - * in locked vmas). Reset cursor on all unreserved nonlinear - * vmas, now forgetting on which ones it had fallen behind. - */ - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) - vma->vm_private_data = NULL; - - return ret; -} - bool is_vma_temporary_stack(struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); @@ -1566,7 +1359,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) .rmap_one = try_to_unmap_one, .arg = (void *)flags, .done = page_not_mapped, - .file_nonlinear = try_to_unmap_nonlinear, .anon_lock = page_lock_anon_vma_read, }; @@ -1612,12 +1404,6 @@ int try_to_munlock(struct page *page) .rmap_one = try_to_unmap_one, .arg = (void *)TTU_MUNLOCK, .done = page_not_mapped, - /* - * We don't bother to try to find the munlocked page in - * nonlinears. It's costly. Instead, later, page reclaim logic - * may call try_to_unmap() and recover PG_mlocked lazily. - */ - .file_nonlinear = NULL, .anon_lock = page_lock_anon_vma_read, }; @@ -1748,13 +1534,6 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) goto done; } - if (!rwc->file_nonlinear) - goto done; - - if (list_empty(&mapping->i_mmap_nonlinear)) - goto done; - - ret = rwc->file_nonlinear(page, mapping, rwc->arg); done: i_mmap_unlock_read(mapping); return ret; @@ -3201,7 +3201,6 @@ static const struct vm_operations_struct shmem_vm_ops = { .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif - .remap_pages = generic_file_remap_pages, }; static struct dentry *shmem_mount(struct file_system_type *fs_type, @@ -235,7 +235,7 @@ static __always_inline int memcg_charge_slab(struct kmem_cache *s, return 0; if (is_root_cache(s)) return 0; - return __memcg_charge_slab(s, gfp, order); + return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order); } static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) @@ -244,7 +244,7 @@ static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) return; if (is_root_cache(s)) return; - __memcg_uncharge_slab(s, order); + memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order); } #else static inline bool is_root_cache(struct kmem_cache *s) diff --git a/mm/slab_common.c b/mm/slab_common.c index e03dd6f..6e1e4cf 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -331,7 +331,7 @@ out: out_free_cache: memcg_free_cache_params(s); - kfree(s); + kmem_cache_free(kmem_cache, s); goto out; } @@ -425,21 +425,64 @@ out_unlock: } EXPORT_SYMBOL(kmem_cache_create); +static int do_kmem_cache_shutdown(struct kmem_cache *s, + struct list_head *release, bool *need_rcu_barrier) +{ + if (__kmem_cache_shutdown(s) != 0) { + printk(KERN_ERR "kmem_cache_destroy %s: " + "Slab cache still has objects\n", s->name); + dump_stack(); + return -EBUSY; + } + + if (s->flags & SLAB_DESTROY_BY_RCU) + *need_rcu_barrier = true; + +#ifdef CONFIG_MEMCG_KMEM + if (!is_root_cache(s)) { + struct kmem_cache *root_cache = s->memcg_params->root_cache; + int memcg_id = memcg_cache_id(s->memcg_params->memcg); + + BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s); + root_cache->memcg_params->memcg_caches[memcg_id] = NULL; + } +#endif + list_move(&s->list, release); + return 0; +} + +static void do_kmem_cache_release(struct list_head *release, + bool need_rcu_barrier) +{ + struct kmem_cache *s, *s2; + + if (need_rcu_barrier) + rcu_barrier(); + + list_for_each_entry_safe(s, s2, release, list) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_remove(s); +#else + slab_kmem_cache_release(s); +#endif + } +} + #ifdef CONFIG_MEMCG_KMEM /* * memcg_create_kmem_cache - Create a cache for a memory cgroup. * @memcg: The memory cgroup the new cache is for. * @root_cache: The parent of the new cache. - * @memcg_name: The name of the memory cgroup (used for naming the new cache). * * This function attempts to create a kmem cache that will serve allocation * requests going from @memcg to @root_cache. The new cache inherits properties * from its parent. */ -struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, - struct kmem_cache *root_cache, - const char *memcg_name) +void memcg_create_kmem_cache(struct mem_cgroup *memcg, + struct kmem_cache *root_cache) { + static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ + int memcg_id = memcg_cache_id(memcg); struct kmem_cache *s = NULL; char *cache_name; @@ -448,8 +491,18 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, mutex_lock(&slab_mutex); + /* + * Since per-memcg caches are created asynchronously on first + * allocation (see memcg_kmem_get_cache()), several threads can try to + * create the same cache, but only one of them may succeed. + */ + if (cache_from_memcg_idx(root_cache, memcg_id)) + goto out_unlock; + + cgroup_name(mem_cgroup_css(memcg)->cgroup, + memcg_name_buf, sizeof(memcg_name_buf)); cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, - memcg_cache_id(memcg), memcg_name); + memcg_cache_id(memcg), memcg_name_buf); if (!cache_name) goto out_unlock; @@ -457,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, root_cache->size, root_cache->align, root_cache->flags, root_cache->ctor, memcg, root_cache); + /* + * If we could not create a memcg cache, do not complain, because + * that's not critical at all as we can always proceed with the root + * cache. + */ if (IS_ERR(s)) { kfree(cache_name); - s = NULL; + goto out_unlock; } + /* + * Since readers won't lock (see cache_from_memcg_idx()), we need a + * barrier here to ensure nobody will see the kmem_cache partially + * initialized. + */ + smp_wmb(); + root_cache->memcg_params->memcg_caches[memcg_id] = s; + out_unlock: mutex_unlock(&slab_mutex); put_online_mems(); put_online_cpus(); - - return s; } -static int memcg_cleanup_cache_params(struct kmem_cache *s) +void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) { - int rc; + LIST_HEAD(release); + bool need_rcu_barrier = false; + struct kmem_cache *s, *s2; - if (!s->memcg_params || - !s->memcg_params->is_root_cache) - return 0; + get_online_cpus(); + get_online_mems(); - mutex_unlock(&slab_mutex); - rc = __memcg_cleanup_cache_params(s); mutex_lock(&slab_mutex); + list_for_each_entry_safe(s, s2, &slab_caches, list) { + if (is_root_cache(s) || s->memcg_params->memcg != memcg) + continue; + /* + * The cgroup is about to be freed and therefore has no charges + * left. Hence, all its caches must be empty by now. + */ + BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier)); + } + mutex_unlock(&slab_mutex); - return rc; -} -#else -static int memcg_cleanup_cache_params(struct kmem_cache *s) -{ - return 0; + put_online_mems(); + put_online_cpus(); + + do_kmem_cache_release(&release, need_rcu_barrier); } #endif /* CONFIG_MEMCG_KMEM */ void slab_kmem_cache_release(struct kmem_cache *s) { + memcg_free_cache_params(s); kfree(s->name); kmem_cache_free(kmem_cache, s); } void kmem_cache_destroy(struct kmem_cache *s) { + int i; + LIST_HEAD(release); + bool need_rcu_barrier = false; + bool busy = false; + get_online_cpus(); get_online_mems(); @@ -509,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s) if (s->refcount) goto out_unlock; - if (memcg_cleanup_cache_params(s) != 0) - goto out_unlock; + for_each_memcg_cache_index(i) { + struct kmem_cache *c = cache_from_memcg_idx(s, i); - if (__kmem_cache_shutdown(s) != 0) { - printk(KERN_ERR "kmem_cache_destroy %s: " - "Slab cache still has objects\n", s->name); - dump_stack(); - goto out_unlock; + if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier)) + busy = true; } - list_del(&s->list); - - mutex_unlock(&slab_mutex); - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); - - memcg_free_cache_params(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_remove(s); -#else - slab_kmem_cache_release(s); -#endif - goto out; + if (!busy) + do_kmem_cache_shutdown(s, &release, &need_rcu_barrier); out_unlock: mutex_unlock(&slab_mutex); -out: + put_online_mems(); put_online_cpus(); + + do_kmem_cache_release(&release, need_rcu_barrier); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -2398,13 +2398,24 @@ redo: * reading from one cpu area. That does not matter as long * as we end up on the original cpu again when doing the cmpxchg. * - * Preemption is disabled for the retrieval of the tid because that - * must occur from the current processor. We cannot allow rescheduling - * on a different processor between the determination of the pointer - * and the retrieval of the tid. + * We should guarantee that tid and kmem_cache are retrieved on + * the same cpu. It could be different if CONFIG_PREEMPT so we need + * to check if it is matched or not. */ - preempt_disable(); - c = this_cpu_ptr(s->cpu_slab); + do { + tid = this_cpu_read(s->cpu_slab->tid); + c = raw_cpu_ptr(s->cpu_slab); + } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + + /* + * Irqless object alloc/free algorithm used here depends on sequence + * of fetching cpu_slab's data. tid should be fetched before anything + * on c to guarantee that object and page associated with previous tid + * won't be used with current tid. If we fetch tid first, object and + * page could be one associated with next tid and our alloc/free + * request will be failed. In this case, we will retry. So, no problem. + */ + barrier(); /* * The transaction ids are globally unique per cpu and per operation on @@ -2412,8 +2423,6 @@ redo: * occurs on the right processor and that there was no operation on the * linked list in between. */ - tid = c->tid; - preempt_enable(); object = c->freelist; page = c->page; @@ -2512,7 +2521,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace); #endif /* - * Slow patch handling. This may still be called frequently since objects + * Slow path handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. * * So we still attempt to reduce cache line usage. Just take the slab @@ -2659,11 +2668,13 @@ redo: * data is retrieved via this pointer. If we are on the same cpu * during the cmpxchg then the free will succedd. */ - preempt_disable(); - c = this_cpu_ptr(s->cpu_slab); + do { + tid = this_cpu_read(s->cpu_slab->tid); + c = raw_cpu_ptr(s->cpu_slab); + } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); - tid = c->tid; - preempt_enable(); + /* Same with comment on barrier() in slab_alloc_node() */ + barrier(); if (likely(page == c->page)) { set_freepointer(s, object, c->freelist); @@ -1140,10 +1140,8 @@ void __init swap_setup(void) if (bdi_init(swapper_spaces[0].backing_dev_info)) panic("Failed to init swap bdi"); - for (i = 0; i < MAX_SWAPFILES; i++) { + for (i = 0; i < MAX_SWAPFILES; i++) spin_lock_init(&swapper_spaces[i].tree_lock); - INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); - } #endif /* Use a smaller cluster for small-memory machines */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 1284f89..9943e5f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -17,6 +17,9 @@ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/vmstat.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/math64.h> #include <linux/writeback.h> @@ -670,66 +673,6 @@ int fragmentation_index(struct zone *zone, unsigned int order) } #endif -#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION) -#include <linux/proc_fs.h> -#include <linux/seq_file.h> - -static char * const migratetype_names[MIGRATE_TYPES] = { - "Unmovable", - "Reclaimable", - "Movable", - "Reserve", -#ifdef CONFIG_CMA - "CMA", -#endif -#ifdef CONFIG_MEMORY_ISOLATION - "Isolate", -#endif -}; - -static void *frag_start(struct seq_file *m, loff_t *pos) -{ - pg_data_t *pgdat; - loff_t node = *pos; - for (pgdat = first_online_pgdat(); - pgdat && node; - pgdat = next_online_pgdat(pgdat)) - --node; - - return pgdat; -} - -static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) -{ - pg_data_t *pgdat = (pg_data_t *)arg; - - (*pos)++; - return next_online_pgdat(pgdat); -} - -static void frag_stop(struct seq_file *m, void *arg) -{ -} - -/* Walk all the zones in a node and print using a callback */ -static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, - void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) -{ - struct zone *zone; - struct zone *node_zones = pgdat->node_zones; - unsigned long flags; - - for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { - if (!populated_zone(zone)) - continue; - - spin_lock_irqsave(&zone->lock, flags); - print(m, pgdat, zone); - spin_unlock_irqrestore(&zone->lock, flags); - } -} -#endif - #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA) #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", @@ -907,7 +850,66 @@ const char * const vmstat_text[] = { #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ +#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ + defined(CONFIG_PROC_FS) +static void *frag_start(struct seq_file *m, loff_t *pos) +{ + pg_data_t *pgdat; + loff_t node = *pos; + + for (pgdat = first_online_pgdat(); + pgdat && node; + pgdat = next_online_pgdat(pgdat)) + --node; + + return pgdat; +} + +static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) +{ + pg_data_t *pgdat = (pg_data_t *)arg; + + (*pos)++; + return next_online_pgdat(pgdat); +} + +static void frag_stop(struct seq_file *m, void *arg) +{ +} + +/* Walk all the zones in a node and print using a callback */ +static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, + void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) +{ + struct zone *zone; + struct zone *node_zones = pgdat->node_zones; + unsigned long flags; + + for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { + if (!populated_zone(zone)) + continue; + + spin_lock_irqsave(&zone->lock, flags); + print(m, pgdat, zone); + spin_unlock_irqrestore(&zone->lock, flags); + } +} +#endif + #ifdef CONFIG_PROC_FS +static char * const migratetype_names[MIGRATE_TYPES] = { + "Unmovable", + "Reclaimable", + "Movable", + "Reserve", +#ifdef CONFIG_CMA + "CMA", +#endif +#ifdef CONFIG_MEMORY_ISOLATION + "Isolate", +#endif +}; + static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { @@ -1536,8 +1538,6 @@ static int __init setup_vmstat(void) module_init(setup_vmstat) #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) -#include <linux/debugfs.h> - /* * Return an index indicating how much of the available free memory is |